diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json index 904a0846e..97b3e9bfa 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/compile_commands.json @@ -1,151 +1,151 @@ [ { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o CoralBenchmark.o -D__CUDACC__=1 CoralBenchmark.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CoralBenchmark.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CoralBenchmark.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o CycleTracking.o -D__CUDACC__=1 CycleTracking.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o DecompositionObject.o -D__CUDACC__=1 DecompositionObject.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": 
"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DecompositionObject.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DecompositionObject.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o DirectionCosine.o -D__CUDACC__=1 DirectionCosine.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o EnergySpectrum.o -D__CUDACC__=1 EnergySpectrum.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o GlobalFccGrid.o -D__CUDACC__=1 GlobalFccGrid.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc" + "directory": 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o GridAssignmentObject.o -D__CUDACC__=1 GridAssignmentObject.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o InputBlock.o -D__CUDACC__=1 InputBlock.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/InputBlock.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/InputBlock.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Base_Particle.o -D__CUDACC__=1 MC_Base_Particle.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Base_Particle.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Base_Particle.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Domain.o -D__CUDACC__=1 MC_Domain.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Fast_Timer.o -D__CUDACC__=1 MC_Fast_Timer.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MC_Particle_Buffer.o -D__CUDACC__=1 MC_Particle_Buffer.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle_Buffer.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle_Buffer.cc" }, { "command": "nvcc -c -I/include/ 
-DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MeshPartition.o -D__CUDACC__=1 MeshPartition.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MeshPartition.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MeshPartition.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MonteCarlo.o -D__CUDACC__=1 MonteCarlo.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o MpiCommObject.o -D__CUDACC__=1 MpiCommObject.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MpiCommObject.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MpiCommObject.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o Parameters.o -D__CUDACC__=1 Parameters.cc", - "directory": 
"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Parameters.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Parameters.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o ParticleVault.o -D__CUDACC__=1 ParticleVault.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o ParticleVaultContainer.o -D__CUDACC__=1 ParticleVaultContainer.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVaultContainer.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVaultContainer.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o PopulationControl.o -D__CUDACC__=1 PopulationControl.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": 
"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o SharedMemoryCommObject.o -D__CUDACC__=1 SharedMemoryCommObject.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/SharedMemoryCommObject.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/SharedMemoryCommObject.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o Tallies.o -D__CUDACC__=1 Tallies.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Tallies.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Tallies.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o cmdLineParser.o -D__CUDACC__=1 cmdLineParser.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cmdLineParser.cc" + "directory": 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cmdLineParser.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o cudaFunctions.o -D__CUDACC__=1 cudaFunctions.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o initMC.o -D__CUDACC__=1 initMC.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o main.o -D__CUDACC__=1 main.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc" 
}, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o parseUtils.o -D__CUDACC__=1 parseUtils.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/parseUtils.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/parseUtils.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o utils.o -D__CUDACC__=1 utils.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utils.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utils.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o utilsMpi.o -D__CUDACC__=1 utilsMpi.cc", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.cc" }, { "command": "nvcc -c -I/include/ -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o Random.o -D__CUDACC__=1 Random.cc", - "directory": 
"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Random.cc" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/Random.cc" }, { "command": "nvcc -DHAVE_CUDA -DHAVE_UVM=1 -std=c++11 -O3 -o qs CoralBenchmark.o CycleTracking.o DecompositionObject.o DirectionCosine.o EnergySpectrum.o GlobalFccGrid.o GridAssignmentObject.o InputBlock.o MC_Base_Particle.o MC_Domain.o MC_Fast_Timer.o MC_Particle_Buffer.o MeshPartition.o MonteCarlo.o MpiCommObject.o Parameters.o ParticleVault.o ParticleVaultContainer.o PopulationControl.o SharedMemoryCommObject.o Tallies.o cmdLineParser.o cudaFunctions.o initMC.o main.o parseUtils.o utils.o utilsMpi.o Random.o -D__CUDACC__=1", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src" } ] \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml index b2447aeee..bc99732c8 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/AtomicMacro.hh' Replacements: - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 3129 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 3193 Length: 11 ReplacementText: '' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 3300 Length: 18 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 3331 Length: 13 ReplacementText: DPCT_COMPATIBILITY_TEMP @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 3349 Length: 13 ReplacementText: DPCT_COMPATIBILITY_TEMP @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: 
false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 4096 Length: 13 ReplacementText: DPCT_COMPATIBILITY_TEMP @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 4281 Length: 16 ReplacementText: 'dpct::atomic_fetch_add(&x, v)' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 4331 Length: 16 ReplacementText: 'dpct::atomic_fetch_add(&x, 1)' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Offset: 4392 Length: 16 ReplacementText: 'dpct::atomic_fetch_add(&x, v)' @@ -83,7 +83,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/AtomicMacro.hh' Digest: b95fb138e417bb4c5ab15dc45f6ad43d 
DpctVersion: 18.0.0 MainHelperFileName: '' @@ -92,7 +92,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -101,7 +101,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml index f748328f3..0dbb5b0fc 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CollisionEvent.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 3070 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 3455 Length: 0 ReplacementText: "\n#include \n" @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 3818 Length: 8 ReplacementText: 'sycl::sin(phi)' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 3847 Length: 8 ReplacementText: 'sycl::cos(phi)' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 3878 Length: 35 ReplacementText: 'sycl::sqrt((1.0 - (cosTheta * cosTheta)))' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 4069 Length: 313 ReplacementText: "sycl::sqrt((1.0 - ((PhysicalConstants::_neutronRestMassEnergy *\n PhysicalConstants::_neutronRestMassEnergy) /\n ((energy + PhysicalConstants::_neutronRestMassEnergy) *\n 
(energy + PhysicalConstants::_neutronRestMassEnergy)))))" @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 4682 Length: 17 ReplacementText: 'sycl::log(randomNumber)' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 4720 Length: 0 ReplacementText: "/*\nDPCT1110:31: The total declared local variable size in device function CollisionEvent exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 4874 Length: 20 ReplacementText: '' @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 4997 Length: 112 ReplacementText: '' @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 5714 Length: 20 ReplacementText: '' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 5807 Length: 99 ReplacementText: '' @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 5999 Length: 20 ReplacementText: '' @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 6190 Length: 192 ReplacementText: '' @@ -127,7 +127,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 7366 Length: 20 ReplacementText: '' @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 7710 Length: 345 ReplacementText: '' @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 8377 Length: 20 ReplacementText: '' @@ -154,7 +154,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 8459 Length: 90 ReplacementText: '' @@ -163,7 +163,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 8551 Length: 20 ReplacementText: '' @@ -172,7 +172,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 8756 Length: 167 ReplacementText: '' @@ -181,7 +181,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 8993 Length: 20 ReplacementText: '' @@ -190,7 +190,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 9078 Length: 91 ReplacementText: '' @@ -199,7 +199,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 9224 Length: 20 ReplacementText: '' @@ -208,7 +208,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 9309 Length: 90 ReplacementText: '' @@ -217,7 +217,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 9451 Length: 20 ReplacementText: '' @@ -226,7 +226,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 9603 Length: 173 ReplacementText: '' @@ -235,7 +235,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 11168 Length: 20 ReplacementText: '' @@ -244,7 +244,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 11291 Length: 112 ReplacementText: '' @@ -253,7 +253,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Offset: 11427 Length: 0 ReplacementText: "\ninline HOST_DEVICE bool CollisionEvent_host_ct7(MonteCarlo *monteCarlo, MC_Particle &mc_particle, unsigned int tally_index, int particle_index, int *tallyArray)\n{\n\n const MC_Cell_State &cell = monteCarlo->domain[mc_particle.domain].cell_state[mc_particle.cell];\n\n\n int globalMatIndex = cell._material;\n\n //------------------------------------------------------------------------------------------------------------------\n // Pick the isotope and reaction.\n //------------------------------------------------------------------------------------------------------------------\n double randomNumber = rngSample(&mc_particle.random_number_seed);\n double totalCrossSection = mc_particle.totalCrossSection;\n double currentCrossSection = totalCrossSection * randomNumber;\n int selectedIso = -1;\n int selectedUniqueNumber = -1;\n int selectedReact = -1;\n\n int numIsos = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size();\n\n\n for (int isoIndex = 0; isoIndex < numIsos && currentCrossSection >= 0; isoIndex++)\n {\n\n int uniqueNumber = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid;\n int numReacts = monteCarlo->_nuclearData->getNumberReactions(uniqueNumber);\n\n for (int reactIndex = 0; reactIndex < numReacts; reactIndex++)\n {\n currentCrossSection -= macroscopicCrossSection(monteCarlo, reactIndex, mc_particle.domain, mc_particle.cell,\n isoIndex, 
mc_particle.energy_group);\n if (currentCrossSection < 0)\n {\n selectedIso = isoIndex;\n selectedUniqueNumber = uniqueNumber;\n selectedReact = reactIndex;\n break;\n }\n }\n }\n qs_assert(selectedIso != -1);\n qs_assert(selectedUniqueNumber != -1);\n qs_assert(selectedReact != -1);\n\n //------------------------------------------------------------------------------------------------------------------\n // Do the collision.\n //------------------------------------------------------------------------------------------------------------------\n double energyOut[MAX_PRODUCTION_SIZE];\n double angleOut[MAX_PRODUCTION_SIZE];\n int nOut = 0;\n\n double mat_mass = monteCarlo->_materialDatabase->_mat[globalMatIndex]._mass;\n monteCarlo->_nuclearData->_isotopes[selectedUniqueNumber]._species[0]._reactions[selectedReact].sampleCollision(\n mc_particle.kinetic_energy, mat_mass, &energyOut[0], &angleOut[0], nOut, &(mc_particle.random_number_seed), MAX_PRODUCTION_SIZE);\n\n\n//--------------------------------------------------------------------------------------------------------------\n// Post-Collision Phase 1:\n// Tally the collision\n//--------------------------------------------------------------------------------------------------------------\n\n// Set the reaction for this particle.\n\n ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._collision);\n\n\n\n NuclearDataReaction::Enum reactionType = monteCarlo->_nuclearData->_isotopes[selectedUniqueNumber]._species[0]._reactions[selectedReact]._reactionType;\n\n\n switch (reactionType)\n {\n case NuclearDataReaction::Scatter:\n\n ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._scatter);\n\n break;\n case NuclearDataReaction::Absorption:\n\n ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._absorb);\n\n break;\n case NuclearDataReaction::Fission:\n\n ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._fission);\n 
ATOMIC_ADD(monteCarlo->_tallies->_balanceTask[tally_index]._produce, nOut);\n\n break;\n case NuclearDataReaction::Undefined:\n#ifdef DEBUG\n printf(\"reactionType invalid\\n\");\n#endif\n qs_assert(false);\n }\n\n if (nOut == 0)\n {\n return false;\n }\n\n for (int secondaryIndex = 1; secondaryIndex < nOut; secondaryIndex++)\n {\n // Newly created particles start as copies of their parent\n MC_Particle secondaryParticle = mc_particle;\n secondaryParticle.random_number_seed = rngSpawn_Random_Number_Seed(&mc_particle.random_number_seed);\n secondaryParticle.identifier = secondaryParticle.random_number_seed;\n updateTrajectory(energyOut[secondaryIndex], angleOut[secondaryIndex], secondaryParticle);\n\n // Atomic capture will be called here\n monteCarlo->_particleVaultContainer->addExtraParticle(secondaryParticle);\n }\n\n updateTrajectory(energyOut[0], angleOut[0], mc_particle);\n\n // If a fission reaction produces secondary particles we also add the original\n // particle to the \"extras\" that we will handle later. 
This avoids the\n // possibility of a particle doing multiple fission reactions in a single\n // kernel invocation and overflowing the extra storage with secondary particles.\n if (nOut > 1)\n {\n // Atomic capture will be called here\n monteCarlo->_particleVaultContainer->addExtraParticle(mc_particle);\n }\n\n// If we are still tracking this particle the update its energy group\n\n mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy);\n\n\n return nOut == 1;\n}" @@ -263,7 +263,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CollisionEvent.hh' Digest: 4a7ff666215bd49f99cacb6e5372493a DpctVersion: 18.0.0 MainHelperFileName: '' @@ -272,7 +272,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -281,7 +281,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml index 9606427f6..70dd49f44 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml +++ 
b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/CycleTracking.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 5270 Length: 20 ReplacementText: '' @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 5361 Length: 101 ReplacementText: '' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 6947 Length: 20 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 7042 Length: 100 ReplacementText: '' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 7926 Length: 20 ReplacementText: '' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 8017 Length: 96 ReplacementText: '' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 8738 Length: 0 ReplacementText: "\ninline HOST_DEVICE_CUDA void CycleTrackingFunction_host_ct3(MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault, int *tallyArray)\n{\n bool keepTrackingThisParticle = true;\n unsigned int tally_index = (particle_index) % monteCarlo->_tallies->GetNumBalanceReplications();\n unsigned int flux_tally_index = (particle_index) % monteCarlo->_tallies->GetNumFluxReplications();\n unsigned int cell_tally_index = (particle_index) % monteCarlo->_tallies->GetNumCellTallyReplications();\n\n int i1 = 0;\n // The while loop will exit after a particle reaches census or goes through MaxIters iterations, whichever comes first. If a particle reaches MaxIters it will be added to the ExtraVaults and processed in a later kernel. 
MaxIt can be defined in the makefile, otherwise it defaults to a large number that should ensure that it is never reached.\n int MaxIters = MaxIt;\n\n do\n {\n // Determine the outcome of a particle at the end of this segment such as:\n //\n // (0) Undergo a collision within the current cell,\n // (1) Cross a facet of the current cell,\n // (2) Reach the end of the time step and enter census,\n //\n MC_Segment_Outcome_type::Enum segment_outcome = MC_Segment_Outcome_type::Max_Number;\n i1 += 1;\n if (keepTrackingThisParticle)\n {\n\n#ifdef EXPONENTIAL_TALLY\n monteCarlo->_tallies->TallyCellValue(exp(rngSample(&mc_particle.random_number_seed)), mc_particle.domain, cell_tally_index, mc_particle.cell);\n#endif\n segment_outcome = MC_Segment_Outcome(monteCarlo, mc_particle, flux_tally_index);\n\n\n ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._numSegments);\n\n\n\n mc_particle.num_segments += 1.; /* Track the number of segments this particle has\n undergone this cycle on all processes. */\n // segment_outcome = keepTrackingThisParticle ? 
segment_outcome : MC_Segment_Outcome_type::Max_Number;\n }\n switch (segment_outcome)\n {\n\n case MC_Segment_Outcome_type::Collision:\n {\n // The particle undergoes a collision event producing:\n // (0) Other-than-one same-species secondary particle, or\n // (1) Exactly one same-species secondary particle.\n if (CollisionEvent(monteCarlo, mc_particle, tally_index, particle_index, tallyArray) == MC_Collision_Event_Return::Continue_Tracking)\n {\n keepTrackingThisParticle = true;\n }\n else\n {\n keepTrackingThisParticle = false;\n }\n }\n break;\n\n case MC_Segment_Outcome_type::Facet_Crossing:\n {\n // The particle has reached a cell facet.\n MC_Tally_Event::Enum facet_crossing_type = MC_Facet_Crossing_Event(mc_particle, monteCarlo, particle_index, processingVault);\n\n if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Transit_Exit)\n {\n keepTrackingThisParticle = true; // Transit Event\n }\n else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Escape)\n {\n\n ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._escape);\n\n\n mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape;\n mc_particle.species = -1;\n keepTrackingThisParticle = false;\n }\n else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Reflection)\n {\n MCT_Reflect_Particle(monteCarlo, mc_particle);\n keepTrackingThisParticle = true;\n }\n else\n {\n // Enters an adjacent cell in an off-processor domain.\n keepTrackingThisParticle = false;\n }\n }\n break;\n\n case MC_Segment_Outcome_type::Census:\n {\n // The particle has reached the end of the time step.\n processedVault->pushParticle(mc_particle);\n\n ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[tally_index]._census);\n\n\n keepTrackingThisParticle = false;\n }\n break;\n\n case MC_Segment_Outcome_type::Max_Number:\n {\n\n keepTrackingThisParticle = false;\n }\n break;\n\n default:\n qs_assert(false);\n keepTrackingThisParticle = false;\n break; // should this be an error\n }\n } while 
(keepTrackingThisParticle && i1 < MaxIt);\n\n if (keepTrackingThisParticle == false)\n {\n processingVault->invalidateParticle(particle_index);\n }\n else\n {\n monteCarlo->_particleVaultContainer->addExtraParticle(mc_particle);\n }\n}" @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Offset: 8740 Length: 0 ReplacementText: "/*\nDPCT1110:32: The total declared local variable size in device function CycleTrackingGuts exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -74,7 +74,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/CycleTracking.hh' Digest: 988d9764f5170ed6e4181d97f202ea12 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -83,7 +83,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -92,7 +92,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml index cc4cf750a..7dbab02a3 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DeclareMacro.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Offset: 0 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Offset: 3101 Length: 9 ReplacementText: '' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Offset: 3110 Length: 10 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' 
NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Offset: 3146 Length: 9 ReplacementText: '' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Offset: 3155 Length: 10 ReplacementText: '' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Offset: 3231 Length: 10 ReplacementText: '' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Offset: 3318 Length: 10 ReplacementText: '' @@ -65,7 +65,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DeclareMacro.hh' Digest: b27ca1aa9f3c6327a99223417c8c8855 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -74,7 +74,7 
@@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -83,7 +83,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml index 1249091fd..f1924db69 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/DirectionCosine.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh' Offset: 3082 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh' Offset: 8229 Length: 37 ReplacementText: 'sycl::sqrt((1.0 - (cos_theta * cos_theta)))' @@ -20,7 +20,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.hh' Digest: bdb2736f769bc82a098178607a09589d DpctVersion: 18.0.0 MainHelperFileName: '' @@ -29,7 +29,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -38,7 +38,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml index 08fd45153..7457d22ec 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MCT.hh' Replacements: - - 
FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 3881 Length: 0 ReplacementText: "\nHOST_DEVICE\nMC_Nearest_Facet MCT_Nearest_Facet_host_ct2(\n MC_Particle *mc_particle,\n MC_Location &location,\n MC_Vector &coordinate,\n const DirectionCosine *direction_cosine,\n double distance_threshold,\n double current_best_distance,\n bool new_segment,\n MonteCarlo *monteCarlo);" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 4069 Length: 0 ReplacementText: "\nHOST_DEVICE\nvoid MCT_Generate_Coordinate_3D_G_host_ct9(\n uint64_t *random_number_seed,\n int domain_num,\n int cell,\n MC_Vector &coordinate,\n MonteCarlo *monteCarlo);" @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 4380 Length: 20 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 4465 Length: 72 ReplacementText: '' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 4674 Length: 0 ReplacementText: "\ninline HOST_DEVICE\n\n Subfacet_Adjacency &\n MCT_Adjacent_Facet_host_ct0(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo *monteCarlo)\n\n{\n\n MC_Domain &domain = monteCarlo->domain[location.domain];\n\n Subfacet_Adjacency &adjacency = domain.mesh._cellConnectivity[location.cell]._facet[location.facet].subfacet;\n\n return adjacency;\n}" @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 4772 Length: 0 ReplacementText: "\nHOST_DEVICE\nvoid MCT_Reflect_Particle_host_ct4(MonteCarlo *mcco, MC_Particle &particle);" @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 8118 Length: 20 ReplacementText: '' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 8203 Length: 72 ReplacementText: '' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 9522 Length: 0 ReplacementText: "\ninline HOST_DEVICE\n MC_Nearest_Facet\n MCT_Nearest_Facet_host_ct2(MC_Particle *mc_particle,\n MC_Location &location,\n MC_Vector &coordinate,\n const DirectionCosine *direction_cosine,\n double distance_threshold,\n double current_best_distance,\n bool new_segment,\n MonteCarlo *monteCarlo)\n{\n // #ifndef BCMN_HAVE_OPENMP\n // MC_FASTTIMER_START(MC_Fast_Timer::Nearest_Facet);\n // #endif\n //\n\n if (location.domain < 0 || location.cell < 0)\n {\n qs_assert(false);\n // std::string output_string;\n // mc_particle->Copy_Particle_To_String(output_string);\n // MC_Fatal_Jump( \"Bad location value. region: %d domain: %d, cell: %d.\\nParticle record\\n%s\\n\",\n // location.region, location.domain, location.cell, output_string.c_str());\n }\n\n MC_Domain &domain = monteCarlo->domain[location.domain];\n\n\n MC_Nearest_Facet nearest_facet =\n MCT_Nearest_Facet_3D_G(mc_particle, domain, location, coordinate, direction_cosine);\n\n if (nearest_facet.distance_to_facet < 0)\n {\n nearest_facet.distance_to_facet = 0;\n }\n\n if (nearest_facet.distance_to_facet >= PhysicalConstants::_hugeDouble)\n {\n qs_assert(false);\n // MC_Warning( \"Infinite distance (cell not bound) for location [Reg:%d Local Dom:%d \"\n // \"Global Dom: %d Cell:%d Fac:%d], coordinate (%g %g %g) and direction (%g %g %g).\\n\",\n // location.region, location.domain,\n // mcco->region->Global_Domain_Number(location.region, location.domain),\n // location.cell, location.facet,\n // coordinate.x, coordinate.y, coordinate.z,\n // direction_cosine->alpha, direction_cosine->beta, direction_cosine->gamma);\n // if ( mc_particle )\n // {\n // MC_Warning( \"mc_particle.identifier %\" PRIu64 \"\\n\", mc_particle->identifier );\n // }\n }\n\n // 
#ifndef BCMN_HAVE_OPENMP\n // MC_FASTTIMER_STOP(MC_Fast_Timer::Nearest_Facet);\n // #endif\n\n return nearest_facet;\n}" @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 9627 Length: 0 ReplacementText: "/*\nDPCT1110:29: The total declared local variable size in device function MCT_Generate_Coordinate_3D_G exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 10022 Length: 20 ReplacementText: '' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 10108 Length: 73 ReplacementText: '' @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 12500 Length: 0 ReplacementText: "\ninline HOST_DEVICE_CUDA void 
MCT_Generate_Coordinate_3D_G_host_ct9(uint64_t *random_number_seed,\n int domain_num,\n int cell,\n MC_Vector &coordinate,\n MonteCarlo *monteCarlo)\n{\n\n const MC_Domain &domain = monteCarlo->domain[domain_num];\n\n\n // Determine the cell-center nodal point coordinates.\n MC_Vector center = MCT_Cell_Position_3D_G(domain, cell);\n\n int num_facets = domain.mesh._cellConnectivity[cell].num_facets;\n if (num_facets == 0)\n {\n coordinate.x = coordinate.y = coordinate.z = 0;\n return;\n }\n\n double random_number = rngSample(random_number_seed);\n double which_volume = random_number * 6.0 * domain.cell_state[cell]._volume;\n\n // Find the tet to sample from.\n double current_volume = 0.0;\n int facet_index = -1;\n const MC_Vector *point0 = NULL;\n const MC_Vector *point1 = NULL;\n const MC_Vector *point2 = NULL;\n while (current_volume < which_volume)\n {\n facet_index++;\n\n if (facet_index == num_facets)\n {\n break;\n }\n\n int facet_points[3];\n MCT_Facet_Points_3D_G(domain, cell, facet_index, 3, facet_points);\n point0 = &domain.mesh._node[facet_points[0]];\n point1 = &domain.mesh._node[facet_points[1]];\n point2 = &domain.mesh._node[facet_points[2]];\n\n double subvolume = MCT_Cell_Volume_3D_G_vector_tetDet(*point0, *point1, *point2, center);\n current_volume += subvolume;\n }\n\n // Sample from the tet.\n double r1 = rngSample(random_number_seed);\n double r2 = rngSample(random_number_seed);\n double r3 = rngSample(random_number_seed);\n\n // Cut and fold cube into prism.\n if (r1 + r2 > 1.0)\n {\n r1 = 1.0 - r1;\n r2 = 1.0 - r2;\n }\n // Cut and fold prism into tetrahedron.\n if (r2 + r3 > 1.0)\n {\n double tmp = r3;\n r3 = 1.0 - r1 - r2;\n r2 = 1.0 - tmp;\n }\n else if (r1 + r2 + r3 > 1.0)\n {\n double tmp = r3;\n r3 = r1 + r2 + r3 - 1.0;\n r1 = 1.0 - r2 - tmp;\n }\n\n // numbers 1-4 are the barycentric coordinates of the random point.\n double r4 = 1.0 - r1 - r2 - r3;\n\n // error check\n if ((point0 == NULL) || (point1 == NULL) || (point2 == NULL))\n {\n 
MC_Fatal_Jump(\"Programmer Error: points must not be NULL: point0=%p point1=%p point2=%p\",\n point0, point1, point2);\n return;\n }\n\n coordinate.x = (r4 * center.x + r1 * point0->x + r2 * point1->x + r3 * point2->x);\n coordinate.y = (r4 * center.y + r1 * point0->y + r2 * point1->y + r3 * point2->y);\n coordinate.z = (r4 * center.z + r1 * point0->z + r2 * point1->z + r3 * point2->z);\n}" @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 21135 Length: 20 ReplacementText: '' @@ -127,7 +127,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 21222 Length: 74 ReplacementText: '' @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 22265 Length: 0 ReplacementText: "\ninline HOST_DEVICE void MCT_Reflect_Particle_host_ct4(MonteCarlo *monteCarlo, MC_Particle &particle)\n{\n DirectionCosine *direction_cosine = particle.Get_Direction_Cosine();\n MC_Location location = particle.Get_Location();\n\n\n const MC_Domain &domain = location.get_domain(monteCarlo);\n\n const MC_General_Plane &plane = domain.mesh._cellGeometry[location.cell]._facet[location.facet];\n\n MC_Vector facet_normal(plane.A, plane.B, plane.C);\n\n double dot = 2.0 * 
(direction_cosine->alpha * facet_normal.x +\n direction_cosine->beta * facet_normal.y +\n direction_cosine->gamma * facet_normal.z);\n\n if (dot > 0) // do not reflect a particle that is ALREADY pointing inward\n {\n // reflect the particle\n direction_cosine->alpha -= dot * facet_normal.x;\n direction_cosine->beta -= dot * facet_normal.y;\n direction_cosine->gamma -= dot * facet_normal.z;\n }\n\n // Calculate the reflected, velocity components.\n double particle_speed = particle.velocity.Length();\n particle.velocity.x = particle_speed * particle.direction_cosine.alpha;\n particle.velocity.y = particle_speed * particle.direction_cosine.beta;\n particle.velocity.z = particle_speed * particle.direction_cosine.gamma;\n}" @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Offset: 26507 Length: 0 ReplacementText: " /*\n DPCT1110:28: The total declared local variable size in device function MCT_Nearest_Facet_3D_G exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n */\n" @@ -155,7 +155,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MCT.hh' Digest: c5cb5924e7e11dcced00f7acc3b224d6 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -164,7 +164,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -173,7 +173,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o index f1a664afd..db03f6ec0 100644 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.cc.dp.o differ diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml index 8a1843694..623b0e7f3 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml +++ 
b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Domain.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 3068 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7089 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7199 Length: 21 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7221 Length: 0 ReplacementText: '.wait())' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7291 Length: 0 ReplacementText: " /*\n DPCT1064:37: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7312 Length: 78 ReplacementText: 'DPCT_CHECK_ERROR(cell_state_h[j]._total = sycl::malloc_device(numEnergyGroups, dpct::get_in_order_queue()))' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7414 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7509 Length: 23 ReplacementText: '' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7533 Length: 0 ReplacementText: '.wait())' @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7547 Length: 0 ReplacementText: " /*\n DPCT1064:38: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7564 Length: 95 ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].cell_state = sycl::malloc_device(domain[i].cell_state.size(), dpct::get_in_order_queue()))' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7835 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - 
FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7931 Length: 23 ReplacementText: '' @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 7955 Length: 0 ReplacementText: '.wait())' @@ -127,7 +127,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8118 Length: 0 ReplacementText: " /*\n DPCT1064:39: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8135 Length: 91 ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._nbrRank = sycl::malloc_device(domain[i].mesh._nbrRank.size(), dpct::get_in_order_queue()))' @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8246 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -154,7 +154,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8365 Length: 23 ReplacementText: '' @@ -163,7 +163,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8389 Length: 0 ReplacementText: '.wait())' @@ -172,7 +172,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8459 Length: 0 ReplacementText: " /*\n DPCT1064:40: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -181,7 +181,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8476 Length: 92 ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._node = sycl::malloc_device(domain_h[i].mesh._nodeSize, dpct::get_in_order_queue()))' @@ -190,7 +190,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8588 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -199,7 +199,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 8703 Length: 23 ReplacementText: '' @@ -208,7 +208,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 
8727 Length: 0 ReplacementText: '.wait())' @@ -217,7 +217,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9264 Length: 0 ReplacementText: " /*\n DPCT1064:41: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -226,7 +226,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9284 Length: 91 ReplacementText: 'DPCT_CHECK_ERROR(cellConnectivity[j]._point = sycl::malloc_device(cellConnectivity[j].num_points, dpct::get_in_order_queue()))' @@ -235,7 +235,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9398 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -244,7 +244,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9521 Length: 23 ReplacementText: '' @@ -253,7 +253,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9545 Length: 0 ReplacementText: '.wait())' @@ -262,7 +262,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9548 Length: 0 ReplacementText: " /*\n DPCT1064:42: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -271,7 +271,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9568 Length: 106 ReplacementText: 'DPCT_CHECK_ERROR(cellConnectivity[j]._facet = sycl::malloc_device(cellConnectivity[j].num_facets, dpct::get_in_order_queue()))' @@ -280,7 +280,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9697 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -289,7 +289,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9835 Length: 23 ReplacementText: '' @@ -298,7 +298,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9859 Length: 0 ReplacementText: '.wait())' @@ -307,7 +307,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9872 Length: 0 ReplacementText: " /*\n DPCT1064:43: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -316,7 +316,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 9889 Length: 111 ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._cellConnectivity = sycl::malloc_device(_cellConnectivitySize, dpct::get_in_order_queue()))' @@ -325,7 +325,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10020 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -334,7 +334,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10136 Length: 23 ReplacementText: '' @@ -343,7 +343,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10160 Length: 0 ReplacementText: '.wait())' @@ -352,7 +352,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10590 Length: 0 ReplacementText: " /*\n DPCT1064:44: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -361,7 +361,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10611 Length: 92 ReplacementText: 'DPCT_CHECK_ERROR(cellGeometry[j]._facet = sycl::malloc_device(cellGeometry[j]._size, dpct::get_in_order_queue()))' @@ -370,7 +370,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10727 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -379,7 +379,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10846 Length: 23 ReplacementText: '' @@ -388,7 +388,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 
10870 Length: 0 ReplacementText: '.wait())' @@ -397,7 +397,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10883 Length: 0 ReplacementText: " /*\n DPCT1064:45: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -406,7 +406,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 10900 Length: 102 ReplacementText: 'DPCT_CHECK_ERROR(domain_h[i].mesh._cellGeometry = sycl::malloc_device(_cellGeometrySize, dpct::get_in_order_queue()))' @@ -415,7 +415,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 11022 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -424,7 +424,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 11125 Length: 23 ReplacementText: '' @@ -433,7 +433,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 11149 Length: 0 ReplacementText: '.wait())' @@ -442,7 +442,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 11201 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -451,7 +451,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 11260 Length: 23 ReplacementText: '' @@ -460,7 +460,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Offset: 11284 Length: 0 ReplacementText: '.wait())' @@ -470,7 +470,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.hh' Digest: 74e3ff7df1b40e6fb49829ea224af620 DpctVersion: 18.0.0 
MainHelperFileName: '' @@ -479,7 +479,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -488,7 +488,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml index 3441b3655..685687728 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Crossing_Event.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' Offset: 5710 Length: 20 ReplacementText: '' @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' Offset: 5859 Length: 172 ReplacementText: '' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' Offset: 6308 Length: 0 ReplacementText: "\ninline HOST_DEVICE\n\n MC_Tally_Event::Enum\n MC_Facet_Crossing_Event_host_ct8(MC_Particle &mc_particle, MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault)\n{\n MC_Location location = mc_particle.Get_Location();\n\n Subfacet_Adjacency &facet_adjacency = MCT_Adjacent_Facet(location, mc_particle, monteCarlo);\n\n if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_On_Processor)\n {\n // The particle will enter into an adjacent cell.\n mc_particle.domain = facet_adjacency.adjacent.domain;\n mc_particle.cell = facet_adjacency.adjacent.cell;\n mc_particle.facet = facet_adjacency.adjacent.facet;\n mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Transit_Exit;\n }\n else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Escape)\n {\n // The particle will escape across the system boundary.\n mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape;\n }\n else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection)\n {\n // The particle will reflect off of the system boundary.\n mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Reflection;\n }\n else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_Off_Processor)\n {\n // The particle will enter into 
an adjacent cell on a spatial neighbor.\n // The neighboring domain is on another processor. Set domain local domain on neighbor proc\n\n mc_particle.domain = facet_adjacency.adjacent.domain;\n mc_particle.cell = facet_adjacency.adjacent.cell;\n mc_particle.facet = facet_adjacency.adjacent.facet;\n mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Communication;\n\n\n // Select particle buffer\n int neighbor_rank = monteCarlo->domain[facet_adjacency.current.domain].mesh._nbrRank[facet_adjacency.neighbor_index];\n\n\n processingVault->putParticle(mc_particle, particle_index);\n\n // Push neighbor rank and mc_particle onto the send queue\n monteCarlo->_particleVaultContainer->getSendQueue()->push(neighbor_rank, particle_index);\n }\n\n return mc_particle.last_event;\n}" @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' Offset: 6486 Length: 0 ReplacementText: "\nHOST_DEVICE\nMC_Tally_Event::Enum MC_Facet_Crossing_Event_host_ct8(MC_Particle &mc_particle, MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault);" @@ -38,7 +38,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Crossing_Event.hh' Digest: 0542db4059d5ecfa00807413ab577331 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -47,7 +47,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -56,7 +56,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml index 33d6cce10..369ec4dc1 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Facet_Geometry.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh' Offset: 116 Length: 0 ReplacementText: "\n#include \n" @@ -11,7 +11,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh' + - MainSourceFile: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Facet_Geometry.hh' Digest: 25e923f58af9f1fe977f65ecc36dd782 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -20,7 +20,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -29,7 +29,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml index 86447a50a..c6d6e397e 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Particle.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7178 Length: 0 ReplacementText: " /*\n DPCT1040:0: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -10,7 +10,7 @@ 
Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7269 Length: 0 ReplacementText: " /*\n DPCT1040:1: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7354 Length: 0 ReplacementText: " /*\n DPCT1040:2: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7475 Length: 0 ReplacementText: " /*\n DPCT1040:3: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7532 Length: 0 ReplacementText: " /*\n DPCT1040:4: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7581 Length: 0 ReplacementText: " /*\n DPCT1040:5: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7638 Length: 0 ReplacementText: " /*\n DPCT1040:6: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7698 Length: 0 ReplacementText: " /*\n DPCT1040:7: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7744 Length: 0 ReplacementText: " /*\n DPCT1040:8: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7806 Length: 0 ReplacementText: " /*\n DPCT1040:9: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7863 Length: 0 ReplacementText: " /*\n DPCT1040:10: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7925 Length: 0 ReplacementText: " /*\n DPCT1040:11: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 7987 Length: 0 ReplacementText: " /*\n DPCT1040:12: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8041 Length: 0 ReplacementText: " /*\n DPCT1040:13: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -127,7 +127,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8094 Length: 0 ReplacementText: " /*\n DPCT1040:14: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8151 Length: 0 ReplacementText: " /*\n DPCT1040:15: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8206 Length: 0 ReplacementText: " /*\n DPCT1040:16: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -154,7 +154,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8253 Length: 0 ReplacementText: " /*\n DPCT1040:17: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -163,7 +163,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8303 Length: 0 ReplacementText: " /*\n DPCT1040:18: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -172,7 +172,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8351 Length: 0 ReplacementText: " /*\n DPCT1040:19: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -181,7 +181,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8406 Length: 0 ReplacementText: " /*\n DPCT1040:20: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -190,7 +190,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8455 Length: 0 ReplacementText: " /*\n DPCT1040:21: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -199,7 +199,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8502 Length: 0 ReplacementText: " /*\n DPCT1040:22: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -208,7 +208,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8550 Length: 0 ReplacementText: " /*\n DPCT1040:23: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -217,7 +217,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Offset: 8603 Length: 0 ReplacementText: " /*\n DPCT1040:24: Use sycl::stream instead of printf if your code is used on the device.\n */\n" @@ -227,7 +227,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Particle.hh' Digest: 527af0727d791ca55e1a8b82f6151284 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -236,7 +236,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -245,7 +245,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml index 1d3125223..f32d01536 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Segment_Outcome.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' Offset: 1546 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' Offset: 1908 Length: 0 ReplacementText: "\n#include \n" @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' Offset: 3030 Length: 0 ReplacementText: "/*\nDPCT1110:30: The total declared local variable size in device function MC_Segment_Outcome exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' Offset: 5052 Length: 18 ReplacementText: 'sycl::log(random_number)' @@ -38,7 +38,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Segment_Outcome.hh' Digest: 8ca386619c55f71fc592b27f15599c56 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -47,7 +47,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -56,7 +56,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml index b3ffd5b27..db673e190 100644 --- 
a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_SourceNow.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh' Offset: 2069 Length: 0 ReplacementText: "\n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh' Offset: 2680 Length: 13 ReplacementText: DPCT_COMPATIBILITY_TEMP @@ -20,7 +20,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_SourceNow.hh' Digest: 36feb2beaac4882ff223e7d81f8a9db8 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -29,7 +29,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 
'false' @@ -38,7 +38,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml index 1bcbf54ba..c4d49d7a0 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MC_Vector.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' Offset: 3068 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' Offset: 4788 Length: 27 ReplacementText: 'sycl::sqrt(x * x + y * y + z * z)' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' Offset: 4961 Length: 81 ReplacementText: 'sycl::sqrt((x - vv.x) * (x - vv.x) + (y - vv.y) * (y - vv.y) + (z - vv.z) * (z - vv.z))' @@ -29,7 +29,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Vector.hh' Digest: 0ca62b380fd6ba895e7109a8d0c566c0 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -38,7 +38,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -47,7 +47,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml index 3099bb689..0e9032a5f 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh' +MainSourceFile: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MacroscopicCrossSection.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 3466 Length: 0 ReplacementText: "\nHOST_DEVICE\ndouble macroscopicCrossSection_host_ct5(MonteCarlo *monteCarlo, int reactionIndex, int domainIndex, int cellIndex,\n int isoIndex, int energyGroup);" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 3662 Length: 0 ReplacementText: "\nHOST_DEVICE\ndouble weightedMacroscopicCrossSection_host_ct6(MonteCarlo *monteCarlo, int taskIndex, int domainIndex,\n int cellIndex, int energyGroup);" @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 4337 Length: 20 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 4544 
Length: 208 ReplacementText: '' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 5047 Length: 20 ReplacementText: '' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 5255 Length: 209 ReplacementText: '' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 5552 Length: 20 ReplacementText: '' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 5952 Length: 387 ReplacementText: '' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 6412 Length: 0 ReplacementText: "\ninline HOST_DEVICE double macroscopicCrossSection_host_ct5(MonteCarlo *monteCarlo, int reactionIndex, int domainIndex, int cellIndex,\n int isoIndex, int energyGroup)\n{\n// Initialize various data items.\n\n int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material;\n double atomFraction = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._atomFraction;\n\n\n double microscopicCrossSection = 0.0;\n // The cell number density is the fraction of the atoms in cell\n // volume of this isotope. We set this (elsewhere) to 1/nIsotopes.\n // This is a statement that we treat materials as if all of their\n // isotopes are present in equal amounts\n\n\n double cellNumberDensity = monteCarlo->domain[domainIndex].cell_state[cellIndex]._cellNumberDensity;\n int isotopeGid = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid;\n\n if (atomFraction == 0.0 || cellNumberDensity == 0.0)\n {\n return 1e-20;\n }\n\n\n if (reactionIndex < 0)\n {\n // Return total cross section\n microscopicCrossSection = monteCarlo->_nuclearData->getTotalCrossSection(isotopeGid, energyGroup);\n }\n else\n {\n // Return the reaction cross section\n microscopicCrossSection = monteCarlo->_nuclearData->getReactionCrossSection((unsigned int)reactionIndex, isotopeGid, energyGroup);\n }\n\n\n return atomFraction * cellNumberDensity * microscopicCrossSection;\n}" @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 7198 Length: 20 ReplacementText: '' @@ -91,7 
+91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 7341 Length: 132 ReplacementText: '' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 7600 Length: 20 ReplacementText: '' @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 7786 Length: 190 ReplacementText: '' @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Offset: 8272 Length: 0 ReplacementText: "\ninline HOST_DEVICE double weightedMacroscopicCrossSection_host_ct6(MonteCarlo *monteCarlo, int taskIndex, int domainIndex,\n int cellIndex, int energyGroup)\n{\n\n double *precomputedCrossSection =\n &monteCarlo->domain[domainIndex].cell_state[cellIndex]._total[energyGroup];\n\n qs_assert(precomputedCrossSection != NULL);\n if 
(*precomputedCrossSection > 0.0)\n return *precomputedCrossSection;\n\n\n int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material;\n int nIsotopes = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size();\n\n double sum = 0.0;\n for (int isoIndex = 0; isoIndex < nIsotopes; isoIndex++)\n {\n sum += macroscopicCrossSection(monteCarlo, -1, domainIndex, cellIndex,\n isoIndex, energyGroup);\n }\n\n ATOMIC_WRITE(*precomputedCrossSection, sum);\n\n return sum;\n}" @@ -128,7 +128,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MacroscopicCrossSection.hh' Digest: 41eb50e42544ae2d846972b7c47008f8 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -137,7 +137,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -146,7 +146,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml index b8a6b8d3c..2ff4c1bf3 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml +++ 
b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MainSourceFiles.yaml @@ -1,7 +1,7 @@ --- MainSourceFile: MainSrcFiles_placehold Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc' Offset: 88 Length: 0 ReplacementText: "\n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc' Offset: 2148 Length: 0 ReplacementText: _host_ct1 @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc' Offset: 2604 Length: 0 ReplacementText: _host_ct1 @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' Offset: 0 Length: 0 ReplacementText: "#include \n#include \n" @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' Offset: 3649 Length: 26 ReplacementText: 'std::min(std::max(0, tt.x()), _nx-1)' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' Offset: 3689 Length: 26 ReplacementText: 'std::min(std::max(0, tt.y()), _ny-1)' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' Offset: 3729 Length: 26 ReplacementText: 'std::min(std::max(0, tt.z()), _nz-1)' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 0 Length: 0 ReplacementText: "#include \n#include \n" @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 117 Length: 0 ReplacementText: "\n#include \n" @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - 
- FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 3064 Length: 23 ReplacementText: 'std::min(minCenter, iCenter)' @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 3634 Length: 10 ReplacementText: 'std::max(0, ix)' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 3654 Length: 10 ReplacementText: 'std::max(0, iy)' @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 3674 Length: 10 ReplacementText: 'std::max(0, iz)' @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 3694 
Length: 14 ReplacementText: 'std::min(_nx-1, ix)' @@ -127,7 +127,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 3718 Length: 14 ReplacementText: 'std::min(_ny-1, iy)' @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Offset: 3742 Length: 14 ReplacementText: 'std::min(_nz-1, iz)' @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc' Offset: 501 Length: 0 ReplacementText: "\n#include \n" @@ -154,7 +154,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc' Offset: 1691 Length: 0 ReplacementText: "\n#include \n" @@ -163,7 +163,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 3015 Length: 0 ReplacementText: "#include \n#include \n" @@ -172,7 +172,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 4027 Length: 19 ReplacementText: '0' @@ -181,7 +181,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 4104 Length: 19 ReplacementText: '0' @@ -190,7 +190,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 4176 Length: 19 ReplacementText: '0' @@ -199,7 +199,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 4259 Length: 19 ReplacementText: '0' @@ -208,7 +208,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 7079 Length: 19 ReplacementText: '0' @@ -217,7 +217,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 7161 Length: 19 ReplacementText: '0' @@ -226,7 +226,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8250 Length: 24 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_nuclearData, dpct::get_in_order_queue())' @@ -235,7 +235,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8274 Length: 0 ReplacementText: ')' @@ -244,7 +244,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8337 Length: 28 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_materialDatabase, dpct::get_in_order_queue())' @@ -253,7 +253,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8365 Length: 0 ReplacementText: ')' @@ -262,7 +262,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8542 Length: 19 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(domain_d, dpct::get_in_order_queue())' @@ -271,7 +271,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8561 Length: 0 ReplacementText: ')' @@ -280,7 +280,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8581 Length: 21 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_material_d, dpct::get_in_order_queue())' @@ -289,7 +289,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8602 Length: 0 ReplacementText: ')' @@ -298,7 +298,7 
@@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8622 Length: 24 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(_nuclearData_d, dpct::get_in_order_queue())' @@ -307,7 +307,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' Offset: 8646 Length: 0 ReplacementText: ')' @@ -316,7 +316,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc' Offset: 267 Length: 0 ReplacementText: "\n#include \n" @@ -325,7 +325,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3015 Length: 0 ReplacementText: "#include \n#include \n" @@ -334,7 +334,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3154 Length: 11 ReplacementText: '' @@ -343,7 +343,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3183 Length: 0 ReplacementText: 'const sycl::nd_item<3> &item_ct1' @@ -352,7 +352,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3236 Length: 0 ReplacementText: item_ct1 @@ -361,7 +361,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3394 Length: 24 ReplacementText: "dpct::get_in_order_queue().parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n [=](sycl::nd_item<3> item_ct1) {\n testname::WarmUpKernel(item_ct1);\n });" @@ -370,7 +370,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: true - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3418 Length: 1 ReplacementText: '' @@ -379,7 +379,7 @@ 
Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3477 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' @@ -388,7 +388,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3558 Length: 4 ReplacementText: 'sycl::range<3>' @@ -397,7 +397,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3570 Length: 4 ReplacementText: 'sycl::range<3>' @@ -406,7 +406,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3772 Length: 7 ReplacementText: 'block[2]' @@ -415,7 +415,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3805 Length: 7 ReplacementText: 'block[1]' @@ -424,7 +424,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 3822 Length: 7 ReplacementText: 'block[0]' @@ -433,7 +433,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4069 Length: 6 ReplacementText: 'grid[2]' @@ -442,7 +442,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4098 Length: 6 ReplacementText: 'grid[1]' @@ -451,7 +451,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4118 Length: 6 ReplacementText: 'grid[0]' @@ -460,7 +460,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4210 Length: 6 ReplacementText: 'grid[2]' @@ -469,7 +469,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4243 Length: 6 ReplacementText: 'grid[1]' @@ -478,7 +478,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4295 Length: 6 ReplacementText: 'grid[0]' @@ -487,7 +487,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4404 Length: 6 ReplacementText: 'grid[2]' @@ -496,7 +496,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4437 Length: 6 ReplacementText: 'grid[1]' @@ -505,7 +505,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: 
false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Offset: 4470 Length: 6 ReplacementText: 'grid[0]' @@ -514,7 +514,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 3015 Length: 0 ReplacementText: "#include \n#include \n" @@ -523,7 +523,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 4878 Length: 19 ReplacementText: '0' @@ -532,7 +532,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 5362 Length: 94 ReplacementText: 'DPCT_CHECK_ERROR(ptr_dm = (void *)sycl::malloc_device(monteCarlo->_materialDatabase->_mat.size()*sizeof(Material_d), dpct::get_in_order_queue()))' @@ -541,7 +541,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 5576 Length: 54 
ReplacementText: 'DPCT_CHECK_ERROR(ptr_dn = (void *)sycl::malloc_device(sizeof(NuclearData_d), dpct::get_in_order_queue()))' @@ -550,7 +550,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 5705 Length: 81 ReplacementText: 'DPCT_CHECK_ERROR(ptr_dmesh = (void *)sycl::malloc_device(monteCarlo->domain.size()*sizeof(MC_Domain_d), dpct::get_in_order_queue()))' @@ -559,7 +559,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 6115 Length: 26 ReplacementText: 'DPCT_CHECK_ERROR(Ngpus = dpct::dev_mgr::instance().device_count())' @@ -568,7 +568,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 6594 Length: 0 ReplacementText: " /*\n DPCT1093:55: The \"GPUID\" device may be not the one intended for use. 
Adjust the selected device if needed.\n */\n" @@ -577,7 +577,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 6615 Length: 13 ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device' @@ -586,7 +586,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Offset: 6635 Length: 0 ReplacementText: ')' @@ -595,7 +595,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 3015 Length: 31 ReplacementText: "#include \n#include \n" @@ -604,7 +604,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 4038 Length: 26 ReplacementText: 'DPCT_CHECK_ERROR(Ngpus = dpct::dev_mgr::instance().device_count())' @@ -613,7 +613,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 4138 Length: 0 ReplacementText: " /*\n DPCT1093:52: The \"GPUID\" device may be not the one intended for use. Adjust the selected device if needed.\n */\n" @@ -622,7 +622,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 4151 Length: 13 ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device' @@ -631,7 +631,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 4171 Length: 0 ReplacementText: ')' @@ -640,7 +640,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 5206 Length: 81 ReplacementText: 'DPCT_CHECK_ERROR(tallies = (uint64_cu *)sycl::malloc_host(sizeof(uint64_cu) * NUM_TALLIES * replications, dpct::get_in_order_queue()))' @@ -649,7 +649,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 5330 Length: 79 ReplacementText: 'DPCT_CHECK_ERROR(tallies_d = (uint64_cu 
*)sycl::malloc_device(sizeof(uint64_cu) * NUM_TALLIES * replications, dpct::get_in_order_queue()))' @@ -658,7 +658,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 5601 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -667,7 +667,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 5678 Length: 24 ReplacementText: '' @@ -676,7 +676,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 5703 Length: 0 ReplacementText: '.wait())' @@ -685,7 +685,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 6228 Length: 21 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(tallies, dpct::get_in_order_queue())' @@ -694,7 +694,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 6249 Length: 0 ReplacementText: ')' @@ -703,7 +703,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 6265 Length: 19 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(tallies_d, dpct::get_in_order_queue())' @@ -712,7 +712,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 6284 Length: 0 ReplacementText: ')' @@ -721,7 +721,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 7552 Length: 23 ReplacementText: '' @@ -730,7 +730,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 7575 Length: 11 ReplacementText: '' @@ -739,7 +739,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 7735 Length: 0 ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n uint8_t *dpct_local" @@ -748,7 +748,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 7780 Length: 0 ReplacementText: item_ct1 @@ -757,7 +757,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 7822 Length: 0 ReplacementText: item_ct1 @@ -766,7 +766,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 7904 Length: 33 ReplacementText: 'auto values_l = (int *)dpct_local;' @@ -775,7 +775,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 8039 Length: 15 ReplacementText: "/*\n DPCT1065:53: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" @@ -784,7 
+784,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 8212 Length: 15 ReplacementText: "/*\n DPCT1065:54: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" @@ -793,7 +793,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 10887 Length: 0 ReplacementText: " /*\n DPCT1049:33: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed.\n */\n" @@ -802,7 +802,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 10911 Length: 156 ReplacementText: "{\n dpct::has_capability_or_fail(dpct::get_in_order_queue().get_device(), {sycl::aspect::fp64});\n dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n /*\n DPCT1083:56: The size of local memory in the migrated code may be different from the original code. 
Check that the allocated memory size in the migrated code is correct.\n */\n sycl::local_accessor dpct_local_acc_ct1(sycl::range<1>(NUM_TALLIES * replications * sizeof(int)), cgh);\n\n cgh.parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, num_wgs) * sycl::range<3>(1, 1, wg_size), sycl::range<3>(1, 1, wg_size)), \n [=](sycl::nd_item<3> item_ct1) {\n CycleTrackingKernel(monteCarlo, numParticles, processingVault, processedVault, tallies_d, item_ct1, dpct_local_acc_ct1.get_pointer());\n });\n });\n }" @@ -811,7 +811,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: true - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 11067 Length: 1 ReplacementText: '' @@ -820,7 +820,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 11179 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' @@ -829,7 +829,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 11238 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -838,7 +838,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 11315 Length: 24 ReplacementText: '' @@ -847,7 +847,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 11340 Length: 0 ReplacementText: '.wait())' @@ -856,7 +856,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 13614 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -865,7 +865,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 13691 Length: 24 ReplacementText: '' @@ -874,7 +874,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Offset: 13716 Length: 0 ReplacementText: '.wait())' @@ -884,27 +884,27 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc' + - MainSourceFile: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/DirectionCosine.cc' Digest: f62fe054a60de7c39322557f627f2be9 - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/EnergySpectrum.cc' Digest: 9e9b5ddc0d8c6d2b88a226b622c2e957 - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GlobalFccGrid.cc' Digest: c444dc769c997afe7e29ca950cdfdadc - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/GridAssignmentObject.cc' Digest: 11ffc8d96e80cdb070efba9a49ce4a3c - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Domain.cc' Digest: 0d6ea0bd49a66cb8fe99f55f55498dc8 - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MC_Fast_Timer.cc' Digest: 349b5af82883464b77234521083d1aec - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MonteCarlo.cc' 
Digest: b320237ce90b5c6fe1c7a592b30434b3 - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/PopulationControl.cc' Digest: bc5aa455481a9f7636f401db17783dda - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.cc' Digest: 332692e08b5b3008eb41ac16407ade5c - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/initMC.cc' Digest: e53e6b957b7d1551e9d3981c5a878e3f - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/main.cc' Digest: a0beca8ec967f380d8b8819024a8f2f2 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -1001,7 +1001,7 @@ CompileTargets: Compiler: nvcc OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -1010,7 +1010,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' 
diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml index 7a6016dfb..b49a4595a 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MaterialDatabase.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 3072 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5258 Length: 0 ReplacementText: " /*\n DPCT1064:51: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5273 Length: 57 ReplacementText: 'DPCT_CHECK_ERROR(local_I_d = sycl::malloc_device(isosize, dpct::get_in_order_queue()))' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5348 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5446 Length: 23 ReplacementText: '' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5470 Length: 0 ReplacementText: '.wait())' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - 
FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5637 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5709 Length: 23 ReplacementText: '' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Offset: 5733 Length: 0 ReplacementText: '.wait())' @@ -83,7 +83,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MaterialDatabase.hh' Digest: 2b1834f6239e53594a29b9e21f1ad0e7 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -92,7 +92,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -101,7 +101,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml index 21b0243d5..f71400010 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MemoryControl.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh' Offset: 3068 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh' Offset: 3743 Length: 19 ReplacementText: '0' @@ -20,7 +20,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh' + - MainSourceFile: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/MemoryControl.hh' Digest: d666f14739e9728ac8081ec8b3f7fee4 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -29,7 +29,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -38,7 +38,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o index 3e470fc88..603734f51 100644 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/MonteCarlo.cc.dp.o differ diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml index 944ffc51e..dd3f68670 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/NuclearData.hh' Replacements: - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 3064 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 4555 Length: 34 ReplacementText: 'dpct::pow(10, polynomial(log10(energy)))' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 13749 Length: 34 ReplacementText: 'dpct::pow(10, polynomial(log10(energy)))' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 19935 Length: 0 ReplacementText: " /*\n DPCT1064:46: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 19947 Length: 85 ReplacementText: 'DPCT_CHECK_ERROR(nuclearIsotope_I_d = sycl::malloc_device(isotopesSize, dpct::get_in_order_queue()))' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 20240 Length: 0 ReplacementText: " /*\n DPCT1064:47: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 20252 Length: 70 ReplacementText: 'DPCT_CHECK_ERROR(nuclearEnergy_I_d = sycl::malloc_device(energiesSize, dpct::get_in_order_queue()))' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 20561 Length: 0 ReplacementText: " /*\n DPCT1064:48: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 20576 Length: 83 ReplacementText: 'DPCT_CHECK_ERROR(nuclearSpecies_I_d = sycl::malloc_device(speciesSize, dpct::get_in_order_queue()))' @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 20963 Length: 0 ReplacementText: " /*\n DPCT1064:49: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 20981 Length: 79 ReplacementText: 'DPCT_CHECK_ERROR(nuclear_I_d = sycl::malloc_device(reactionsSize, dpct::get_in_order_queue()))' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 21400 Length: 0 ReplacementText: " /*\n DPCT1064:50: Migrated cudaMalloc call is used in a macro/template definition and may not be valid for all macro/template uses. 
Adjust the code.\n */\n" @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 21421 Length: 77 ReplacementText: 'DPCT_CHECK_ERROR(crossSections_I_d = sycl::malloc_device(NumcrossSectionSize, dpct::get_in_order_queue()))' @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 21523 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -127,7 +127,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 21671 Length: 24 ReplacementText: '' @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 21696 Length: 0 ReplacementText: '.wait())' @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22077 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -154,7 +154,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22154 Length: 23 ReplacementText: '' @@ -163,7 +163,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22178 Length: 0 ReplacementText: '.wait())' @@ -172,7 +172,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22343 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -181,7 +181,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22431 Length: 23 ReplacementText: '' @@ -190,7 +190,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22455 Length: 0 ReplacementText: '.wait())' @@ -199,7 +199,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22613 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -208,7 +208,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22702 Length: 23 ReplacementText: '' @@ -217,7 +217,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22726 Length: 0 ReplacementText: '.wait())' @@ -226,7 +226,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22863 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -235,7 +235,7 @@ 
Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22958 Length: 23 ReplacementText: '' @@ -244,7 +244,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 22982 Length: 0 ReplacementText: '.wait())' @@ -253,7 +253,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 23290 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -262,7 +262,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 23352 Length: 23 ReplacementText: '' @@ -271,7 +271,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Offset: 23376 Length: 0 
ReplacementText: '.wait())' @@ -281,7 +281,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/NuclearData.hh' Digest: d5c0a5d1457a5f95c50e1ebf275d33d7 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -290,7 +290,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -299,7 +299,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml index c90c157d9..8d9debc9d 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVault.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' 
+ - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' Offset: 5587 Length: 0 ReplacementText: "/*\nDPCT1110:25: The total declared local variable size in device function pushParticle exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' Offset: 7384 Length: 0 ReplacementText: "/*\nDPCT1110:26: The total declared local variable size in device function getParticle exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' Offset: 7772 Length: 0 ReplacementText: "/*\nDPCT1110:27: The total declared local variable size in device function putParticle exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' Offset: 9177 Length: 20 ReplacementText: '' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' Offset: 9301 Length: 113 ReplacementText: '' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' Offset: 9489 Length: 0 ReplacementText: "\ninline HOST_DEVICE void MC_Load_Particle_host_ct1(MonteCarlo *monteCarlo, MC_Particle &mc_particle, ParticleVault *particleVault, int particle_index)\n{\n // particleVault.popParticle(mc_particle);\n particleVault->getParticle(mc_particle, particle_index);\n\n // Time to Census\n if (mc_particle.time_to_census <= 0.0)\n {\n mc_particle.time_to_census += monteCarlo->time_info->time_step;\n }\n\n // Age\n if (mc_particle.age < 0.0)\n {\n mc_particle.age = 0.0;\n }\n\n// Energy Group\n\n mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy);\n\n // printf(\"file=%s line=%d\\n\",__FILE__,__LINE__);\n}" 
@@ -56,7 +56,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/ParticleVault.hh' Digest: 5c484422d6e6caea178fd73adef39cac DpctVersion: 18.0.0 MainHelperFileName: '' @@ -65,7 +65,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -74,7 +74,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o index 9f93322fc..0ee720123 100644 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/ParticleVaultContainer.o differ diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o index 9604f11c0..e28e0e1d9 100644 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/Tallies.o differ diff --git 
a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o index bda057986..ad3f1dae1 100644 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.cc.dp.o differ diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml index 125e8bc4f..2c9af8565 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaFunctions.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3064 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3183 Length: 4 ReplacementText: 'sycl::range<3>' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3195 Length: 4 ReplacementText: 'sycl::range<3>' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3302 Length: 0 ReplacementText: 'const sycl::nd_item<3> &item_ct1' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3324 Length: 10 ReplacementText: 'item_ct1.get_group(2)' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3355 Length: 10 ReplacementText: 'item_ct1.get_group(1)' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3368 Length: 9 ReplacementText: 'item_ct1.get_group_range(2)' @@ -64,7 +64,7 @@ 
Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3398 Length: 10 ReplacementText: 'item_ct1.get_group(0)' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3411 Length: 9 ReplacementText: 'item_ct1.get_group_range(2)' @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3423 Length: 9 ReplacementText: 'item_ct1.get_group_range(1)' @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3465 Length: 10 ReplacementText: 'item_ct1.get_local_range(2)' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3478 Length: 10 ReplacementText: 'item_ct1.get_local_range(1)' @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3491 Length: 10 ReplacementText: 'item_ct1.get_local_range(0)' @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3524 Length: 11 ReplacementText: 'item_ct1.get_local_id(0)' @@ -127,7 +127,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3539 Length: 10 ReplacementText: 'item_ct1.get_local_range(2)' @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3552 Length: 10 ReplacementText: 'item_ct1.get_local_range(1)' @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3585 Length: 11 ReplacementText: 'item_ct1.get_local_id(1)' @@ -154,7 +154,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3599 Length: 10 ReplacementText: 'item_ct1.get_local_range(2)' @@ -163,7 +163,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3631 Length: 11 ReplacementText: 'item_ct1.get_local_id(2)' @@ -172,7 +172,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3703 Length: 0 ReplacementText: 'const sycl::nd_item<3> &item_ct1' @@ -181,7 +181,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3727 Length: 11 ReplacementText: 
'item_ct1.get_local_id(0)' @@ -190,7 +190,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3742 Length: 10 ReplacementText: 'item_ct1.get_local_range(2)' @@ -199,7 +199,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3755 Length: 10 ReplacementText: 'item_ct1.get_local_range(1)' @@ -208,7 +208,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3788 Length: 11 ReplacementText: 'item_ct1.get_local_id(1)' @@ -217,7 +217,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3802 Length: 10 ReplacementText: 'item_ct1.get_local_range(2)' @@ -226,7 +226,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Offset: 3834 Length: 11 ReplacementText: 'item_ct1.get_local_id(2)' @@ -236,7 +236,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaFunctions.hh' Digest: 421bbc8c14b09bcf01ba6c1741b6eee6 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -245,7 +245,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -254,7 +254,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml index 845b6a3af..ef554d405 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/cudaUtils.hh' 
Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3056 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3130 Length: 18 ReplacementText: '' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3148 Length: 26 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3174 Length: 30 ReplacementText: '' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3512 Length: 0 ReplacementText: " /*\n DPCT1010:34: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. 
You need to rewrite this code.\n */\n" @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3520 Length: 11 ReplacementText: 'dpct::err0' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3538 Length: 18 ReplacementText: '0' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 3566 Length: 204 ReplacementText: '' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 4639 Length: 19 ReplacementText: '0' @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 4941 Length: 0 ReplacementText: " /*\n 
DPCT1064:36: Migrated cudaMallocManaged call is used in a macro/template definition and may not be valid for all macro/template uses. Adjust the code.\n */\n" @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 4966 Length: 27 ReplacementText: 'DPCT_CHECK_ERROR(*ptr = (T *)sycl::malloc_shared(size, dpct::get_in_order_queue()))' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 5358 Length: 13 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(ptr, dpct::get_in_order_queue())' @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Offset: 5371 Length: 0 ReplacementText: ')' @@ -119,7 +119,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/cudaUtils.hh' Digest: 54a377620c3255007518227343afb537 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -128,7 +128,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: 
AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -137,7 +137,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o index 943c997ec..6667ca4cb 100644 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/initMC.cc.dp.o differ diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o index 46c306d23..24638c6fc 100644 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/main.cc.dp.o differ diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml index 6e2695f50..4f7dfbd5a 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh' +MainSourceFile: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/mpi_stubs_internal.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh' Offset: 1159 Length: 0 ReplacementText: ' dpct_type_241340' @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh' Offset: 1248 Length: 0 ReplacementText: ' dpct_type_811906' @@ -20,7 +20,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/mpi_stubs_internal.hh' Digest: a1fef4b40b5db866c7e27de918dbe7ea DpctVersion: 18.0.0 MainHelperFileName: '' @@ -29,7 +29,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -38,7 +38,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' 
Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs index 2233466ff..7d6a5b266 100755 Binary files a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs and b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/qs differ diff --git a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml index a5e4994c7..487be6d99 100644 --- a/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml +++ b/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/out/utilsMpi.hh' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh' Offset: 2789 Length: 0 ReplacementText: ' dpct_type_169390' @@ -11,7 +11,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src/utilsMpi.hh' Digest: b0026dd3148b2f5b3e6166f0ace78f9d DpctVersion: 18.0.0 MainHelperFileName: '' @@ -20,7 +20,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + 
Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: false AsyncHandler: Value: 'false' @@ -29,7 +29,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/QuickSilver/CUDA/src' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt index e37082de2..440d57370 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeCache.txt @@ -1,5 +1,5 @@ # This is the CMakeCache file. -# For build in directory: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build +# For build in directory: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build # It was generated by CMake: /usr/bin/cmake # You can edit this file to change values found and used by cmake. # If you do not want to change any of the values, simply exit the editor. 
@@ -329,13 +329,13 @@ CUDA_rt_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/librt.a USE_SM:BOOL=OFF //Value Computed by CMake -bitcracker_BINARY_DIR:STATIC=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build +bitcracker_BINARY_DIR:STATIC=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build //Value Computed by CMake bitcracker_IS_TOP_LEVEL:STATIC=ON //Value Computed by CMake -bitcracker_SOURCE_DIR:STATIC=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA +bitcracker_SOURCE_DIR:STATIC=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA ######################## @@ -347,7 +347,7 @@ CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1 //ADVANCED property for variable: CMAKE_AR CMAKE_AR-ADVANCED:INTERNAL=1 //This is the directory where this CMakeCache.txt was created -CMAKE_CACHEFILE_DIR:INTERNAL=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build +CMAKE_CACHEFILE_DIR:INTERNAL=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build //Major version of cmake used to create the current loaded cache CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 //Minor version of cmake used to create the current loaded cache @@ -410,7 +410,7 @@ CMAKE_HAVE_LIBC_PTHREAD:INTERNAL=1 CMAKE_HAVE_PTHREAD_H:INTERNAL=1 //Source directory with the top level CMakeLists.txt file for this // project -CMAKE_HOME_DIRECTORY:INTERNAL=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA +CMAKE_HOME_DIRECTORY:INTERNAL=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA //Install .so files without execute permission. 
CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1 //ADVANCED property for variable: CMAKE_LINKER @@ -477,7 +477,7 @@ CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 CUDA_64_BIT_DEVICE_CODE-ADVANCED:INTERNAL=1 //List of intermediate files that are part of the cuda dependency // scanning. -CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend +CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend //ADVANCED property for variable: CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE-ADVANCED:INTERNAL=1 //ADVANCED property for variable: CUDA_BUILD_CUBIN diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake index 6ccc334a3..a99054488 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake @@ -77,7 +77,7 @@ endif() -set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/home/chenshe1/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") +set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/home/local_user/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/opt/intel/oneapi/mpi/2021.10.0/libfabric/lib;/opt/intel/oneapi/mpi/2021.10.0/lib;/opt/intel/oneapi/compiler/2023.2.0/linux/lib;/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/opt/intel/oneapi/tbb/2021.10.0/lib/intel64/gcc4.8;/opt/intel/oneapi/mpi/2021.10.0/lib/release;/opt/intel/oneapi/compiler/2023.2.0/linux/compiler/lib/intel64_lin") set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake index f2a5d63ad..d2bdced59 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake @@ -2,8 +2,8 @@ # Generated by "Unix Makefiles" Generator, CMake Version 3.22 # Relative path conversion top directories. -set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA") -set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build") +set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA") +set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build") # Force unix paths in dependencies. 
set(CMAKE_FORCE_UNIX_PATHS 1) diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log index f8741541e..0da9e8382 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeOutput.log @@ -10,13 +10,13 @@ The output was: Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out" -The CXX compiler identification is GNU, found in "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out" +The CXX compiler identification is GNU, found in "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out" Detecting CXX compiler ABI info compiled with the following output: -Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp +Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_848f8/fast && /usr/bin/gmake -f CMakeFiles/cmTC_848f8.dir/build.make CMakeFiles/cmTC_848f8.dir/build -gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' +gmake[1]: Entering directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' Building CXX object CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o /usr/bin/c++ -v -o CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp Using built-in specs. 
@@ -40,7 +40,7 @@ ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed" ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include" #include "..." search starts here: #include <...> search starts here: - /home/chenshe1/sandbox/dpct_install/include + /home/local_user/sandbox/dpct_install/include /opt/intel/oneapi/tbb/2021.10.0/env/../include /opt/intel/oneapi/mpi/2021.10.0//include /opt/intel/oneapi/dev-utilities/2021.10.0/include @@ -82,14 +82,14 @@ LIBRARY_PATH=/opt/intel/oneapi/mpi/2021.10.0//libfabric/lib/../lib/:/opt/intel/o COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_848f8' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_848f8.' /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/cc539bMd.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_848f8 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/opt/intel/oneapi/mpi/2021.10.0//libfabric/lib/../lib -L/opt/intel/oneapi/mpi/2021.10.0//lib/../lib -L/opt/intel/oneapi/compiler/2023.2.0/linux/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/opt/intel/oneapi/tbb/2021.10.0/env/../lib/intel64/gcc4.8 -L/opt/intel/oneapi/mpi/2021.10.0//libfabric/lib -L/opt/intel/oneapi/mpi/2021.10.0//lib/release -L/opt/intel/oneapi/mpi/2021.10.0//lib 
-L/opt/intel/oneapi/compiler/2023.2.0/linux/compiler/lib/intel64_lin -L/opt/intel/oneapi/compiler/2023.2.0/linux/lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_848f8' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_848f8.' -gmake[1]: Leaving directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' +gmake[1]: Leaving directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' Parsed CXX implicit include dir info from above output: rv=done found start of include info found start of implicit include info - add: [/home/chenshe1/sandbox/dpct_install/include] + add: [/home/local_user/sandbox/dpct_install/include] add: [/opt/intel/oneapi/tbb/2021.10.0/env/../include] add: [/opt/intel/oneapi/mpi/2021.10.0//include] add: [/opt/intel/oneapi/dev-utilities/2021.10.0/include] @@ -102,7 +102,7 @@ Parsed CXX implicit include dir info from above output: rv=done add: [/usr/include/x86_64-linux-gnu] add: [/usr/include] end of search list found - collapse include dir [/home/chenshe1/sandbox/dpct_install/include] ==> [/home/chenshe1/sandbox/dpct_install/include] + collapse include dir [/home/local_user/sandbox/dpct_install/include] ==> [/home/local_user/sandbox/dpct_install/include] collapse include dir [/opt/intel/oneapi/tbb/2021.10.0/env/../include] ==> [/opt/intel/oneapi/tbb/2021.10.0/include] collapse include dir [/opt/intel/oneapi/mpi/2021.10.0//include] ==> [/opt/intel/oneapi/mpi/2021.10.0/include] collapse include dir [/opt/intel/oneapi/dev-utilities/2021.10.0/include] ==> [/opt/intel/oneapi/dev-utilities/2021.10.0/include] @@ -114,15 +114,15 @@ Parsed CXX implicit include dir 
info from above output: rv=done collapse include dir [/usr/local/include] ==> [/usr/local/include] collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu] collapse include dir [/usr/include] ==> [/usr/include] - implicit include dirs: [/home/chenshe1/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include] + implicit include dirs: [/home/local_user/sandbox/dpct_install/include;/opt/intel/oneapi/tbb/2021.10.0/include;/opt/intel/oneapi/mpi/2021.10.0/include;/opt/intel/oneapi/dev-utilities/2021.10.0/include;/opt/intel/oneapi/compiler/2023.2.0/linux/lib/oclfpga/include;/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include] Parsed CXX implicit link information from above output: link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)] - ignore line: [Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp] + ignore line: [Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp] ignore line: [] ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_848f8/fast && /usr/bin/gmake -f CMakeFiles/cmTC_848f8.dir/build.make CMakeFiles/cmTC_848f8.dir/build] - ignore line: [gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'] + ignore line: [gmake[1]: Entering directory 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp'] ignore line: [Building CXX object CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o] ignore line: [/usr/bin/c++ -v -o CMakeFiles/cmTC_848f8.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp] ignore line: [Using built-in specs.] @@ -146,7 +146,7 @@ Parsed CXX implicit link information from above output: ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"] ignore line: [#include "..." search starts here:] ignore line: [#include <...> search starts here:] - ignore line: [ /home/chenshe1/sandbox/dpct_install/include] + ignore line: [ /home/local_user/sandbox/dpct_install/include] ignore line: [ /opt/intel/oneapi/tbb/2021.10.0/env/../include] ignore line: [ /opt/intel/oneapi/mpi/2021.10.0//include] ignore line: [ /opt/intel/oneapi/dev-utilities/2021.10.0/include] @@ -267,30 +267,30 @@ Parsed CXX implicit link information from above output: Determining if the include file pthread.h exists passed with the following output: -Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp +Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_36779/fast && /usr/bin/gmake -f CMakeFiles/cmTC_36779.dir/build.make CMakeFiles/cmTC_36779.dir/build -gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' +gmake[1]: Entering directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' Building CXX object CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o -/usr/bin/c++ -O3 -ffast-math -std=c++17 -o 
CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o -c /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/CheckIncludeFile.cxx +/usr/bin/c++ -O3 -ffast-math -std=c++17 -o CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o -c /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/CheckIncludeFile.cxx Linking CXX executable cmTC_36779 /usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_36779.dir/link.txt --verbose=1 /usr/bin/c++ -O3 -ffast-math CMakeFiles/cmTC_36779.dir/CheckIncludeFile.cxx.o -o cmTC_36779 -gmake[1]: Leaving directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' +gmake[1]: Leaving directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' Performing C++ SOURCE FILE Test CMAKE_HAVE_LIBC_PTHREAD succeeded with the following output: -Change Dir: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp +Change Dir: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_98f23/fast && /usr/bin/gmake -f CMakeFiles/cmTC_98f23.dir/build.make CMakeFiles/cmTC_98f23.dir/build -gmake[1]: Entering directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' +gmake[1]: Entering directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' Building CXX object CMakeFiles/cmTC_98f23.dir/src.cxx.o -/usr/bin/c++ -DCMAKE_HAVE_LIBC_PTHREAD -O3 -ffast-math -std=c++17 -o CMakeFiles/cmTC_98f23.dir/src.cxx.o -c 
/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/src.cxx +/usr/bin/c++ -DCMAKE_HAVE_LIBC_PTHREAD -O3 -ffast-math -std=c++17 -o CMakeFiles/cmTC_98f23.dir/src.cxx.o -c /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp/src.cxx Linking CXX executable cmTC_98f23 /usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_98f23.dir/link.txt --verbose=1 /usr/bin/c++ -O3 -ffast-math CMakeFiles/cmTC_98f23.dir/src.cxx.o -o cmTC_98f23 -gmake[1]: Leaving directory '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' +gmake[1]: Leaving directory '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/CMakeTmp' Source file was: diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2 b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2 index 9227d8d78..593560775 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2 +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/Makefile2 @@ -54,10 +54,10 @@ RM = /usr/bin/cmake -E rm -f EQUALS = = # The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA +CMAKE_SOURCE_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA # The top-level build directory on which CMake was run. 
-CMAKE_BINARY_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build +CMAKE_BINARY_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build #============================================================================= # Directory level rules for the build root directory @@ -81,14 +81,14 @@ clean: CMakeFiles/bitcracker.dir/clean CMakeFiles/bitcracker.dir/all: $(MAKE) $(MAKESILENT) -f CMakeFiles/bitcracker.dir/build.make CMakeFiles/bitcracker.dir/depend $(MAKE) $(MAKESILENT) -f CMakeFiles/bitcracker.dir/build.make CMakeFiles/bitcracker.dir/build - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=1,2,3,4,5 "Built target bitcracker" + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=1,2,3,4,5 "Built target bitcracker" .PHONY : CMakeFiles/bitcracker.dir/all # Build rule for subdir invocation for target. 
CMakeFiles/bitcracker.dir/rule: cmake_check_build_system - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 5 + $(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 5 $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/bitcracker.dir/all - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0 + $(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0 .PHONY : CMakeFiles/bitcracker.dir/rule # Convenience name for target. diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt index e6f34f5d6..d23be5742 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/TargetDirectories.txt @@ -1,3 +1,3 @@ -/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir -/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/edit_cache.dir -/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/rebuild_cache.dir +/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir +/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/edit_cache.dir 
+/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/rebuild_cache.dir diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make index d09a42494..bfa52afbd 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/build.make @@ -53,10 +53,10 @@ RM = /usr/bin/cmake -E rm -f EQUALS = = # The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA +CMAKE_SOURCE_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA # The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build +CMAKE_BINARY_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build # Include any dependencies generated for this target. 
include CMakeFiles/bitcracker.dir/depend.make @@ -72,40 +72,40 @@ include CMakeFiles/bitcracker.dir/flags.make CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o: ../src/main.cu - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o" - cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
- cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o" + cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
+ cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o: ../src/utils.cu - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o" - cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
- cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o" + cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
+ cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o: ../src/w_blocks.cu - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_3) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o" - cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
- cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_3) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o" + cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
+ cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o: CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o: ../src/attack.cu - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_4) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o" - cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
- cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -D generated_cubin_file:STRING=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt -P /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_4) "Building NVCC (Device) object CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o" + cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -E make_directory /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/. 
+ cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt -P /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake # Object files for target bitcracker bitcracker_OBJECTS = # External object files for target bitcracker bitcracker_EXTERNAL_OBJECTS = \ -"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o" \ -"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o" \ -"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o" \ -"/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o" +"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o" \ +"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o" \ 
+"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o" \ +"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o" bitcracker: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o bitcracker: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o @@ -117,7 +117,7 @@ bitcracker: /usr/lib/x86_64-linux-gnu/librt.a bitcracker: /usr/local/cuda/lib64/libcudart_static.a bitcracker: /usr/lib/x86_64-linux-gnu/librt.a bitcracker: CMakeFiles/bitcracker.dir/link.txt - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_5) "Linking CXX executable bitcracker" + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_5) "Linking CXX executable bitcracker" $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/bitcracker.dir/link.txt --verbose=$(VERBOSE) # Rule to build all files generated by this target. 
@@ -132,6 +132,6 @@ CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_gener CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o CMakeFiles/bitcracker.dir/depend: CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o - cd /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/DependInfo.cmake --color=$(COLOR) + cd /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/DependInfo.cmake --color=$(COLOR) .PHONY : CMakeFiles/bitcracker.dir/depend diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake index b1aea4c6c..7adebec63 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need to set this, in # 
order to force this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list @@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) set(CUDA_NVCC_FLAGS_RELEASE ; ) set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) set(nvcc_flags -m64) # list -set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes) +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes) string(REPLACE 
"\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). set(format_flag "-c") # string diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen index bf20c2959..011ee063a 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.cmake.pre-gen @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.NVCC-depend") # path +set(cmake_dependency_file 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need to set this, in # order to force this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake index e1752ec21..e8fd29c5b 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need to set this, in # order to force 
this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list @@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) set(CUDA_NVCC_FLAGS_RELEASE ; ) set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) set(nvcc_flags -m64) # list -set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes) +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes) string(REPLACE "\\" "/" 
CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). set(format_flag "-c") # string diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen index 29074bd92..b3887194b 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.cmake.pre-gen @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.NVCC-depend") # path +set(cmake_dependency_file 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need to set this, in # order to force this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake index e7df3120e..ba2d994d4 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need to set this, in # order to 
force this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list @@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) set(CUDA_NVCC_FLAGS_RELEASE ; ) set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) set(nvcc_flags -m64) # list -set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes) +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes) string(REPLACE "\\" "/" 
CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). set(format_flag "-c") # string diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen index f2f4dc692..ebb346937 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.cmake.pre-gen @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.NVCC-depend") # path +set(cmake_dependency_file 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need to set this, in # order to force this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake index e2f6c56ab..5c9797cc7 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need 
to set this, in # order to force this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list @@ -79,7 +79,7 @@ set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) set(CUDA_NVCC_FLAGS_RELEASE ; ) set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) set(nvcc_flags -m64) # list -set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address backslashes) +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src;/usr/local/cuda/include]==]) # list (needs to be in lua quotes to address 
backslashes) string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). set(format_flag "-c") # string diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen index dfc5bffb4..543d2bea1 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.cmake.pre-gen @@ -58,18 +58,18 @@ endif() # Set these up as variables to make reading the generated file easier set(CMAKE_COMMAND "/usr/bin/cmake") # path -set(source_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path -set(NVCC_generated_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path -set(cmake_dependency_file "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path +set(source_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.NVCC-depend") # path +set(cmake_dependency_file 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o.depend") # path set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path set(build_cubin OFF) # bool set(CUDA_HOST_COMPILER "") # path # We won't actually use these variables for now, but we need to set this, in # order to force this file to be run again if it changes. -set(generated_file_path "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path -set(generated_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path -set(generated_cubin_file_internal "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path +set(generated_file_path "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/.") # path +set(generated_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o.cubin.txt") # path set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path set(CUDA_NVCC_FLAGS -gencode;arch=compute_80,code=sm_80 ;; ) # list diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile index 022d6c02e..e4ce5face 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/Makefile @@ -57,10 +57,10 @@ RM = /usr/bin/cmake -E rm -f EQUALS = = # The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA +CMAKE_SOURCE_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA # The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build +CMAKE_BINARY_DIR = /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build #============================================================================= # Targets provided globally by CMake. @@ -87,9 +87,9 @@ rebuild_cache/fast: rebuild_cache # The main all target all: cmake_check_build_system - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build//CMakeFiles/progress.marks + $(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build//CMakeFiles/progress.marks $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 all - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0 + $(CMAKE_COMMAND) -E cmake_progress_start 
/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles 0 .PHONY : all # The main clean target diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake index 832a11142..f570cfe8c 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/cmake_install.cmake @@ -1,4 +1,4 @@ -# Install script for directory: /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA +# Install script for directory: /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA # Set the install prefix if(NOT DEFINED CMAKE_INSTALL_PREFIX) @@ -50,5 +50,5 @@ endif() string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT "${CMAKE_INSTALL_MANIFEST_FILES}") -file(WRITE "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/${CMAKE_INSTALL_MANIFEST}" +file(WRITE "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/${CMAKE_INSTALL_MANIFEST}" "${CMAKE_INSTALL_MANIFEST_CONTENT}") diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json index b6250e061..8aa597aa9 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/compile_commands.json @@ -1,26 +1,26 @@ [ { - "command": "nvcc -c -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include 
-I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu" + "command": "nvcc -c -o /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_w_blocks.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu", + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu" }, { - "command": "nvcc -c -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu" + "command": "nvcc -c -o 
/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_attack.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu", + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu" }, { - "command": "nvcc -c -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu" + "command": "nvcc -c -o /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_main.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu", + "directory": 
"/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu" }, { - "command": "nvcc -c -o /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", - "file": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu" + "command": "nvcc -c -o /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src/./bitcracker_generated_utils.cu.o -m64 -O3 -ffast-math -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src -D__CUDACC__=1 /home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu", + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build/CMakeFiles/bitcracker.dir/src", + "file": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/utils.cu" }, { "command": "ld -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/cc8r5h9i.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc 
-plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -o bitcracker /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_main.cu.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_utils.cu.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_w_blocks.cu.o CMakeFiles/bitcracker.dir/src/bitcracker_generated_attack.cu.o /usr/local/cuda/lib64/libcudart_static.a /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart_static.a /usr/lib/x86_64-linux-gnu/librt.a /usr/lib/gcc/x86_64-linux-gnu/11/crtfastmath.o /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o", - "directory": "/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build" + "directory": "/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build" } ] \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml index 1b98a55f8..61e823792 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/MainSourceFiles.yaml @@ -1,7 +1,7 @@ --- MainSourceFile: MainSrcFiles_placehold Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 1633 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' 
NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 2138 Length: 10 ReplacementText: '' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 2413 Length: 0 ReplacementText: ",\n\tunsigned int const *TS0,\n\tunsigned int const *TS1,\n\tunsigned int const *TS2,\n\tunsigned int const *TS3" @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 2819 Length: 26 ReplacementText: 'dpct::byte_level_permute(m0, 0, 0x0123)' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 2880 Length: 26 ReplacementText: 'dpct::byte_level_permute(m1, 0, 0x0123)' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' 
Offset: 2941 Length: 26 ReplacementText: 'dpct::byte_level_permute(m2, 0, 0x0123)' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 3002 Length: 26 ReplacementText: 'dpct::byte_level_permute(m3, 0, 0x0123)' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 17971 Length: 37 ReplacementText: 'dpct::byte_level_permute(enc_schedule0, 0, 0x0123)' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 18027 Length: 37 ReplacementText: 'dpct::byte_level_permute(enc_schedule1, 0, 0x0123)' @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 18083 Length: 37 ReplacementText: 'dpct::byte_level_permute(enc_schedule2, 0, 0x0123)' @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 18139 Length: 37 ReplacementText: 'dpct::byte_level_permute(enc_schedule3, 0, 0x0123)' @@ -100,7 +100,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 18181 Length: 10 ReplacementText: "/*\nDPCT1110:3: The total declared local variable size in device function decrypt_vmk_with_mac exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" @@ -109,7 +109,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 18589 Length: 0 ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n unsigned int const *TS0,\n unsigned int const *TS1,\n unsigned int const *TS2,\n unsigned int const *TS3" @@ -118,7 +118,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 19258 Length: 11 ReplacementText: 'item_ct1.get_local_id(2)' @@ -127,7 +127,7 @@ Replacements: InitStr: '' 
NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 19272 Length: 10 ReplacementText: 'item_ct1.get_group(2)' @@ -136,7 +136,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 19285 Length: 10 ReplacementText: 'item_ct1.get_local_range(2)' @@ -145,7 +145,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 32334 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -154,7 +154,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 33736 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -163,7 +163,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 35146 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -172,7 +172,7 @@ 
Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 36386 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -181,7 +181,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 36758 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -190,7 +190,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 37153 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -199,7 +199,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 37548 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -208,7 +208,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 37930 Length: 0 ReplacementText: ', TS0, TS1, TS2, TS3' @@ -217,7 
+217,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 40314 Length: 0 ReplacementText: ' try ' @@ -226,7 +226,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 42561 Length: 0 ReplacementText: " /*\n DPCT1048:0: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. You may need to check the migrated code.\n */\n" @@ -235,7 +235,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 42577 Length: 120 ReplacementText: 'DPCT_CHECK_ERROR(h_found = sycl::malloc_host(1, dpct::get_in_order_queue()))' @@ -244,7 +244,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 42701 Length: 0 ReplacementText: " /*\n DPCT1048:1: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. 
You may need to check the migrated code.\n */\n" @@ -253,7 +253,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 42717 Length: 120 ReplacementText: 'DPCT_CHECK_ERROR(h_pswd_char = sycl::malloc_host(max_num_pswd_per_read * PSWD_NUM_CHAR, dpct::get_in_order_queue()))' @@ -262,7 +262,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 42841 Length: 0 ReplacementText: " /*\n DPCT1048:2: The original value cudaHostAllocDefault is not meaningful in the migrated code and was removed or replaced with 0. 
You may need to check the migrated code.\n */\n" @@ -271,7 +271,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 42857 Length: 120 ReplacementText: 'DPCT_CHECK_ERROR(h_pswd_uint32 = sycl::malloc_host(max_num_pswd_per_read * PSWD_NUM_UINT32, dpct::get_in_order_queue()))' @@ -280,7 +280,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 43359 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -289,7 +289,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 43421 Length: 24 ReplacementText: '' @@ -298,7 +298,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 43446 Length: 0 ReplacementText: '.wait())' @@ -307,7 +307,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 43622 Length: 75 ReplacementText: 'DPCT_CHECK_ERROR(d_vmk = sycl::malloc_device(VMK_FULL_SIZE, dpct::get_in_order_queue()))' @@ -316,7 +316,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 43717 Length: 75 ReplacementText: 'DPCT_CHECK_ERROR(d_vmkIV = sycl::malloc_device(IV_SIZE, dpct::get_in_order_queue()))' @@ -325,7 +325,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 43812 Length: 75 ReplacementText: 'DPCT_CHECK_ERROR(d_mac = sycl::malloc_device(MAC_SIZE, dpct::get_in_order_queue()))' @@ -334,7 +334,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 43907 Length: 75 ReplacementText: 'DPCT_CHECK_ERROR(d_macIV = sycl::malloc_device(IV_SIZE, dpct::get_in_order_queue()))' @@ -343,7 +343,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44002 Length: 
75 ReplacementText: 'DPCT_CHECK_ERROR(d_computedMacIV = sycl::malloc_device(IV_SIZE, dpct::get_in_order_queue()))' @@ -352,7 +352,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44097 Length: 75 ReplacementText: 'DPCT_CHECK_ERROR(d_found = sycl::malloc_device(1, dpct::get_in_order_queue()))' @@ -361,7 +361,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44193 Length: 100 ReplacementText: 'DPCT_CHECK_ERROR(d_pswd_uint32 = sycl::malloc_device(max_num_pswd_per_read * PSWD_NUM_UINT32, dpct::get_in_order_queue()))' @@ -370,7 +370,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44405 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -379,7 +379,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44487 Length: 25 ReplacementText: '' @@ -388,7 +388,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - 
FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44513 Length: 0 ReplacementText: '.wait())' @@ -397,7 +397,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44534 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -406,7 +406,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44616 Length: 25 ReplacementText: '' @@ -415,7 +415,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44642 Length: 0 ReplacementText: '.wait())' @@ -424,7 +424,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44666 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -433,7 +433,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44748 Length: 25 ReplacementText: '' @@ -442,7 +442,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44774 Length: 0 ReplacementText: '.wait())' @@ -451,7 +451,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44798 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -460,7 +460,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44880 Length: 25 ReplacementText: '' @@ -469,7 +469,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44906 Length: 0 ReplacementText: '.wait())' @@ -478,7 +478,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: 
false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 44930 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -487,7 +487,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 45012 Length: 25 ReplacementText: '' @@ -496,7 +496,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 45038 Length: 0 ReplacementText: '.wait())' @@ -505,7 +505,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 45058 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -514,7 +514,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 45141 Length: 24 ReplacementText: '' @@ -523,7 +523,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 45166 Length: 0 ReplacementText: '.wait())' @@ -532,7 +532,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 45368 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' @@ -541,7 +541,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 47337 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -550,7 +550,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 47428 Length: 24 ReplacementText: '' @@ -559,7 +559,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 47453 Length: 0 ReplacementText: '.wait())' @@ -568,7 +568,7 @@ 
Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 47477 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' @@ -577,7 +577,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48003 Length: 0 ReplacementText: " /*\n DPCT1049:4: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed.\n */\n" @@ -586,7 +586,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48011 Length: 391 ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n TS0.init();\n TS1.init();\n TS2.init();\n TS3.init();\n\n auto TS0_ptr_ct1 = TS0.get_ptr();\n auto TS1_ptr_ct1 = TS1.get_ptr();\n auto TS2_ptr_ct1 = TS2.get_ptr();\n auto TS3_ptr_ct1 = TS3.get_ptr();\n\n cgh.parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, block_size), sycl::range<3>(1, 1, block_size)), \n [=](sycl::nd_item<3> item_ct1) {\n decrypt_vmk_with_mac(num_read_pswd, d_found, d_vmk, d_vmkIV, d_mac, d_macIV, d_computedMacIV, v0, v1, v2, v3, s0, s1, s2, s3, d_pswd_uint32, d_w_words_uint32, 
item_ct1, TS0_ptr_ct1, TS1_ptr_ct1, TS2_ptr_ct1, TS3_ptr_ct1);\n });\n });" @@ -595,7 +595,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: true - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48402 Length: 1 ReplacementText: '' @@ -604,7 +604,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48445 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' @@ -613,7 +613,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48881 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -622,7 +622,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48930 Length: 24 ReplacementText: '' @@ -631,7 +631,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48955 Length: 0 ReplacementText: '.wait())' @@ -640,7 +640,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 48973 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' @@ -649,7 +649,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50751 Length: 21 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(h_found, dpct::get_in_order_queue())' @@ -658,7 +658,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50772 Length: 0 ReplacementText: ')' @@ -667,7 +667,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50792 Length: 25 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(h_pswd_char, dpct::get_in_order_queue())' @@ -676,7 +676,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50817 Length: 0 ReplacementText: ')' @@ -685,7 +685,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50837 Length: 27 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(h_pswd_uint32, dpct::get_in_order_queue())' @@ -694,7 +694,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50864 Length: 0 ReplacementText: ')' @@ -703,7 +703,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50923 Length: 15 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_vmk, dpct::get_in_order_queue())' @@ -712,7 +712,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50938 Length: 0 ReplacementText: ')' @@ -721,7 +721,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50958 Length: 17 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_vmkIV, dpct::get_in_order_queue())' @@ -730,7 +730,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50975 Length: 0 ReplacementText: ')' @@ -739,7 +739,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 50995 Length: 15 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_mac, dpct::get_in_order_queue())' @@ -748,7 +748,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51010 Length: 0 ReplacementText: ')' @@ -757,7 +757,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51030 Length: 17 ReplacementText: 
'DPCT_CHECK_ERROR(sycl::free(d_macIV, dpct::get_in_order_queue())' @@ -766,7 +766,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51047 Length: 0 ReplacementText: ')' @@ -775,7 +775,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51067 Length: 25 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_computedMacIV, dpct::get_in_order_queue())' @@ -784,7 +784,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51092 Length: 0 ReplacementText: ')' @@ -793,7 +793,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51112 Length: 17 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_found, dpct::get_in_order_queue())' @@ -802,7 +802,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51129 Length: 0 ReplacementText: ')' @@ -811,7 +811,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51149 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_pswd_uint32, dpct::get_in_order_queue())' @@ -820,7 +820,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51172 Length: 0 ReplacementText: ')' @@ -829,7 +829,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Offset: 51247 Length: 0 ReplacementText: "\ncatch (sycl::exception const &exc) {\n std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n std::exit(1);\n}" @@ -838,7 +838,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 1633 Length: 0 ReplacementText: "#include \n#include \n" @@ -847,7 +847,7 @@ Replacements: InitStr: '' NewHostVarName: 
'' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 2654 Length: 0 ReplacementText: ' try ' @@ -856,7 +856,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 5570 Length: 0 ReplacementText: "\t/*\n\tDPCT1093:8: The \"0\" device may be not the one intended for use. Adjust the selected device if needed.\n\t*/\n" @@ -865,7 +865,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 5583 Length: 13 ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device' @@ -874,7 +874,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 5599 Length: 0 ReplacementText: ')' @@ -883,7 +883,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 5942 Length: 98 ReplacementText: 
'DPCT_CHECK_ERROR(d_w_words_uint32 = sycl::malloc_device(NUM_HASH_BLOCKS * HASH_BLOCK_NUM_UINT32, dpct::get_in_order_queue()))' @@ -892,7 +892,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 6856 Length: 26 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_w_words_uint32, dpct::get_in_order_queue())' @@ -901,7 +901,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 6882 Length: 0 ReplacementText: ')' @@ -910,7 +910,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Offset: 7075 Length: 0 ReplacementText: "\ncatch (sycl::exception const &exc) {\n std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n std::exit(1);\n}" @@ -919,7 +919,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 1633 Length: 0 ReplacementText: "#include \n#include \n" @@ -928,7 +928,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false 
- - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 2333 Length: 10 ReplacementText: '' @@ -937,7 +937,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 2462 Length: 0 ReplacementText: ",\n const sycl::nd_item<3> &item_ct1" @@ -946,7 +946,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 2485 Length: 10 ReplacementText: 'item_ct1.get_group(2)' @@ -955,7 +955,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 2498 Length: 10 ReplacementText: 'item_ct1.get_local_range(2)' @@ -964,7 +964,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 2511 Length: 11 ReplacementText: 'item_ct1.get_local_id(2)' @@ -973,7 +973,7 @@ Replacements: InitStr: '' 
NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 6853 Length: 69 ReplacementText: 'DPCT_CHECK_ERROR(salt_d = sycl::malloc_device(SALT_SIZE, dpct::get_in_order_queue()))' @@ -982,7 +982,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 6939 Length: 69 ReplacementText: 'DPCT_CHECK_ERROR(padding_d = sycl::malloc_device(PADDING_SIZE, dpct::get_in_order_queue()))' @@ -991,7 +991,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7055 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -1000,7 +1000,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7122 Length: 24 ReplacementText: '' @@ -1009,7 +1009,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7147 Length: 0 ReplacementText: '.wait())' @@ -1018,7 +1018,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7164 Length: 10 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' @@ -1027,7 +1027,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7231 Length: 24 ReplacementText: '' @@ -1036,7 +1036,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7256 Length: 0 ReplacementText: '.wait())' @@ -1045,7 +1045,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7617 Length: 65 ReplacementText: "dpct::get_in_order_queue().parallel_for(\n\t sycl::nd_range<3>(sycl::range<3>(1, 1, 1024) * sycl::range<3>(1, 1, 16), sycl::range<3>(1, 1, 16)), \n\t [=](sycl::nd_item<3> item_ct1) {\n\t kernel_w_block(salt_d, padding_d, 
d_w_words_uint32, item_ct1);\n\t });" @@ -1054,7 +1054,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: true - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7682 Length: 1 ReplacementText: '' @@ -1063,7 +1063,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 7729 Length: 23 ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' @@ -1072,7 +1072,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 8096 Length: 16 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(salt_d, dpct::get_in_order_queue())' @@ -1081,7 +1081,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 8112 Length: 0 ReplacementText: ')' @@ -1090,7 +1090,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 8132 Length: 19 ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(padding_d, dpct::get_in_order_queue())' @@ -1099,7 +1099,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Offset: 8151 Length: 0 ReplacementText: ')' @@ -1109,11 +1109,11 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/attack.cu' Digest: 1e837048a442f2258c61090d201699d9 - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/main.cu' Digest: 60e076c4d80f751b34af1cc20d4cd774 - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/w_blocks.cu' Digest: 6de252b294499305c70f60e14eb8c6a9 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -1135,7 +1135,7 @@ CompileTargets: Compiler: nvcc OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' Specified: false AsyncHandler: Value: 'false' @@ -1144,7 
+1144,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker index f60370f5b..9003bc97d 100755 Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/bitcracker differ diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml index bd5d375d9..f637c8109 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/aes.h' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' Offset: 0 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' Offset: 1648 Length: 3666 ReplacementText: "static 
dpct::constant_memory TS0(sycl::range<1>(256), {\n 0xC66363A5U, 0xF87C7C84U, 0xEE777799U, 0xF67B7B8DU, 0xFFF2F20DU, 0xD66B6BBDU, 0xDE6F6FB1U, 0x91C5C554U, \n 0x60303050U, 0x02010103U, 0xCE6767A9U, 0x562B2B7DU, 0xE7FEFE19U, 0xB5D7D762U, 0x4DABABE6U, 0xEC76769AU, \n 0x8FCACA45U, 0x1F82829DU, 0x89C9C940U, 0xFA7D7D87U, 0xEFFAFA15U, 0xB25959EBU, 0x8E4747C9U, 0xFBF0F00BU, \n 0x41ADADECU, 0xB3D4D467U, 0x5FA2A2FDU, 0x45AFAFEAU, 0x239C9CBFU, 0x53A4A4F7U, 0xE4727296U, 0x9BC0C05BU, \n 0x75B7B7C2U, 0xE1FDFD1CU, 0x3D9393AEU, 0x4C26266AU, 0x6C36365AU, 0x7E3F3F41U, 0xF5F7F702U, 0x83CCCC4FU, \n 0x6834345CU, 0x51A5A5F4U, 0xD1E5E534U, 0xF9F1F108U, 0xE2717193U, 0xABD8D873U, 0x62313153U, 0x2A15153FU, \n 0x0804040CU, 0x95C7C752U, 0x46232365U, 0x9DC3C35EU, 0x30181828U, 0x379696A1U, 0x0A05050FU, 0x2F9A9AB5U, \n 0x0E070709U, 0x24121236U, 0x1B80809BU, 0xDFE2E23DU, 0xCDEBEB26U, 0x4E272769U, 0x7FB2B2CDU, 0xEA75759FU, \n 0x1209091BU, 0x1D83839EU, 0x582C2C74U, 0x341A1A2EU, 0x361B1B2DU, 0xDC6E6EB2U, 0xB45A5AEEU, 0x5BA0A0FBU, \n 0xA45252F6U, 0x763B3B4DU, 0xB7D6D661U, 0x7DB3B3CEU, 0x5229297BU, 0xDDE3E33EU, 0x5E2F2F71U, 0x13848497U, \n 0xA65353F5U, 0xB9D1D168U, 0x00000000U, 0xC1EDED2CU, 0x40202060U, 0xE3FCFC1FU, 0x79B1B1C8U, 0xB65B5BEDU, \n 0xD46A6ABEU, 0x8DCBCB46U, 0x67BEBED9U, 0x7239394BU, 0x944A4ADEU, 0x984C4CD4U, 0xB05858E8U, 0x85CFCF4AU, \n 0xBBD0D06BU, 0xC5EFEF2AU, 0x4FAAAAE5U, 0xEDFBFB16U, 0x864343C5U, 0x9A4D4DD7U, 0x66333355U, 0x11858594U, \n 0x8A4545CFU, 0xE9F9F910U, 0x04020206U, 0xFE7F7F81U, 0xA05050F0U, 0x783C3C44U, 0x259F9FBAU, 0x4BA8A8E3U, \n 0xA25151F3U, 0x5DA3A3FEU, 0x804040C0U, 0x058F8F8AU, 0x3F9292ADU, 0x219D9DBCU, 0x70383848U, 0xF1F5F504U, \n 0x63BCBCDFU, 0x77B6B6C1U, 0xAFDADA75U, 0x42212163U, 0x20101030U, 0xE5FFFF1AU, 0xFDF3F30EU, 0xBFD2D26DU, \n 0x81CDCD4CU, 0x180C0C14U, 0x26131335U, 0xC3ECEC2FU, 0xBE5F5FE1U, 0x359797A2U, 0x884444CCU, 0x2E171739U, \n 0x93C4C457U, 0x55A7A7F2U, 0xFC7E7E82U, 0x7A3D3D47U, 0xC86464ACU, 0xBA5D5DE7U, 0x3219192BU, 0xE6737395U, \n 0xC06060A0U, 
0x19818198U, 0x9E4F4FD1U, 0xA3DCDC7FU, 0x44222266U, 0x542A2A7EU, 0x3B9090ABU, 0x0B888883U, \n 0x8C4646CAU, 0xC7EEEE29U, 0x6BB8B8D3U, 0x2814143CU, 0xA7DEDE79U, 0xBC5E5EE2U, 0x160B0B1DU, 0xADDBDB76U, \n 0xDBE0E03BU, 0x64323256U, 0x743A3A4EU, 0x140A0A1EU, 0x924949DBU, 0x0C06060AU, 0x4824246CU, 0xB85C5CE4U, \n 0x9FC2C25DU, 0xBDD3D36EU, 0x43ACACEFU, 0xC46262A6U, 0x399191A8U, 0x319595A4U, 0xD3E4E437U, 0xF279798BU, \n 0xD5E7E732U, 0x8BC8C843U, 0x6E373759U, 0xDA6D6DB7U, 0x018D8D8CU, 0xB1D5D564U, 0x9C4E4ED2U, 0x49A9A9E0U, \n 0xD86C6CB4U, 0xAC5656FAU, 0xF3F4F407U, 0xCFEAEA25U, 0xCA6565AFU, 0xF47A7A8EU, 0x47AEAEE9U, 0x10080818U, \n 0x6FBABAD5U, 0xF0787888U, 0x4A25256FU, 0x5C2E2E72U, 0x381C1C24U, 0x57A6A6F1U, 0x73B4B4C7U, 0x97C6C651U, \n 0xCBE8E823U, 0xA1DDDD7CU, 0xE874749CU, 0x3E1F1F21U, 0x964B4BDDU, 0x61BDBDDCU, 0x0D8B8B86U, 0x0F8A8A85U, \n 0xE0707090U, 0x7C3E3E42U, 0x71B5B5C4U, 0xCC6666AAU, 0x904848D8U, 0x06030305U, 0xF7F6F601U, 0x1C0E0E12U, \n 0xC26161A3U, 0x6A35355FU, 0xAE5757F9U, 0x69B9B9D0U, 0x17868691U, 0x99C1C158U, 0x3A1D1D27U, 0x279E9EB9U, \n 0xD9E1E138U, 0xEBF8F813U, 0x2B9898B3U, 0x22111133U, 0xD26969BBU, 0xA9D9D970U, 0x078E8E89U, 0x339494A7U, \n 0x2D9B9BB6U, 0x3C1E1E22U, 0x15878792U, 0xC9E9E920U, 0x87CECE49U, 0xAA5555FFU, 0x50282878U, 0xA5DFDF7AU, \n 0x038C8C8FU, 0x59A1A1F8U, 0x09898980U, 0x1A0D0D17U, 0x65BFBFDAU, 0xD7E6E631U, 0x844242C6U, 0xD06868B8U, \n 0x824141C3U, 0x299999B0U, 0x5A2D2D77U, 0x1E0F0F11U, 0x7BB0B0CBU, 0xA85454FCU, 0x6DBBBBD6U, 0x2C16163AU\n});" @@ -19,7 +19,7 @@ Replacements: InitStr: "{\n 0xC66363A5U, 0xF87C7C84U, 0xEE777799U, 0xF67B7B8DU, 0xFFF2F20DU, 0xD66B6BBDU, 0xDE6F6FB1U, 0x91C5C554U, \n 0x60303050U, 0x02010103U, 0xCE6767A9U, 0x562B2B7DU, 0xE7FEFE19U, 0xB5D7D762U, 0x4DABABE6U, 0xEC76769AU, \n 0x8FCACA45U, 0x1F82829DU, 0x89C9C940U, 0xFA7D7D87U, 0xEFFAFA15U, 0xB25959EBU, 0x8E4747C9U, 0xFBF0F00BU, \n 0x41ADADECU, 0xB3D4D467U, 0x5FA2A2FDU, 0x45AFAFEAU, 0x239C9CBFU, 0x53A4A4F7U, 0xE4727296U, 0x9BC0C05BU, \n 0x75B7B7C2U, 0xE1FDFD1CU, 0x3D9393AEU, 
0x4C26266AU, 0x6C36365AU, 0x7E3F3F41U, 0xF5F7F702U, 0x83CCCC4FU, \n 0x6834345CU, 0x51A5A5F4U, 0xD1E5E534U, 0xF9F1F108U, 0xE2717193U, 0xABD8D873U, 0x62313153U, 0x2A15153FU, \n 0x0804040CU, 0x95C7C752U, 0x46232365U, 0x9DC3C35EU, 0x30181828U, 0x379696A1U, 0x0A05050FU, 0x2F9A9AB5U, \n 0x0E070709U, 0x24121236U, 0x1B80809BU, 0xDFE2E23DU, 0xCDEBEB26U, 0x4E272769U, 0x7FB2B2CDU, 0xEA75759FU, \n 0x1209091BU, 0x1D83839EU, 0x582C2C74U, 0x341A1A2EU, 0x361B1B2DU, 0xDC6E6EB2U, 0xB45A5AEEU, 0x5BA0A0FBU, \n 0xA45252F6U, 0x763B3B4DU, 0xB7D6D661U, 0x7DB3B3CEU, 0x5229297BU, 0xDDE3E33EU, 0x5E2F2F71U, 0x13848497U, \n 0xA65353F5U, 0xB9D1D168U, 0x00000000U, 0xC1EDED2CU, 0x40202060U, 0xE3FCFC1FU, 0x79B1B1C8U, 0xB65B5BEDU, \n 0xD46A6ABEU, 0x8DCBCB46U, 0x67BEBED9U, 0x7239394BU, 0x944A4ADEU, 0x984C4CD4U, 0xB05858E8U, 0x85CFCF4AU, \n 0xBBD0D06BU, 0xC5EFEF2AU, 0x4FAAAAE5U, 0xEDFBFB16U, 0x864343C5U, 0x9A4D4DD7U, 0x66333355U, 0x11858594U, \n 0x8A4545CFU, 0xE9F9F910U, 0x04020206U, 0xFE7F7F81U, 0xA05050F0U, 0x783C3C44U, 0x259F9FBAU, 0x4BA8A8E3U, \n 0xA25151F3U, 0x5DA3A3FEU, 0x804040C0U, 0x058F8F8AU, 0x3F9292ADU, 0x219D9DBCU, 0x70383848U, 0xF1F5F504U, \n 0x63BCBCDFU, 0x77B6B6C1U, 0xAFDADA75U, 0x42212163U, 0x20101030U, 0xE5FFFF1AU, 0xFDF3F30EU, 0xBFD2D26DU, \n 0x81CDCD4CU, 0x180C0C14U, 0x26131335U, 0xC3ECEC2FU, 0xBE5F5FE1U, 0x359797A2U, 0x884444CCU, 0x2E171739U, \n 0x93C4C457U, 0x55A7A7F2U, 0xFC7E7E82U, 0x7A3D3D47U, 0xC86464ACU, 0xBA5D5DE7U, 0x3219192BU, 0xE6737395U, \n 0xC06060A0U, 0x19818198U, 0x9E4F4FD1U, 0xA3DCDC7FU, 0x44222266U, 0x542A2A7EU, 0x3B9090ABU, 0x0B888883U, \n 0x8C4646CAU, 0xC7EEEE29U, 0x6BB8B8D3U, 0x2814143CU, 0xA7DEDE79U, 0xBC5E5EE2U, 0x160B0B1DU, 0xADDBDB76U, \n 0xDBE0E03BU, 0x64323256U, 0x743A3A4EU, 0x140A0A1EU, 0x924949DBU, 0x0C06060AU, 0x4824246CU, 0xB85C5CE4U, \n 0x9FC2C25DU, 0xBDD3D36EU, 0x43ACACEFU, 0xC46262A6U, 0x399191A8U, 0x319595A4U, 0xD3E4E437U, 0xF279798BU, \n 0xD5E7E732U, 0x8BC8C843U, 0x6E373759U, 0xDA6D6DB7U, 0x018D8D8CU, 0xB1D5D564U, 0x9C4E4ED2U, 0x49A9A9E0U, \n 
0xD86C6CB4U, 0xAC5656FAU, 0xF3F4F407U, 0xCFEAEA25U, 0xCA6565AFU, 0xF47A7A8EU, 0x47AEAEE9U, 0x10080818U, \n 0x6FBABAD5U, 0xF0787888U, 0x4A25256FU, 0x5C2E2E72U, 0x381C1C24U, 0x57A6A6F1U, 0x73B4B4C7U, 0x97C6C651U, \n 0xCBE8E823U, 0xA1DDDD7CU, 0xE874749CU, 0x3E1F1F21U, 0x964B4BDDU, 0x61BDBDDCU, 0x0D8B8B86U, 0x0F8A8A85U, \n 0xE0707090U, 0x7C3E3E42U, 0x71B5B5C4U, 0xCC6666AAU, 0x904848D8U, 0x06030305U, 0xF7F6F601U, 0x1C0E0E12U, \n 0xC26161A3U, 0x6A35355FU, 0xAE5757F9U, 0x69B9B9D0U, 0x17868691U, 0x99C1C158U, 0x3A1D1D27U, 0x279E9EB9U, \n 0xD9E1E138U, 0xEBF8F813U, 0x2B9898B3U, 0x22111133U, 0xD26969BBU, 0xA9D9D970U, 0x078E8E89U, 0x339494A7U, \n 0x2D9B9BB6U, 0x3C1E1E22U, 0x15878792U, 0xC9E9E920U, 0x87CECE49U, 0xAA5555FFU, 0x50282878U, 0xA5DFDF7AU, \n 0x038C8C8FU, 0x59A1A1F8U, 0x09898980U, 0x1A0D0D17U, 0x65BFBFDAU, 0xD7E6E631U, 0x844242C6U, 0xD06868B8U, \n 0x824141C3U, 0x299999B0U, 0x5A2D2D77U, 0x1E0F0F11U, 0x7BB0B0CBU, 0xA85454FCU, 0x6DBBBBD6U, 0x2C16163AU\n}" NewHostVarName: TS0_host_ct1 BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' Offset: 5316 Length: 3666 ReplacementText: "static dpct::constant_memory TS1(sycl::range<1>(256), {\n 0xA5C66363U, 0x84F87C7CU, 0x99EE7777U, 0x8DF67B7BU, 0x0DFFF2F2U, 0xBDD66B6BU, 0xB1DE6F6FU, 0x5491C5C5U, \n 0x50603030U, 0x03020101U, 0xA9CE6767U, 0x7D562B2BU, 0x19E7FEFEU, 0x62B5D7D7U, 0xE64DABABU, 0x9AEC7676U, \n 0x458FCACAU, 0x9D1F8282U, 0x4089C9C9U, 0x87FA7D7DU, 0x15EFFAFAU, 0xEBB25959U, 0xC98E4747U, 0x0BFBF0F0U, \n 0xEC41ADADU, 0x67B3D4D4U, 0xFD5FA2A2U, 0xEA45AFAFU, 0xBF239C9CU, 0xF753A4A4U, 0x96E47272U, 0x5B9BC0C0U, \n 0xC275B7B7U, 0x1CE1FDFDU, 0xAE3D9393U, 0x6A4C2626U, 0x5A6C3636U, 0x417E3F3FU, 0x02F5F7F7U, 0x4F83CCCCU, \n 0x5C683434U, 0xF451A5A5U, 0x34D1E5E5U, 0x08F9F1F1U, 0x93E27171U, 0x73ABD8D8U, 0x53623131U, 0x3F2A1515U, \n 
0x0C080404U, 0x5295C7C7U, 0x65462323U, 0x5E9DC3C3U, 0x28301818U, 0xA1379696U, 0x0F0A0505U, 0xB52F9A9AU, \n 0x090E0707U, 0x36241212U, 0x9B1B8080U, 0x3DDFE2E2U, 0x26CDEBEBU, 0x694E2727U, 0xCD7FB2B2U, 0x9FEA7575U, \n 0x1B120909U, 0x9E1D8383U, 0x74582C2CU, 0x2E341A1AU, 0x2D361B1BU, 0xB2DC6E6EU, 0xEEB45A5AU, 0xFB5BA0A0U, \n 0xF6A45252U, 0x4D763B3BU, 0x61B7D6D6U, 0xCE7DB3B3U, 0x7B522929U, 0x3EDDE3E3U, 0x715E2F2FU, 0x97138484U, \n 0xF5A65353U, 0x68B9D1D1U, 0x00000000U, 0x2CC1EDEDU, 0x60402020U, 0x1FE3FCFCU, 0xC879B1B1U, 0xEDB65B5BU, \n 0xBED46A6AU, 0x468DCBCBU, 0xD967BEBEU, 0x4B723939U, 0xDE944A4AU, 0xD4984C4CU, 0xE8B05858U, 0x4A85CFCFU, \n 0x6BBBD0D0U, 0x2AC5EFEFU, 0xE54FAAAAU, 0x16EDFBFBU, 0xC5864343U, 0xD79A4D4DU, 0x55663333U, 0x94118585U, \n 0xCF8A4545U, 0x10E9F9F9U, 0x06040202U, 0x81FE7F7FU, 0xF0A05050U, 0x44783C3CU, 0xBA259F9FU, 0xE34BA8A8U, \n 0xF3A25151U, 0xFE5DA3A3U, 0xC0804040U, 0x8A058F8FU, 0xAD3F9292U, 0xBC219D9DU, 0x48703838U, 0x04F1F5F5U, \n 0xDF63BCBCU, 0xC177B6B6U, 0x75AFDADAU, 0x63422121U, 0x30201010U, 0x1AE5FFFFU, 0x0EFDF3F3U, 0x6DBFD2D2U, \n 0x4C81CDCDU, 0x14180C0CU, 0x35261313U, 0x2FC3ECECU, 0xE1BE5F5FU, 0xA2359797U, 0xCC884444U, 0x392E1717U, \n 0x5793C4C4U, 0xF255A7A7U, 0x82FC7E7EU, 0x477A3D3DU, 0xACC86464U, 0xE7BA5D5DU, 0x2B321919U, 0x95E67373U, \n 0xA0C06060U, 0x98198181U, 0xD19E4F4FU, 0x7FA3DCDCU, 0x66442222U, 0x7E542A2AU, 0xAB3B9090U, 0x830B8888U, \n 0xCA8C4646U, 0x29C7EEEEU, 0xD36BB8B8U, 0x3C281414U, 0x79A7DEDEU, 0xE2BC5E5EU, 0x1D160B0BU, 0x76ADDBDBU, \n 0x3BDBE0E0U, 0x56643232U, 0x4E743A3AU, 0x1E140A0AU, 0xDB924949U, 0x0A0C0606U, 0x6C482424U, 0xE4B85C5CU, \n 0x5D9FC2C2U, 0x6EBDD3D3U, 0xEF43ACACU, 0xA6C46262U, 0xA8399191U, 0xA4319595U, 0x37D3E4E4U, 0x8BF27979U, \n 0x32D5E7E7U, 0x438BC8C8U, 0x596E3737U, 0xB7DA6D6DU, 0x8C018D8DU, 0x64B1D5D5U, 0xD29C4E4EU, 0xE049A9A9U, \n 0xB4D86C6CU, 0xFAAC5656U, 0x07F3F4F4U, 0x25CFEAEAU, 0xAFCA6565U, 0x8EF47A7AU, 0xE947AEAEU, 0x18100808U, \n 0xD56FBABAU, 0x88F07878U, 0x6F4A2525U, 0x725C2E2EU, 0x24381C1CU, 
0xF157A6A6U, 0xC773B4B4U, 0x5197C6C6U, \n 0x23CBE8E8U, 0x7CA1DDDDU, 0x9CE87474U, 0x213E1F1FU, 0xDD964B4BU, 0xDC61BDBDU, 0x860D8B8BU, 0x850F8A8AU, \n 0x90E07070U, 0x427C3E3EU, 0xC471B5B5U, 0xAACC6666U, 0xD8904848U, 0x05060303U, 0x01F7F6F6U, 0x121C0E0EU, \n 0xA3C26161U, 0x5F6A3535U, 0xF9AE5757U, 0xD069B9B9U, 0x91178686U, 0x5899C1C1U, 0x273A1D1DU, 0xB9279E9EU, \n 0x38D9E1E1U, 0x13EBF8F8U, 0xB32B9898U, 0x33221111U, 0xBBD26969U, 0x70A9D9D9U, 0x89078E8EU, 0xA7339494U, \n 0xB62D9B9BU, 0x223C1E1EU, 0x92158787U, 0x20C9E9E9U, 0x4987CECEU, 0xFFAA5555U, 0x78502828U, 0x7AA5DFDFU, \n 0x8F038C8CU, 0xF859A1A1U, 0x80098989U, 0x171A0D0DU, 0xDA65BFBFU, 0x31D7E6E6U, 0xC6844242U, 0xB8D06868U, \n 0xC3824141U, 0xB0299999U, 0x775A2D2DU, 0x111E0F0FU, 0xCB7BB0B0U, 0xFCA85454U, 0xD66DBBBBU, 0x3A2C1616U\n});" @@ -28,7 +28,7 @@ Replacements: InitStr: "{\n 0xA5C66363U, 0x84F87C7CU, 0x99EE7777U, 0x8DF67B7BU, 0x0DFFF2F2U, 0xBDD66B6BU, 0xB1DE6F6FU, 0x5491C5C5U, \n 0x50603030U, 0x03020101U, 0xA9CE6767U, 0x7D562B2BU, 0x19E7FEFEU, 0x62B5D7D7U, 0xE64DABABU, 0x9AEC7676U, \n 0x458FCACAU, 0x9D1F8282U, 0x4089C9C9U, 0x87FA7D7DU, 0x15EFFAFAU, 0xEBB25959U, 0xC98E4747U, 0x0BFBF0F0U, \n 0xEC41ADADU, 0x67B3D4D4U, 0xFD5FA2A2U, 0xEA45AFAFU, 0xBF239C9CU, 0xF753A4A4U, 0x96E47272U, 0x5B9BC0C0U, \n 0xC275B7B7U, 0x1CE1FDFDU, 0xAE3D9393U, 0x6A4C2626U, 0x5A6C3636U, 0x417E3F3FU, 0x02F5F7F7U, 0x4F83CCCCU, \n 0x5C683434U, 0xF451A5A5U, 0x34D1E5E5U, 0x08F9F1F1U, 0x93E27171U, 0x73ABD8D8U, 0x53623131U, 0x3F2A1515U, \n 0x0C080404U, 0x5295C7C7U, 0x65462323U, 0x5E9DC3C3U, 0x28301818U, 0xA1379696U, 0x0F0A0505U, 0xB52F9A9AU, \n 0x090E0707U, 0x36241212U, 0x9B1B8080U, 0x3DDFE2E2U, 0x26CDEBEBU, 0x694E2727U, 0xCD7FB2B2U, 0x9FEA7575U, \n 0x1B120909U, 0x9E1D8383U, 0x74582C2CU, 0x2E341A1AU, 0x2D361B1BU, 0xB2DC6E6EU, 0xEEB45A5AU, 0xFB5BA0A0U, \n 0xF6A45252U, 0x4D763B3BU, 0x61B7D6D6U, 0xCE7DB3B3U, 0x7B522929U, 0x3EDDE3E3U, 0x715E2F2FU, 0x97138484U, \n 0xF5A65353U, 0x68B9D1D1U, 0x00000000U, 0x2CC1EDEDU, 0x60402020U, 0x1FE3FCFCU, 0xC879B1B1U, 
0xEDB65B5BU, \n 0xBED46A6AU, 0x468DCBCBU, 0xD967BEBEU, 0x4B723939U, 0xDE944A4AU, 0xD4984C4CU, 0xE8B05858U, 0x4A85CFCFU, \n 0x6BBBD0D0U, 0x2AC5EFEFU, 0xE54FAAAAU, 0x16EDFBFBU, 0xC5864343U, 0xD79A4D4DU, 0x55663333U, 0x94118585U, \n 0xCF8A4545U, 0x10E9F9F9U, 0x06040202U, 0x81FE7F7FU, 0xF0A05050U, 0x44783C3CU, 0xBA259F9FU, 0xE34BA8A8U, \n 0xF3A25151U, 0xFE5DA3A3U, 0xC0804040U, 0x8A058F8FU, 0xAD3F9292U, 0xBC219D9DU, 0x48703838U, 0x04F1F5F5U, \n 0xDF63BCBCU, 0xC177B6B6U, 0x75AFDADAU, 0x63422121U, 0x30201010U, 0x1AE5FFFFU, 0x0EFDF3F3U, 0x6DBFD2D2U, \n 0x4C81CDCDU, 0x14180C0CU, 0x35261313U, 0x2FC3ECECU, 0xE1BE5F5FU, 0xA2359797U, 0xCC884444U, 0x392E1717U, \n 0x5793C4C4U, 0xF255A7A7U, 0x82FC7E7EU, 0x477A3D3DU, 0xACC86464U, 0xE7BA5D5DU, 0x2B321919U, 0x95E67373U, \n 0xA0C06060U, 0x98198181U, 0xD19E4F4FU, 0x7FA3DCDCU, 0x66442222U, 0x7E542A2AU, 0xAB3B9090U, 0x830B8888U, \n 0xCA8C4646U, 0x29C7EEEEU, 0xD36BB8B8U, 0x3C281414U, 0x79A7DEDEU, 0xE2BC5E5EU, 0x1D160B0BU, 0x76ADDBDBU, \n 0x3BDBE0E0U, 0x56643232U, 0x4E743A3AU, 0x1E140A0AU, 0xDB924949U, 0x0A0C0606U, 0x6C482424U, 0xE4B85C5CU, \n 0x5D9FC2C2U, 0x6EBDD3D3U, 0xEF43ACACU, 0xA6C46262U, 0xA8399191U, 0xA4319595U, 0x37D3E4E4U, 0x8BF27979U, \n 0x32D5E7E7U, 0x438BC8C8U, 0x596E3737U, 0xB7DA6D6DU, 0x8C018D8DU, 0x64B1D5D5U, 0xD29C4E4EU, 0xE049A9A9U, \n 0xB4D86C6CU, 0xFAAC5656U, 0x07F3F4F4U, 0x25CFEAEAU, 0xAFCA6565U, 0x8EF47A7AU, 0xE947AEAEU, 0x18100808U, \n 0xD56FBABAU, 0x88F07878U, 0x6F4A2525U, 0x725C2E2EU, 0x24381C1CU, 0xF157A6A6U, 0xC773B4B4U, 0x5197C6C6U, \n 0x23CBE8E8U, 0x7CA1DDDDU, 0x9CE87474U, 0x213E1F1FU, 0xDD964B4BU, 0xDC61BDBDU, 0x860D8B8BU, 0x850F8A8AU, \n 0x90E07070U, 0x427C3E3EU, 0xC471B5B5U, 0xAACC6666U, 0xD8904848U, 0x05060303U, 0x01F7F6F6U, 0x121C0E0EU, \n 0xA3C26161U, 0x5F6A3535U, 0xF9AE5757U, 0xD069B9B9U, 0x91178686U, 0x5899C1C1U, 0x273A1D1DU, 0xB9279E9EU, \n 0x38D9E1E1U, 0x13EBF8F8U, 0xB32B9898U, 0x33221111U, 0xBBD26969U, 0x70A9D9D9U, 0x89078E8EU, 0xA7339494U, \n 0xB62D9B9BU, 0x223C1E1EU, 0x92158787U, 0x20C9E9E9U, 
0x4987CECEU, 0xFFAA5555U, 0x78502828U, 0x7AA5DFDFU, \n 0x8F038C8CU, 0xF859A1A1U, 0x80098989U, 0x171A0D0DU, 0xDA65BFBFU, 0x31D7E6E6U, 0xC6844242U, 0xB8D06868U, \n 0xC3824141U, 0xB0299999U, 0x775A2D2DU, 0x111E0F0FU, 0xCB7BB0B0U, 0xFCA85454U, 0xD66DBBBBU, 0x3A2C1616U\n}" NewHostVarName: TS1_host_ct1 BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' Offset: 8984 Length: 3666 ReplacementText: "static dpct::constant_memory TS2(sycl::range<1>(256), {\n 0x63A5C663U, 0x7C84F87CU, 0x7799EE77U, 0x7B8DF67BU, 0xF20DFFF2U, 0x6BBDD66BU, 0x6FB1DE6FU, 0xC55491C5U, \n 0x30506030U, 0x01030201U, 0x67A9CE67U, 0x2B7D562BU, 0xFE19E7FEU, 0xD762B5D7U, 0xABE64DABU, 0x769AEC76U, \n 0xCA458FCAU, 0x829D1F82U, 0xC94089C9U, 0x7D87FA7DU, 0xFA15EFFAU, 0x59EBB259U, 0x47C98E47U, 0xF00BFBF0U, \n 0xADEC41ADU, 0xD467B3D4U, 0xA2FD5FA2U, 0xAFEA45AFU, 0x9CBF239CU, 0xA4F753A4U, 0x7296E472U, 0xC05B9BC0U, \n 0xB7C275B7U, 0xFD1CE1FDU, 0x93AE3D93U, 0x266A4C26U, 0x365A6C36U, 0x3F417E3FU, 0xF702F5F7U, 0xCC4F83CCU, \n 0x345C6834U, 0xA5F451A5U, 0xE534D1E5U, 0xF108F9F1U, 0x7193E271U, 0xD873ABD8U, 0x31536231U, 0x153F2A15U, \n 0x040C0804U, 0xC75295C7U, 0x23654623U, 0xC35E9DC3U, 0x18283018U, 0x96A13796U, 0x050F0A05U, 0x9AB52F9AU, \n 0x07090E07U, 0x12362412U, 0x809B1B80U, 0xE23DDFE2U, 0xEB26CDEBU, 0x27694E27U, 0xB2CD7FB2U, 0x759FEA75U, \n 0x091B1209U, 0x839E1D83U, 0x2C74582CU, 0x1A2E341AU, 0x1B2D361BU, 0x6EB2DC6EU, 0x5AEEB45AU, 0xA0FB5BA0U, \n 0x52F6A452U, 0x3B4D763BU, 0xD661B7D6U, 0xB3CE7DB3U, 0x297B5229U, 0xE33EDDE3U, 0x2F715E2FU, 0x84971384U, \n 0x53F5A653U, 0xD168B9D1U, 0x00000000U, 0xED2CC1EDU, 0x20604020U, 0xFC1FE3FCU, 0xB1C879B1U, 0x5BEDB65BU, \n 0x6ABED46AU, 0xCB468DCBU, 0xBED967BEU, 0x394B7239U, 0x4ADE944AU, 0x4CD4984CU, 0x58E8B058U, 0xCF4A85CFU, \n 0xD06BBBD0U, 0xEF2AC5EFU, 0xAAE54FAAU, 0xFB16EDFBU, 
0x43C58643U, 0x4DD79A4DU, 0x33556633U, 0x85941185U, \n 0x45CF8A45U, 0xF910E9F9U, 0x02060402U, 0x7F81FE7FU, 0x50F0A050U, 0x3C44783CU, 0x9FBA259FU, 0xA8E34BA8U, \n 0x51F3A251U, 0xA3FE5DA3U, 0x40C08040U, 0x8F8A058FU, 0x92AD3F92U, 0x9DBC219DU, 0x38487038U, 0xF504F1F5U, \n 0xBCDF63BCU, 0xB6C177B6U, 0xDA75AFDAU, 0x21634221U, 0x10302010U, 0xFF1AE5FFU, 0xF30EFDF3U, 0xD26DBFD2U, \n 0xCD4C81CDU, 0x0C14180CU, 0x13352613U, 0xEC2FC3ECU, 0x5FE1BE5FU, 0x97A23597U, 0x44CC8844U, 0x17392E17U, \n 0xC45793C4U, 0xA7F255A7U, 0x7E82FC7EU, 0x3D477A3DU, 0x64ACC864U, 0x5DE7BA5DU, 0x192B3219U, 0x7395E673U, \n 0x60A0C060U, 0x81981981U, 0x4FD19E4FU, 0xDC7FA3DCU, 0x22664422U, 0x2A7E542AU, 0x90AB3B90U, 0x88830B88U, \n 0x46CA8C46U, 0xEE29C7EEU, 0xB8D36BB8U, 0x143C2814U, 0xDE79A7DEU, 0x5EE2BC5EU, 0x0B1D160BU, 0xDB76ADDBU, \n 0xE03BDBE0U, 0x32566432U, 0x3A4E743AU, 0x0A1E140AU, 0x49DB9249U, 0x060A0C06U, 0x246C4824U, 0x5CE4B85CU, \n 0xC25D9FC2U, 0xD36EBDD3U, 0xACEF43ACU, 0x62A6C462U, 0x91A83991U, 0x95A43195U, 0xE437D3E4U, 0x798BF279U, \n 0xE732D5E7U, 0xC8438BC8U, 0x37596E37U, 0x6DB7DA6DU, 0x8D8C018DU, 0xD564B1D5U, 0x4ED29C4EU, 0xA9E049A9U, \n 0x6CB4D86CU, 0x56FAAC56U, 0xF407F3F4U, 0xEA25CFEAU, 0x65AFCA65U, 0x7A8EF47AU, 0xAEE947AEU, 0x08181008U, \n 0xBAD56FBAU, 0x7888F078U, 0x256F4A25U, 0x2E725C2EU, 0x1C24381CU, 0xA6F157A6U, 0xB4C773B4U, 0xC65197C6U, \n 0xE823CBE8U, 0xDD7CA1DDU, 0x749CE874U, 0x1F213E1FU, 0x4BDD964BU, 0xBDDC61BDU, 0x8B860D8BU, 0x8A850F8AU, \n 0x7090E070U, 0x3E427C3EU, 0xB5C471B5U, 0x66AACC66U, 0x48D89048U, 0x03050603U, 0xF601F7F6U, 0x0E121C0EU, \n 0x61A3C261U, 0x355F6A35U, 0x57F9AE57U, 0xB9D069B9U, 0x86911786U, 0xC15899C1U, 0x1D273A1DU, 0x9EB9279EU, \n 0xE138D9E1U, 0xF813EBF8U, 0x98B32B98U, 0x11332211U, 0x69BBD269U, 0xD970A9D9U, 0x8E89078EU, 0x94A73394U, \n 0x9BB62D9BU, 0x1E223C1EU, 0x87921587U, 0xE920C9E9U, 0xCE4987CEU, 0x55FFAA55U, 0x28785028U, 0xDF7AA5DFU, \n 0x8C8F038CU, 0xA1F859A1U, 0x89800989U, 0x0D171A0DU, 0xBFDA65BFU, 0xE631D7E6U, 0x42C68442U, 0x68B8D068U, \n 0x41C38241U, 
0x99B02999U, 0x2D775A2DU, 0x0F111E0FU, 0xB0CB7BB0U, 0x54FCA854U, 0xBBD66DBBU, 0x163A2C16U\n});" @@ -37,7 +37,7 @@ Replacements: InitStr: "{\n 0x63A5C663U, 0x7C84F87CU, 0x7799EE77U, 0x7B8DF67BU, 0xF20DFFF2U, 0x6BBDD66BU, 0x6FB1DE6FU, 0xC55491C5U, \n 0x30506030U, 0x01030201U, 0x67A9CE67U, 0x2B7D562BU, 0xFE19E7FEU, 0xD762B5D7U, 0xABE64DABU, 0x769AEC76U, \n 0xCA458FCAU, 0x829D1F82U, 0xC94089C9U, 0x7D87FA7DU, 0xFA15EFFAU, 0x59EBB259U, 0x47C98E47U, 0xF00BFBF0U, \n 0xADEC41ADU, 0xD467B3D4U, 0xA2FD5FA2U, 0xAFEA45AFU, 0x9CBF239CU, 0xA4F753A4U, 0x7296E472U, 0xC05B9BC0U, \n 0xB7C275B7U, 0xFD1CE1FDU, 0x93AE3D93U, 0x266A4C26U, 0x365A6C36U, 0x3F417E3FU, 0xF702F5F7U, 0xCC4F83CCU, \n 0x345C6834U, 0xA5F451A5U, 0xE534D1E5U, 0xF108F9F1U, 0x7193E271U, 0xD873ABD8U, 0x31536231U, 0x153F2A15U, \n 0x040C0804U, 0xC75295C7U, 0x23654623U, 0xC35E9DC3U, 0x18283018U, 0x96A13796U, 0x050F0A05U, 0x9AB52F9AU, \n 0x07090E07U, 0x12362412U, 0x809B1B80U, 0xE23DDFE2U, 0xEB26CDEBU, 0x27694E27U, 0xB2CD7FB2U, 0x759FEA75U, \n 0x091B1209U, 0x839E1D83U, 0x2C74582CU, 0x1A2E341AU, 0x1B2D361BU, 0x6EB2DC6EU, 0x5AEEB45AU, 0xA0FB5BA0U, \n 0x52F6A452U, 0x3B4D763BU, 0xD661B7D6U, 0xB3CE7DB3U, 0x297B5229U, 0xE33EDDE3U, 0x2F715E2FU, 0x84971384U, \n 0x53F5A653U, 0xD168B9D1U, 0x00000000U, 0xED2CC1EDU, 0x20604020U, 0xFC1FE3FCU, 0xB1C879B1U, 0x5BEDB65BU, \n 0x6ABED46AU, 0xCB468DCBU, 0xBED967BEU, 0x394B7239U, 0x4ADE944AU, 0x4CD4984CU, 0x58E8B058U, 0xCF4A85CFU, \n 0xD06BBBD0U, 0xEF2AC5EFU, 0xAAE54FAAU, 0xFB16EDFBU, 0x43C58643U, 0x4DD79A4DU, 0x33556633U, 0x85941185U, \n 0x45CF8A45U, 0xF910E9F9U, 0x02060402U, 0x7F81FE7FU, 0x50F0A050U, 0x3C44783CU, 0x9FBA259FU, 0xA8E34BA8U, \n 0x51F3A251U, 0xA3FE5DA3U, 0x40C08040U, 0x8F8A058FU, 0x92AD3F92U, 0x9DBC219DU, 0x38487038U, 0xF504F1F5U, \n 0xBCDF63BCU, 0xB6C177B6U, 0xDA75AFDAU, 0x21634221U, 0x10302010U, 0xFF1AE5FFU, 0xF30EFDF3U, 0xD26DBFD2U, \n 0xCD4C81CDU, 0x0C14180CU, 0x13352613U, 0xEC2FC3ECU, 0x5FE1BE5FU, 0x97A23597U, 0x44CC8844U, 0x17392E17U, \n 0xC45793C4U, 0xA7F255A7U, 0x7E82FC7EU, 
0x3D477A3DU, 0x64ACC864U, 0x5DE7BA5DU, 0x192B3219U, 0x7395E673U, \n 0x60A0C060U, 0x81981981U, 0x4FD19E4FU, 0xDC7FA3DCU, 0x22664422U, 0x2A7E542AU, 0x90AB3B90U, 0x88830B88U, \n 0x46CA8C46U, 0xEE29C7EEU, 0xB8D36BB8U, 0x143C2814U, 0xDE79A7DEU, 0x5EE2BC5EU, 0x0B1D160BU, 0xDB76ADDBU, \n 0xE03BDBE0U, 0x32566432U, 0x3A4E743AU, 0x0A1E140AU, 0x49DB9249U, 0x060A0C06U, 0x246C4824U, 0x5CE4B85CU, \n 0xC25D9FC2U, 0xD36EBDD3U, 0xACEF43ACU, 0x62A6C462U, 0x91A83991U, 0x95A43195U, 0xE437D3E4U, 0x798BF279U, \n 0xE732D5E7U, 0xC8438BC8U, 0x37596E37U, 0x6DB7DA6DU, 0x8D8C018DU, 0xD564B1D5U, 0x4ED29C4EU, 0xA9E049A9U, \n 0x6CB4D86CU, 0x56FAAC56U, 0xF407F3F4U, 0xEA25CFEAU, 0x65AFCA65U, 0x7A8EF47AU, 0xAEE947AEU, 0x08181008U, \n 0xBAD56FBAU, 0x7888F078U, 0x256F4A25U, 0x2E725C2EU, 0x1C24381CU, 0xA6F157A6U, 0xB4C773B4U, 0xC65197C6U, \n 0xE823CBE8U, 0xDD7CA1DDU, 0x749CE874U, 0x1F213E1FU, 0x4BDD964BU, 0xBDDC61BDU, 0x8B860D8BU, 0x8A850F8AU, \n 0x7090E070U, 0x3E427C3EU, 0xB5C471B5U, 0x66AACC66U, 0x48D89048U, 0x03050603U, 0xF601F7F6U, 0x0E121C0EU, \n 0x61A3C261U, 0x355F6A35U, 0x57F9AE57U, 0xB9D069B9U, 0x86911786U, 0xC15899C1U, 0x1D273A1DU, 0x9EB9279EU, \n 0xE138D9E1U, 0xF813EBF8U, 0x98B32B98U, 0x11332211U, 0x69BBD269U, 0xD970A9D9U, 0x8E89078EU, 0x94A73394U, \n 0x9BB62D9BU, 0x1E223C1EU, 0x87921587U, 0xE920C9E9U, 0xCE4987CEU, 0x55FFAA55U, 0x28785028U, 0xDF7AA5DFU, \n 0x8C8F038CU, 0xA1F859A1U, 0x89800989U, 0x0D171A0DU, 0xBFDA65BFU, 0xE631D7E6U, 0x42C68442U, 0x68B8D068U, \n 0x41C38241U, 0x99B02999U, 0x2D775A2DU, 0x0F111E0FU, 0xB0CB7BB0U, 0x54FCA854U, 0xBBD66DBBU, 0x163A2C16U\n}" NewHostVarName: TS2_host_ct1 BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' Offset: 12651 Length: 3666 ReplacementText: "static dpct::constant_memory TS3(sycl::range<1>(256), {\n 0x6363A5C6U, 0x7C7C84F8U, 
0x777799EEU, 0x7B7B8DF6U, 0xF2F20DFFU, 0x6B6BBDD6U, 0x6F6FB1DEU, 0xC5C55491U, \n 0x30305060U, 0x01010302U, 0x6767A9CEU, 0x2B2B7D56U, 0xFEFE19E7U, 0xD7D762B5U, 0xABABE64DU, 0x76769AECU, \n 0xCACA458FU, 0x82829D1FU, 0xC9C94089U, 0x7D7D87FAU, 0xFAFA15EFU, 0x5959EBB2U, 0x4747C98EU, 0xF0F00BFBU, \n 0xADADEC41U, 0xD4D467B3U, 0xA2A2FD5FU, 0xAFAFEA45U, 0x9C9CBF23U, 0xA4A4F753U, 0x727296E4U, 0xC0C05B9BU, \n 0xB7B7C275U, 0xFDFD1CE1U, 0x9393AE3DU, 0x26266A4CU, 0x36365A6CU, 0x3F3F417EU, 0xF7F702F5U, 0xCCCC4F83U, \n 0x34345C68U, 0xA5A5F451U, 0xE5E534D1U, 0xF1F108F9U, 0x717193E2U, 0xD8D873ABU, 0x31315362U, 0x15153F2AU, \n 0x04040C08U, 0xC7C75295U, 0x23236546U, 0xC3C35E9DU, 0x18182830U, 0x9696A137U, 0x05050F0AU, 0x9A9AB52FU, \n 0x0707090EU, 0x12123624U, 0x80809B1BU, 0xE2E23DDFU, 0xEBEB26CDU, 0x2727694EU, 0xB2B2CD7FU, 0x75759FEAU, \n 0x09091B12U, 0x83839E1DU, 0x2C2C7458U, 0x1A1A2E34U, 0x1B1B2D36U, 0x6E6EB2DCU, 0x5A5AEEB4U, 0xA0A0FB5BU, \n 0x5252F6A4U, 0x3B3B4D76U, 0xD6D661B7U, 0xB3B3CE7DU, 0x29297B52U, 0xE3E33EDDU, 0x2F2F715EU, 0x84849713U, \n 0x5353F5A6U, 0xD1D168B9U, 0x00000000U, 0xEDED2CC1U, 0x20206040U, 0xFCFC1FE3U, 0xB1B1C879U, 0x5B5BEDB6U, \n 0x6A6ABED4U, 0xCBCB468DU, 0xBEBED967U, 0x39394B72U, 0x4A4ADE94U, 0x4C4CD498U, 0x5858E8B0U, 0xCFCF4A85U, \n 0xD0D06BBBU, 0xEFEF2AC5U, 0xAAAAE54FU, 0xFBFB16EDU, 0x4343C586U, 0x4D4DD79AU, 0x33335566U, 0x85859411U, \n 0x4545CF8AU, 0xF9F910E9U, 0x02020604U, 0x7F7F81FEU, 0x5050F0A0U, 0x3C3C4478U, 0x9F9FBA25U, 0xA8A8E34BU, \n 0x5151F3A2U, 0xA3A3FE5DU, 0x4040C080U, 0x8F8F8A05U, 0x9292AD3FU, 0x9D9DBC21U, 0x38384870U, 0xF5F504F1U, \n 0xBCBCDF63U, 0xB6B6C177U, 0xDADA75AFU, 0x21216342U, 0x10103020U, 0xFFFF1AE5U, 0xF3F30EFDU, 0xD2D26DBFU, \n 0xCDCD4C81U, 0x0C0C1418U, 0x13133526U, 0xECEC2FC3U, 0x5F5FE1BEU, 0x9797A235U, 0x4444CC88U, 0x1717392EU, \n 0xC4C45793U, 0xA7A7F255U, 0x7E7E82FCU, 0x3D3D477AU, 0x6464ACC8U, 0x5D5DE7BAU, 0x19192B32U, 0x737395E6U, \n 0x6060A0C0U, 0x81819819U, 0x4F4FD19EU, 0xDCDC7FA3U, 0x22226644U, 0x2A2A7E54U, 0x9090AB3BU, 
0x8888830BU, \n 0x4646CA8CU, 0xEEEE29C7U, 0xB8B8D36BU, 0x14143C28U, 0xDEDE79A7U, 0x5E5EE2BCU, 0x0B0B1D16U, 0xDBDB76ADU, \n 0xE0E03BDBU, 0x32325664U, 0x3A3A4E74U, 0x0A0A1E14U, 0x4949DB92U, 0x06060A0CU, 0x24246C48U, 0x5C5CE4B8U, \n 0xC2C25D9FU, 0xD3D36EBDU, 0xACACEF43U, 0x6262A6C4U, 0x9191A839U, 0x9595A431U, 0xE4E437D3U, 0x79798BF2U, \n 0xE7E732D5U, 0xC8C8438BU, 0x3737596EU, 0x6D6DB7DAU, 0x8D8D8C01U, 0xD5D564B1U, 0x4E4ED29CU, 0xA9A9E049U, \n 0x6C6CB4D8U, 0x5656FAACU, 0xF4F407F3U, 0xEAEA25CFU, 0x6565AFCAU, 0x7A7A8EF4U, 0xAEAEE947U, 0x08081810U, \n 0xBABAD56FU, 0x787888F0U, 0x25256F4AU, 0x2E2E725CU, 0x1C1C2438U, 0xA6A6F157U, 0xB4B4C773U, 0xC6C65197U, \n 0xE8E823CBU, 0xDDDD7CA1U, 0x74749CE8U, 0x1F1F213EU, 0x4B4BDD96U, 0xBDBDDC61U, 0x8B8B860DU, 0x8A8A850FU, \n 0x707090E0U, 0x3E3E427CU, 0xB5B5C471U, 0x6666AACCU, 0x4848D890U, 0x03030506U, 0xF6F601F7U, 0x0E0E121CU, \n 0x6161A3C2U, 0x35355F6AU, 0x5757F9AEU, 0xB9B9D069U, 0x86869117U, 0xC1C15899U, 0x1D1D273AU, 0x9E9EB927U, \n 0xE1E138D9U, 0xF8F813EBU, 0x9898B32BU, 0x11113322U, 0x6969BBD2U, 0xD9D970A9U, 0x8E8E8907U, 0x9494A733U, \n 0x9B9BB62DU, 0x1E1E223CU, 0x87879215U, 0xE9E920C9U, 0xCECE4987U, 0x5555FFAAU, 0x28287850U, 0xDFDF7AA5U, \n 0x8C8C8F03U, 0xA1A1F859U, 0x89898009U, 0x0D0D171AU, 0xBFBFDA65U, 0xE6E631D7U, 0x4242C684U, 0x6868B8D0U, \n 0x4141C382U, 0x9999B029U, 0x2D2D775AU, 0x0F0F111EU, 0xB0B0CB7BU, 0x5454FCA8U, 0xBBBBD66DU, 0x16163A2CU\n});" @@ -47,7 +47,7 @@ Replacements: NewHostVarName: TS3_host_ct1 BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/aes.h' Digest: 9ca1105f7e59fd3869626e1b3d07dab5 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -56,7 +56,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: 
'/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' Specified: false AsyncHandler: Value: 'false' @@ -65,7 +65,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o index ee49b1f59..3bdbb5cf2 100644 Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/attack.dp.o differ diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml index aefd7087f..7f4ffe5de 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/bitcracker.h' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 1633 Length: 18 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' 
BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 1651 Length: 26 ReplacementText: '' @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 3084 Length: 0 ReplacementText: "/*\nDPCT1009:5: SYCL uses exceptions to report errors and does not use the error codes. The original code was commented out and a warning string was inserted. You need to rewrite this code.\n*/\n" @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 3162 Length: 9 ReplacementText: 'dpct::err0' @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 3236 Length: 297 ReplacementText: '' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 3537 Length: 0 ReplacementText: "/*\nDPCT1010:6: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n*/\n/*\nDPCT1009:7: SYCL uses exceptions to report errors and does not use the error codes. The original code was commented out and a warning string was inserted. You need to rewrite this code.\n*/\n" @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 3625 Length: 11 ReplacementText: '0' @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 3640 Length: 18 ReplacementText: '0' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Offset: 3809 Length: 40 ReplacementText: '"cudaGetErrorString is not supported"/*cudaGetErrorString(0)*/' @@ -83,7 +83,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: false MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' + - MainSourceFile: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/bitcracker.h' Digest: 50c4dfdc6a83f7ccc7dad8bf75a4c77b DpctVersion: 18.0.0 MainHelperFileName: '' @@ -92,7 +92,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' Specified: false AsyncHandler: Value: 'false' @@ -101,7 +101,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o index f0a4633b9..d799d328b 100644 Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/main.dp.o differ diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml index a221fa7b7..5d0b5448a 100644 --- a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml +++ b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h.yaml @@ -1,7 +1,7 @@ --- -MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h' +MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/sha256.h' Replacements: - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - 
FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 0 Length: 0 ReplacementText: "#include \n#include \n" @@ -10,7 +10,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 1819 Length: 13 ReplacementText: DPCT_COMPATIBILITY_TEMP @@ -19,7 +19,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 1850 Length: 11 ReplacementText: '' @@ -28,7 +28,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 1861 Length: 15 ReplacementText: __dpct_inline__ @@ -37,7 +37,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 1954 Length: 72 ReplacementText: 'd = a ^ b ^ c' @@ -46,7 +46,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: true - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 2055 Length: 11 ReplacementText: '' @@ -55,7 +55,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 2066 Length: 15 ReplacementText: __dpct_inline__ @@ -64,7 +64,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 2162 Length: 72 ReplacementText: 'd = (a ^ (b & (c ^ a)))' @@ -73,7 +73,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: true - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 2263 Length: 11 ReplacementText: '' @@ -82,7 +82,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 2274 Length: 15 ReplacementText: __dpct_inline__ @@ -91,7 +91,7 @@ Replacements: InitStr: '' NewHostVarName: '' BlockLevelFormatFlag: false - - FilePath: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - FilePath: 
'/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Offset: 2369 Length: 72 ReplacementText: 'd = ((a & (b | c)) | (b & c))' @@ -101,7 +101,7 @@ Replacements: NewHostVarName: '' BlockLevelFormatFlag: true MainSourceFilesDigest: - - MainSourceFile: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' + - MainSourceFile: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/sha256.h' Digest: d6bf5df3b793bedc87d8c4197f8ce069 DpctVersion: 18.0.0 MainHelperFileName: '' @@ -110,7 +110,7 @@ FeatureMap: {} CompileTargets: {} OptionMap: AnalysisScopePath: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA' Specified: false AsyncHandler: Value: 'false' @@ -119,7 +119,7 @@ OptionMap: Value: 'false' Specified: false CompilationsDir: - Value: '/home/chenshe1/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' + Value: '/home/local_user/sandbox/SYCLomatic-test/third-party-programs/Velocity-Bench/bitcracker/CUDA/build' Specified: true CtadEnabled: Value: 'false' diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o index b23c22c10..0868d2172 100644 Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/utils.dp.o differ diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o index 4121fb83e..d68baa7ed 100644 Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o and 
b/third-party-programs/Velocity-Bench/bitcracker/CUDA/out/src/w_blocks.dp.o differ diff --git a/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp b/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp index 498392fad..b40b04f14 100644 Binary files a/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp and b/third-party-programs/Velocity-Bench/bitcracker/CUDA/src/.attack.cu.swp differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt new file mode 100644 index 000000000..935f7b79b --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt @@ -0,0 +1,101 @@ +# Modifications Copyright (C) 2023 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. 
+ +# SPDX-License-Identifier: MIT + +cmake_minimum_required(VERSION 3.10) +project(cudaSift C CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +option(DEVICE_TIMER "Build using Device Timer" OFF) +option(USE_SM "Specifies which streaming multiprocessor architecture to use" ) + +set(DEF_WL_CXX_FLAGS " -msse2 ") +set(DEF_GENERAL_CXX_FLAGS " -O3 ") +set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}") + +find_package(OpenCV REQUIRED) +find_package(CUDA) +if (NOT CUDA_FOUND) + message(STATUS "CUDA not found. Project will not be built.") +endif(NOT CUDA_FOUND) + +set(cuda_sources + cudaImage.cu + cudaImage.h + cudaSiftH.cu + cudaSiftH.h + matching.cu + cudaSiftD.h + cudaSift.h + cudautils.h +) + +set(sources + ${CMAKE_SOURCE_DIR}/../common/Utility.cpp + geomFuncs.cpp + mainSift.cpp +) + +include_directories( + ${CMAKE_SOURCE_DIR}/../common/ + ${CMAKE_CURRENT_SOURCE_DIR} +) +if(DEVICE_TIMER) + message(STATUS "Enabling Device Timer") + add_compile_options(-DDEVICE_TIMER) +endif() + +# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific) +# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags) +# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity + +if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together") +elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "Using DEFAULT compilation flags") + set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}") +elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL compilation flags") + set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS ${DEF_WL_CXX_FLAGS}) +elseif(NOT 
"${CMAKE_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags") +endif() + + +set(CUDA_SEPARABLE_COMPILATION ON) +message(STATUS "CXX Compilation flags to: ${CMAKE_CXX_FLAGS}") + +cuda_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=sm_${USE_SM}) +target_link_libraries(cudasift ${CUDA_cudadevrt_LIBRARY} ${OpenCV_LIBS}) + +install(FILES + ${cuda_sources} + ${sources} + cudaSiftD.cu + CMakeLists.txt + DESTINATION . +) +install(FILES data/left.pgm data/righ.pgm + DESTINATION data +) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeCache.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeCache.txt new file mode 100644 index 000000000..69924b468 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeCache.txt @@ -0,0 +1,628 @@ +# This is the CMakeCache file. +# For build in directory: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build +# It was generated by CMake: /usr/bin/cmake +# You can edit this file to change values found and used by cmake. +# If you do not want to change any of the values, simply exit the editor. +# If you do want to change a value, simply edit, save, and exit the editor. +# The syntax for the file is as follows: +# KEY:TYPE=VALUE +# KEY is the name of a variable in the cache. +# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!. +# VALUE is the current value for the KEY. + +######################## +# EXTERNAL cache entries +######################## + +//Path to a program. +CMAKE_ADDR2LINE:FILEPATH=/usr/bin/addr2line + +//Path to a program. +CMAKE_AR:FILEPATH=/usr/bin/ar + +//Choose the type of build, options are: None Debug Release RelWithDebInfo +// MinSizeRel ... +CMAKE_BUILD_TYPE:STRING= + +//Enable/Disable color output during build. 
+CMAKE_COLOR_MAKEFILE:BOOL=ON + +//CXX compiler +CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++ + +//A wrapper around 'ar' adding the appropriate '--plugin' option +// for the GCC compiler +CMAKE_CXX_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11 + +//A wrapper around 'ranlib' adding the appropriate '--plugin' option +// for the GCC compiler +CMAKE_CXX_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11 + +//Flags used by the CXX compiler during all build types. +CMAKE_CXX_FLAGS:STRING= + +//Flags used by the CXX compiler during DEBUG builds. +CMAKE_CXX_FLAGS_DEBUG:STRING=-g + +//Flags used by the CXX compiler during MINSIZEREL builds. +CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the CXX compiler during RELEASE builds. +CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the CXX compiler during RELWITHDEBINFO builds. +CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG + +//C compiler +CMAKE_C_COMPILER:FILEPATH=/usr/bin/cc + +//A wrapper around 'ar' adding the appropriate '--plugin' option +// for the GCC compiler +CMAKE_C_COMPILER_AR:FILEPATH=/usr/bin/gcc-ar-11 + +//A wrapper around 'ranlib' adding the appropriate '--plugin' option +// for the GCC compiler +CMAKE_C_COMPILER_RANLIB:FILEPATH=/usr/bin/gcc-ranlib-11 + +//Flags used by the C compiler during all build types. +CMAKE_C_FLAGS:STRING= + +//Flags used by the C compiler during DEBUG builds. +CMAKE_C_FLAGS_DEBUG:STRING=-g + +//Flags used by the C compiler during MINSIZEREL builds. +CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the C compiler during RELEASE builds. +CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the C compiler during RELWITHDEBINFO builds. +CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG + +//Path to a program. +CMAKE_DLLTOOL:FILEPATH=CMAKE_DLLTOOL-NOTFOUND + +//Flags used by the linker during all build types. +CMAKE_EXE_LINKER_FLAGS:STRING= + +//Flags used by the linker during DEBUG builds. 
+CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during MINSIZEREL builds. +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during RELEASE builds. +CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during RELWITHDEBINFO builds. +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Enable/Disable output of compile commands during generation. +CMAKE_EXPORT_COMPILE_COMMANDS:BOOL= + +//Install path prefix, prepended onto install directories. +CMAKE_INSTALL_PREFIX:PATH=/usr/local + +//Path to a program. +CMAKE_LINKER:FILEPATH=/usr/bin/ld + +//Path to a program. +CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake + +//Flags used by the linker during the creation of modules during +// all build types. +CMAKE_MODULE_LINKER_FLAGS:STRING= + +//Flags used by the linker during the creation of modules during +// DEBUG builds. +CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during the creation of modules during +// MINSIZEREL builds. +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during the creation of modules during +// RELEASE builds. +CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during the creation of modules during +// RELWITHDEBINFO builds. +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_NM:FILEPATH=/usr/bin/nm + +//Path to a program. +CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy + +//Path to a program. +CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump + +//Value Computed by CMake +CMAKE_PROJECT_DESCRIPTION:STATIC= + +//Value Computed by CMake +CMAKE_PROJECT_HOMEPAGE_URL:STATIC= + +//Value Computed by CMake +CMAKE_PROJECT_NAME:STATIC=cudaSift + +//Path to a program. +CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib + +//Path to a program. +CMAKE_READELF:FILEPATH=/usr/bin/readelf + +//Flags used by the linker during the creation of shared libraries +// during all build types. 
+CMAKE_SHARED_LINKER_FLAGS:STRING= + +//Flags used by the linker during the creation of shared libraries +// during DEBUG builds. +CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during the creation of shared libraries +// during MINSIZEREL builds. +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during the creation of shared libraries +// during RELEASE builds. +CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during the creation of shared libraries +// during RELWITHDEBINFO builds. +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//If set, runtime paths are not added when installing shared libraries, +// but are added when building. +CMAKE_SKIP_INSTALL_RPATH:BOOL=NO + +//If set, runtime paths are not added when using shared libraries. +CMAKE_SKIP_RPATH:BOOL=NO + +//Flags used by the linker during the creation of static libraries +// during all build types. +CMAKE_STATIC_LINKER_FLAGS:STRING= + +//Flags used by the linker during the creation of static libraries +// during DEBUG builds. +CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during the creation of static libraries +// during MINSIZEREL builds. +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during the creation of static libraries +// during RELEASE builds. +CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during the creation of static libraries +// during RELWITHDEBINFO builds. +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_STRIP:FILEPATH=/usr/bin/strip + +//If this value is on, makefiles will be generated without the +// .SILENT directive, and all commands will be echoed to the console +// during the make. This is useful for debugging only. With Visual +// Studio IDE projects all commands are done without /nologo. 
+CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE + +//Compile device code in 64 bit mode +CUDA_64_BIT_DEVICE_CODE:BOOL=ON + +//Attach the build rule to the CUDA source file. Enable only when +// the CUDA source file is added to at most one target. +CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE:BOOL=ON + +//Generate and parse .cubin files in Device mode. +CUDA_BUILD_CUBIN:BOOL=OFF + +//Build in Emulation mode +CUDA_BUILD_EMULATION:BOOL=OFF + +//"cudart" library +CUDA_CUDART_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcudart.so + +//"cuda" library (older versions only). +CUDA_CUDA_LIBRARY:FILEPATH=CUDA_CUDA_LIBRARY-NOTFOUND + +//Directory to put all the output files. If blank it will default +// to the CMAKE_CURRENT_BINARY_DIR +CUDA_GENERATED_OUTPUT_DIR:PATH= + +//Generated file extension +CUDA_HOST_COMPILATION_CPP:BOOL=ON + +//Host side compiler used by NVCC +CUDA_HOST_COMPILER:FILEPATH=/usr/bin/cc + +//Path to a program. +CUDA_NVCC_EXECUTABLE:FILEPATH=/usr/local/cuda/bin/nvcc + +//Semi-colon delimit multiple arguments. during all build types. +CUDA_NVCC_FLAGS:STRING= + +//Semi-colon delimit multiple arguments. during DEBUG builds. +CUDA_NVCC_FLAGS_DEBUG:STRING= + +//Semi-colon delimit multiple arguments. during MINSIZEREL builds. +CUDA_NVCC_FLAGS_MINSIZEREL:STRING= + +//Semi-colon delimit multiple arguments. during RELEASE builds. +CUDA_NVCC_FLAGS_RELEASE:STRING= + +//Semi-colon delimit multiple arguments. during RELWITHDEBINFO +// builds. +CUDA_NVCC_FLAGS_RELWITHDEBINFO:STRING= + +//"OpenCL" library +CUDA_OpenCL_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libOpenCL.so + +//Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile +CUDA_PROPAGATE_HOST_FLAGS:BOOL=ON + +//Path to a file. +CUDA_SDK_ROOT_DIR:PATH=CUDA_SDK_ROOT_DIR-NOTFOUND + +//Compile CUDA objects with separable compilation enabled. Requires +// CUDA 5.0+ +CUDA_SEPARABLE_COMPILATION:BOOL=OFF + +//Path to a file. +CUDA_TOOLKIT_INCLUDE:PATH=/usr/local/cuda/include + +//Toolkit location. 
+CUDA_TOOLKIT_ROOT_DIR:PATH=/usr/local/cuda + +//Use the static version of the CUDA runtime library if available +CUDA_USE_STATIC_CUDA_RUNTIME:BOOL=ON + +//Print out the commands run while compiling the CUDA source file. +// With the Makefile generator this defaults to VERBOSE variable +// specified on the command line, but can be forced on with this +// option. +CUDA_VERBOSE_BUILD:BOOL=OFF + +//Version of CUDA as computed from nvcc. +CUDA_VERSION:STRING=12.2 + +//"cublas" library +CUDA_cublas_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcublas.so + +//"cudadevrt" library +CUDA_cudadevrt_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcudadevrt.a + +//static CUDA runtime library +CUDA_cudart_static_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcudart_static.a + +//"cufft" library +CUDA_cufft_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcufft.so + +//"cupti" library +CUDA_cupti_LIBRARY:FILEPATH=/usr/local/cuda/extras/CUPTI/lib64/libcupti.so + +//"curand" library +CUDA_curand_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcurand.so + +//"cusolver" library +CUDA_cusolver_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcusolver.so + +//"cusparse" library +CUDA_cusparse_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libcusparse.so + +//"nppc" library +CUDA_nppc_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppc.so + +//"nppial" library +CUDA_nppial_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppial.so + +//"nppicc" library +CUDA_nppicc_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppicc.so + +//"nppidei" library +CUDA_nppidei_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppidei.so + +//"nppif" library +CUDA_nppif_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppif.so + +//"nppig" library +CUDA_nppig_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppig.so + +//"nppim" library +CUDA_nppim_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppim.so + +//"nppist" library +CUDA_nppist_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppist.so + +//"nppisu" library +CUDA_nppisu_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppisu.so + +//"nppitc" 
library +CUDA_nppitc_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnppitc.so + +//"npps" library +CUDA_npps_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnpps.so + +//"nvToolsExt" library +CUDA_nvToolsExt_LIBRARY:FILEPATH=/usr/local/cuda/lib64/libnvToolsExt.so + +//Path to a library. +CUDA_rt_LIBRARY:FILEPATH=/usr/lib/x86_64-linux-gnu/librt.a + +//Build using Device Timer +DEVICE_TIMER:BOOL=OFF + +//The directory containing a CMake configuration file for OpenCV. +OpenCV_DIR:PATH=/usr/lib/x86_64-linux-gnu/cmake/opencv4 + +//Specifies which streaming multiprocessor architecture to use +USE_SM:BOOL=80 + +//Value Computed by CMake +cudaSift_BINARY_DIR:STATIC=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build + +//Value Computed by CMake +cudaSift_IS_TOP_LEVEL:STATIC=ON + +//Value Computed by CMake +cudaSift_SOURCE_DIR:STATIC=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA + + +######################## +# INTERNAL cache entries +######################## + +//ADVANCED property for variable: CMAKE_ADDR2LINE +CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_AR +CMAKE_AR-ADVANCED:INTERNAL=1 +//This is the directory where this CMakeCache.txt was created +CMAKE_CACHEFILE_DIR:INTERNAL=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 +//Minor version of cmake used to create the current loaded cache +CMAKE_CACHE_MINOR_VERSION:INTERNAL=22 +//Patch version of cmake used to create the current loaded cache +CMAKE_CACHE_PATCH_VERSION:INTERNAL=1 +//ADVANCED property for variable: CMAKE_COLOR_MAKEFILE +CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1 +//Path to CMake executable. +CMAKE_COMMAND:INTERNAL=/usr/bin/cmake +//Path to cpack program executable. +CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack +//Path to ctest program executable. 
+CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest +//ADVANCED property for variable: CMAKE_CXX_COMPILER +CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_COMPILER_AR +CMAKE_CXX_COMPILER_AR-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_COMPILER_RANLIB +CMAKE_CXX_COMPILER_RANLIB-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS +CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG +CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL +CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE +CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO +CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_COMPILER +CMAKE_C_COMPILER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_COMPILER_AR +CMAKE_C_COMPILER_AR-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_COMPILER_RANLIB +CMAKE_C_COMPILER_RANLIB-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS +CMAKE_C_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG +CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL +CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE +CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO +CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_DLLTOOL +CMAKE_DLLTOOL-ADVANCED:INTERNAL=1 +//Executable file format +CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS +CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG 
+CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE +CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS +CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1 +//Name of external makefile project generator. +CMAKE_EXTRA_GENERATOR:INTERNAL= +//Name of generator. +CMAKE_GENERATOR:INTERNAL=Unix Makefiles +//Generator instance identifier. +CMAKE_GENERATOR_INSTANCE:INTERNAL= +//Name of generator platform. +CMAKE_GENERATOR_PLATFORM:INTERNAL= +//Name of generator toolset. +CMAKE_GENERATOR_TOOLSET:INTERNAL= +//Test CMAKE_HAVE_LIBC_PTHREAD +CMAKE_HAVE_LIBC_PTHREAD:INTERNAL=1 +//Have include pthread.h +CMAKE_HAVE_PTHREAD_H:INTERNAL=1 +//Source directory with the top level CMakeLists.txt file for this +// project +CMAKE_HOME_DIRECTORY:INTERNAL=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA +//Install .so files without execute permission. 
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=1 +//ADVANCED property for variable: CMAKE_LINKER +CMAKE_LINKER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MAKE_PROGRAM +CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS +CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG +CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE +CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_NM +CMAKE_NM-ADVANCED:INTERNAL=1 +//number of local generators +CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1 +//ADVANCED property for variable: CMAKE_OBJCOPY +CMAKE_OBJCOPY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_OBJDUMP +CMAKE_OBJDUMP-ADVANCED:INTERNAL=1 +//Platform information initialized +CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RANLIB +CMAKE_RANLIB-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_READELF +CMAKE_READELF-ADVANCED:INTERNAL=1 +//Path to CMake installation. 
+CMAKE_ROOT:INTERNAL=/usr/share/cmake-3.22 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS +CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG +CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE +CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH +CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_RPATH +CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS +CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG +CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE +CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STRIP +CMAKE_STRIP-ADVANCED:INTERNAL=1 +//uname command +CMAKE_UNAME:INTERNAL=/usr/bin/uname +//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE +CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_64_BIT_DEVICE_CODE +CUDA_64_BIT_DEVICE_CODE-ADVANCED:INTERNAL=1 +//List of intermediate files that are part of the cuda dependency +// scanning. 
+CUDA_ADDITIONAL_CLEAN_FILES:INTERNAL=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.depend;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.depend;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.depend +//ADVANCED property for variable: CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE +CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_BUILD_CUBIN +CUDA_BUILD_CUBIN-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_BUILD_EMULATION +CUDA_BUILD_EMULATION-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_CUDART_LIBRARY +CUDA_CUDART_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_CUDA_LIBRARY +CUDA_CUDA_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_GENERATED_OUTPUT_DIR +CUDA_GENERATED_OUTPUT_DIR-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_HOST_COMPILATION_CPP +CUDA_HOST_COMPILATION_CPP-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_EXECUTABLE +CUDA_NVCC_EXECUTABLE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS +CUDA_NVCC_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_DEBUG +CUDA_NVCC_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_MINSIZEREL +CUDA_NVCC_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELEASE +CUDA_NVCC_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_NVCC_FLAGS_RELWITHDEBINFO +CUDA_NVCC_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_OpenCL_LIBRARY +CUDA_OpenCL_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_PROPAGATE_HOST_FLAGS +CUDA_PROPAGATE_HOST_FLAGS-ADVANCED:INTERNAL=1 +//This is the value of the last 
time CUDA_SDK_ROOT_DIR was set +// successfully. +CUDA_SDK_ROOT_DIR_INTERNAL:INTERNAL=CUDA_SDK_ROOT_DIR-NOTFOUND +//ADVANCED property for variable: CUDA_SEPARABLE_COMPILATION +CUDA_SEPARABLE_COMPILATION-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_TOOLKIT_INCLUDE +CUDA_TOOLKIT_INCLUDE-ADVANCED:INTERNAL=1 +//This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was +// set successfully. +CUDA_TOOLKIT_ROOT_DIR_INTERNAL:INTERNAL=/usr/local/cuda +//This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was +// set successfully. +CUDA_TOOLKIT_TARGET_DIR_INTERNAL:INTERNAL=/usr/local/cuda +//ADVANCED property for variable: CUDA_VERBOSE_BUILD +CUDA_VERBOSE_BUILD-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_VERSION +CUDA_VERSION-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cublas_LIBRARY +CUDA_cublas_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cudadevrt_LIBRARY +CUDA_cudadevrt_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cudart_static_LIBRARY +CUDA_cudart_static_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cufft_LIBRARY +CUDA_cufft_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cupti_LIBRARY +CUDA_cupti_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_curand_LIBRARY +CUDA_curand_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cusolver_LIBRARY +CUDA_cusolver_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_cusparse_LIBRARY +CUDA_cusparse_LIBRARY-ADVANCED:INTERNAL=1 +//Location of make2cmake.cmake +CUDA_make2cmake:INTERNAL=/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake +//ADVANCED property for variable: CUDA_nppc_LIBRARY +CUDA_nppc_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppial_LIBRARY +CUDA_nppial_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppicc_LIBRARY +CUDA_nppicc_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property 
for variable: CUDA_nppidei_LIBRARY +CUDA_nppidei_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppif_LIBRARY +CUDA_nppif_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppig_LIBRARY +CUDA_nppig_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppim_LIBRARY +CUDA_nppim_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppist_LIBRARY +CUDA_nppist_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppisu_LIBRARY +CUDA_nppisu_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nppitc_LIBRARY +CUDA_nppitc_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_npps_LIBRARY +CUDA_npps_LIBRARY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CUDA_nvToolsExt_LIBRARY +CUDA_nvToolsExt_LIBRARY-ADVANCED:INTERNAL=1 +//Location of parse_cubin.cmake +CUDA_parse_cubin:INTERNAL=/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake +//Location of run_nvcc.cmake +CUDA_run_nvcc:INTERNAL=/usr/share/cmake-3.22/Modules/FindCUDA/run_nvcc.cmake +//Details about finding CUDA +FIND_PACKAGE_MESSAGE_DETAILS_CUDA:INTERNAL=[/usr/local/cuda][/usr/local/cuda/bin/nvcc][/usr/local/cuda/include][/usr/local/cuda/lib64/libcudart_static.a][v12.2()] +//Details about finding OpenCV +FIND_PACKAGE_MESSAGE_DETAILS_OpenCV:INTERNAL=[/usr][v4.5.4()] +//Details about finding Threads +FIND_PACKAGE_MESSAGE_DETAILS_Threads:INTERNAL=[TRUE][v()] + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCCompiler.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCCompiler.cmake new file mode 100644 index 000000000..488ad3751 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCCompiler.cmake @@ -0,0 +1,72 @@ +set(CMAKE_C_COMPILER "/usr/bin/cc") +set(CMAKE_C_COMPILER_ARG1 "") +set(CMAKE_C_COMPILER_ID "GNU") +set(CMAKE_C_COMPILER_VERSION "11.4.0") 
+set(CMAKE_C_COMPILER_VERSION_INTERNAL "") +set(CMAKE_C_COMPILER_WRAPPER "") +set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17") +set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON") +set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23") +set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes") +set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros") +set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert") +set(CMAKE_C17_COMPILE_FEATURES "c_std_17") +set(CMAKE_C23_COMPILE_FEATURES "c_std_23") + +set(CMAKE_C_PLATFORM_ID "Linux") +set(CMAKE_C_SIMULATE_ID "") +set(CMAKE_C_COMPILER_FRONTEND_VARIANT "") +set(CMAKE_C_SIMULATE_VERSION "") + + + + +set(CMAKE_AR "/usr/bin/ar") +set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11") +set(CMAKE_RANLIB "/usr/bin/ranlib") +set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") +set(CMAKE_LINKER "/usr/bin/ld") +set(CMAKE_MT "") +set(CMAKE_COMPILER_IS_GNUCC 1) +set(CMAKE_C_COMPILER_LOADED 1) +set(CMAKE_C_COMPILER_WORKS TRUE) +set(CMAKE_C_ABI_COMPILED TRUE) + +set(CMAKE_C_COMPILER_ENV_VAR "CC") + +set(CMAKE_C_COMPILER_ID_RUN 1) +set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) +set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) +set(CMAKE_C_LINKER_PREFERENCE 10) + +# Save compiler ABI information. 
+set(CMAKE_C_SIZEOF_DATA_PTR "8") +set(CMAKE_C_COMPILER_ABI "ELF") +set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN") +set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") + +if(CMAKE_C_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_C_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") +endif() + +if(CMAKE_C_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") +endif() + +set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") +if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + + +set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") +set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s") +set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib") +set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake new file mode 100644 index 000000000..345e9307d --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeCXXCompiler.cmake @@ -0,0 +1,83 @@ +set(CMAKE_CXX_COMPILER "/usr/bin/c++") +set(CMAKE_CXX_COMPILER_ARG1 "") +set(CMAKE_CXX_COMPILER_ID "GNU") +set(CMAKE_CXX_COMPILER_VERSION "11.4.0") +set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") +set(CMAKE_CXX_COMPILER_WRAPPER "") +set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17") +set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON") +set(CMAKE_CXX_COMPILE_FEATURES 
"cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23") +set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") +set(CMAKE_CXX11_COMPILE_FEATURES 
"cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") +set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") +set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") +set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") +set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23") + +set(CMAKE_CXX_PLATFORM_ID "Linux") +set(CMAKE_CXX_SIMULATE_ID "") +set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "") +set(CMAKE_CXX_SIMULATE_VERSION "") + + + + +set(CMAKE_AR "/usr/bin/ar") +set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11") +set(CMAKE_RANLIB "/usr/bin/ranlib") +set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") +set(CMAKE_LINKER "/usr/bin/ld") +set(CMAKE_MT "") +set(CMAKE_COMPILER_IS_GNUCXX 1) +set(CMAKE_CXX_COMPILER_LOADED 1) +set(CMAKE_CXX_COMPILER_WORKS TRUE) +set(CMAKE_CXX_ABI_COMPILED TRUE) + +set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") + 
+set(CMAKE_CXX_COMPILER_ID_RUN 1) +set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm) +set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) + +foreach (lang C OBJC OBJCXX) + if (CMAKE_${lang}_COMPILER_ID_RUN) + foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS) + list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension}) + endforeach() + endif() +endforeach() + +set(CMAKE_CXX_LINKER_PREFERENCE 30) +set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) + +# Save compiler ABI information. +set(CMAKE_CXX_SIZEOF_DATA_PTR "8") +set(CMAKE_CXX_COMPILER_ABI "ELF") +set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN") +set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") + +if(CMAKE_CXX_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_CXX_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") +endif() + +if(CMAKE_CXX_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") +endif() + +set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") +if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + + +set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") +set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") +set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib") +set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_C.bin b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_C.bin new file mode 100755 index 000000000..a4691337f Binary files 
/dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_C.bin differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CXX.bin b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CXX.bin new file mode 100755 index 000000000..15e6e3f25 Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CXX.bin differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeSystem.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeSystem.cmake new file mode 100644 index 000000000..42ff9747e --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CMakeSystem.cmake @@ -0,0 +1,15 @@ +set(CMAKE_HOST_SYSTEM "Linux-5.15.90.1-microsoft-standard-WSL2") +set(CMAKE_HOST_SYSTEM_NAME "Linux") +set(CMAKE_HOST_SYSTEM_VERSION "5.15.90.1-microsoft-standard-WSL2") +set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") + + + +set(CMAKE_SYSTEM "Linux-5.15.90.1-microsoft-standard-WSL2") +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_VERSION "5.15.90.1-microsoft-standard-WSL2") +set(CMAKE_SYSTEM_PROCESSOR "x86_64") + +set(CMAKE_CROSSCOMPILING "FALSE") + +set(CMAKE_SYSTEM_LOADED 1) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/CMakeCCompilerId.c b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/CMakeCCompilerId.c new file mode 100644 index 000000000..41b99d778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/CMakeCCompilerId.c @@ -0,0 +1,803 @@ +#ifdef __cplusplus +# error "A C++ compiler has been selected for C." 
+#endif + +#if defined(__18CXX) +# define ID_VOID_MAIN +#endif +#if defined(__CLASSIC_C__) +/* cv-qualifiers did not exist in K&R C */ +# define const +# define volatile +#endif + +#if !defined(__has_include) +/* If the compiler does not have __has_include, pretend the answer is + always no. */ +# define __has_include(x) 0 +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# if defined(__GNUC__) +# define SIMULATE_ID "GNU" +# endif + /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later, + except that a few beta releases use the old format with V=2021. */ +# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111 +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE) + /* The third version component from --version is an update index, + but no macro is provided for it. 
*/ +# define COMPILER_VERSION_PATCH DEC(0) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER) +# define COMPILER_ID "IntelLLVM" +#if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +#endif +#if defined(__GNUC__) +# define SIMULATE_ID "GNU" +#endif +/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and + * later. Look for 6 digit vs. 8 digit version number to decide encoding. + * VVVV is no smaller than the current year when a version is released. 
+ */ +#if __INTEL_LLVM_COMPILER < 1000000L +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10) +#else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100) +#endif +#if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +#endif +#if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +#elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +#endif +#if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +#endif +#if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +#endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 
10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_C) +# define COMPILER_ID "SunPro" +# if __SUNPRO_C >= 0x5100 + /* __SUNPRO_C = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# endif + +#elif defined(__HP_cc) +# define COMPILER_ID "HP" + /* __HP_cc = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_cc % 100) + +#elif defined(__DECC) +# define COMPILER_ID "Compaq" + /* __DECC_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECC_VER % 10000) + +#elif defined(__IBMC__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__ibmxl__) && defined(__clang__) +# define COMPILER_ID "XLClang" +# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) +# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) +# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) +# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__) + + +#elif defined(__IBMC__) && 
!defined(__COMPILER_VER__) && __IBMC__ >= 800 +# define COMPILER_ID "XL" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__NVCOMPILER) +# define COMPILER_ID "NVHPC" +# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__) +# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__) +# if defined(__NVCOMPILER_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__) +# endif + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__CLANG_FUJITSU) +# define COMPILER_ID "FujitsuClang" +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# define COMPILER_VERSION_INTERNAL_STR __clang_version__ + + +#elif defined(__FUJITSU) +# define COMPILER_ID "Fujitsu" +# if defined(__FCC_version__) +# define COMPILER_VERSION 
__FCC_version__ +# elif defined(__FCC_major__) +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# endif +# if defined(__fcc_version) +# define COMPILER_VERSION_INTERNAL DEC(__fcc_version) +# elif defined(__FCC_VERSION) +# define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION) +# endif + + +#elif defined(__ghs__) +# define COMPILER_ID "GHS" +/* __GHS_VERSION_NUMBER = VVVVRP */ +# ifdef __GHS_VERSION_NUMBER +# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) +# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) +# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) +# endif + +#elif defined(__TINYC__) +# define COMPILER_ID "TinyCC" + +#elif defined(__BCC__) +# define COMPILER_ID "Bruce" + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__ARMCC_VERSION) && !defined(__clang__) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define 
COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) +# define COMPILER_ID "ARMClang" + # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000) +# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +# define COMPILER_ID "ADSP" +#if defined(__VISUALDSPVERSION__) + /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ +# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) +# define 
COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" +# if defined(__VER__) && defined(__ICCARM__) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) +# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000) +# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__)) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) +# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) +# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# endif + +#elif defined(__SDCC_VERSION_MAJOR) || defined(SDCC) +# define COMPILER_ID "SDCC" +# if defined(__SDCC_VERSION_MAJOR) +# define COMPILER_VERSION_MAJOR DEC(__SDCC_VERSION_MAJOR) +# define COMPILER_VERSION_MINOR DEC(__SDCC_VERSION_MINOR) +# define COMPILER_VERSION_PATCH DEC(__SDCC_VERSION_PATCH) +# else + /* SDCC = VRP */ +# define COMPILER_VERSION_MAJOR DEC(SDCC/100) +# define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10) +# define COMPILER_VERSION_PATCH DEC(SDCC % 10) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. 
Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__MSYS__) +# define PLATFORM_ID "MSYS" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID 
"RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# elif defined(__VXWORKS__) +# define PLATFORM_ID "VxWorks" + +# else /* unknown platform */ +# define PLATFORM_ID +# endif + +#elif defined(__INTEGRITY) +# if defined(INT_178B) +# define PLATFORM_ID "Integrity178" + +# else /* regular Integrity */ +# define PLATFORM_ID "Integrity" +# endif + +#else /* unknown platform */ +# define PLATFORM_ID + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. 
This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_ARM64EC) +# define ARCHITECTURE_ID "ARM64EC" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM64) +# define ARCHITECTURE_ID "ARM64" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# if defined(__ICCARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__ICCRX__) +# define ARCHITECTURE_ID "RX" + +# elif defined(__ICCRH850__) +# define ARCHITECTURE_ID "RH850" + +# elif defined(__ICCRL78__) +# define ARCHITECTURE_ID "RL78" + +# elif defined(__ICCRISCV__) +# define ARCHITECTURE_ID "RISCV" + +# elif defined(__ICCAVR__) +# define ARCHITECTURE_ID "AVR" + +# elif defined(__ICC430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__ICCV850__) +# define ARCHITECTURE_ID "V850" + +# elif defined(__ICC8051__) +# define ARCHITECTURE_ID "8051" + +# elif defined(__ICCSTM8__) +# define ARCHITECTURE_ID "STM8" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__ghs__) +# if defined(__PPC64__) +# define ARCHITECTURE_ID "PPC64" + +# elif 
defined(__ppc__) +# define ARCHITECTURE_ID "PPC" + +# elif defined(__ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__x86_64__) +# define ARCHITECTURE_ID "x64" + +# elif defined(__i386__) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__TI_COMPILER_VERSION__) +# if defined(__TI_ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__MSP430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__TMS320C28XX__) +# define ARCHITECTURE_ID "TMS320C28x" + +# elif defined(__TMS320C6X__) || defined(_TMS320C6X) +# define ARCHITECTURE_ID "TMS320C6x" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#else +# define ARCHITECTURE_ID +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number. */ +#ifdef COMPILER_VERSION +char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]"; + +/* Construct a string literal encoding the version number components. 
*/ +#elif defined(COMPILER_VERSION_MAJOR) +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the internal version number. */ +#ifdef COMPILER_VERSION_INTERNAL +char const info_version_internal[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', + 'i','n','t','e','r','n','a','l','[', + COMPILER_VERSION_INTERNAL,']','\0'}; +#elif defined(COMPILER_VERSION_INTERNAL_STR) +char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]"; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + +#if !defined(__STDC__) && !defined(__clang__) +# if defined(_MSC_VER) || defined(__ibmxl__) || defined(__IBMC__) +# define C_VERSION "90" +# else +# define C_VERSION +# endif +#elif __STDC_VERSION__ > 201710L +# define C_VERSION "23" +#elif __STDC_VERSION__ >= 201710L +# define C_VERSION "17" +#elif __STDC_VERSION__ >= 201000L +# define C_VERSION "11" +#elif __STDC_VERSION__ >= 199901L +# define C_VERSION "99" +#else +# define C_VERSION "90" +#endif +const char* info_language_standard_default = + "INFO" ":" "standard_default[" C_VERSION "]"; + +const char* info_language_extensions_default = "INFO" ":" "extensions_default[" +/* !defined(_MSC_VER) to exclude Clang's MSVC compatibility mode. */ +#if (defined(__clang__) || defined(__GNUC__) || \ + defined(__TI_COMPILER_VERSION__)) && \ + !defined(__STRICT_ANSI__) && !defined(_MSC_VER) + "ON" +#else + "OFF" +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +#ifdef ID_VOID_MAIN +void main() {} +#else +# if defined(__CLASSIC_C__) +int main(argc, argv) int argc; char *argv[]; +# else +int main(int argc, char* argv[]) +# endif +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + require += info_arch[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef COMPILER_VERSION_INTERNAL + require += info_version_internal[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) + require += info_cray[argc]; +#endif + require += info_language_standard_default[argc]; + require += info_language_extensions_default[argc]; + (void)argv; + return require; +} +#endif diff --git 
a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out new file mode 100755 index 000000000..c786756ab Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/CMakeCXXCompilerId.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/CMakeCXXCompilerId.cpp new file mode 100644 index 000000000..25c62a8c3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/CMakeCXXCompilerId.cpp @@ -0,0 +1,791 @@ +/* This source file must have a .cpp extension so that all C++ compilers + recognize the extension without flags. Borland does not know .cxx for + example. */ +#ifndef __cplusplus +# error "A C compiler has been selected for C++." +#endif + +#if !defined(__has_include) +/* If the compiler does not have __has_include, pretend the answer is + always no. */ +# define __has_include(x) 0 +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__COMO__) +# define COMPILER_ID "Comeau" + /* __COMO_VERSION__ = VRR */ +# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100) +# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100) + +#elif defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# if defined(__GNUC__) +# define SIMULATE_ID "GNU" +# endif + /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later, + except that a few beta releases use the old format with V=2021. 
*/ +# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111 +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE) + /* The third version component from --version is an update index, + but no macro is provided for it. */ +# define COMPILER_VERSION_PATCH DEC(0) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER) +# define COMPILER_ID "IntelLLVM" +#if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +#endif +#if defined(__GNUC__) +# define SIMULATE_ID "GNU" +#endif +/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and + * later. Look for 6 digit vs. 8 digit version number to decide encoding. + * VVVV is no smaller than the current year when a version is released. 
+ */ +#if __INTEL_LLVM_COMPILER < 1000000L +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10) +#else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100) +#endif +#if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +#endif +#if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +#elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +#endif +#if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +#endif +#if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +#endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 
10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_CC) +# define COMPILER_ID "SunPro" +# if __SUNPRO_CC >= 0x5100 + /* __SUNPRO_CC = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# endif + +#elif defined(__HP_aCC) +# define COMPILER_ID "HP" + /* __HP_aCC = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100) + +#elif defined(__DECCXX) +# define COMPILER_ID "Compaq" + /* __DECCXX_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000) + +#elif defined(__IBMCPP__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__ibmxl__) && defined(__clang__) +# define COMPILER_ID "XLClang" +# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) +# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) +# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) +# define COMPILER_VERSION_TWEAK 
DEC(__ibmxl_ptf_fix_level__) + + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800 +# define COMPILER_ID "XL" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__NVCOMPILER) +# define COMPILER_ID "NVHPC" +# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__) +# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__) +# if defined(__NVCOMPILER_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__) +# endif + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__CLANG_FUJITSU) +# define COMPILER_ID "FujitsuClang" +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# define COMPILER_VERSION_INTERNAL_STR __clang_version__ + + +#elif defined(__FUJITSU) +# define 
COMPILER_ID "Fujitsu" +# if defined(__FCC_version__) +# define COMPILER_VERSION __FCC_version__ +# elif defined(__FCC_major__) +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# endif +# if defined(__fcc_version) +# define COMPILER_VERSION_INTERNAL DEC(__fcc_version) +# elif defined(__FCC_VERSION) +# define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION) +# endif + + +#elif defined(__ghs__) +# define COMPILER_ID "GHS" +/* __GHS_VERSION_NUMBER = VVVVRP */ +# ifdef __GHS_VERSION_NUMBER +# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) +# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) +# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) +# endif + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__ARMCC_VERSION) && !defined(__clang__) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define COMPILER_VERSION_TWEAK 
DEC(__apple_build_version__) + +#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) +# define COMPILER_ID "ARMClang" + # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000) +# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__GNUC__) || defined(__GNUG__) +# define COMPILER_ID "GNU" +# if defined(__GNUC__) +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# else +# define COMPILER_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +# define COMPILER_ID "ADSP" +#if defined(__VISUALDSPVERSION__) + /* __VISUALDSPVERSION__ = 
0xVVRRPP00 */ +# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) +# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" +# if defined(__VER__) && defined(__ICCARM__) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) +# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000) +# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__)) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) +# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) +# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__MSYS__) +# define PLATFORM_ID "MSYS" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + 
+#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# elif defined(__VXWORKS__) +# define PLATFORM_ID "VxWorks" + +# else /* unknown platform */ +# define PLATFORM_ID +# endif + +#elif defined(__INTEGRITY) +# if defined(INT_178B) +# define PLATFORM_ID "Integrity178" + +# else /* regular Integrity */ +# define PLATFORM_ID "Integrity" +# endif + +#else /* unknown platform */ +# define PLATFORM_ID + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. 
This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_ARM64EC) +# define ARCHITECTURE_ID "ARM64EC" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM64) +# define ARCHITECTURE_ID "ARM64" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# if defined(__ICCARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__ICCRX__) +# define ARCHITECTURE_ID "RX" + +# elif defined(__ICCRH850__) +# define ARCHITECTURE_ID "RH850" + +# elif defined(__ICCRL78__) +# define ARCHITECTURE_ID "RL78" + +# elif defined(__ICCRISCV__) +# define ARCHITECTURE_ID "RISCV" + +# elif defined(__ICCAVR__) +# define ARCHITECTURE_ID "AVR" + +# elif defined(__ICC430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__ICCV850__) +# define ARCHITECTURE_ID "V850" + +# elif defined(__ICC8051__) +# define ARCHITECTURE_ID "8051" + +# elif defined(__ICCSTM8__) +# define ARCHITECTURE_ID "STM8" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__ghs__) +# if defined(__PPC64__) +# define ARCHITECTURE_ID "PPC64" + +# elif 
defined(__ppc__) +# define ARCHITECTURE_ID "PPC" + +# elif defined(__ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__x86_64__) +# define ARCHITECTURE_ID "x64" + +# elif defined(__i386__) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__TI_COMPILER_VERSION__) +# if defined(__TI_ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__MSP430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__TMS320C28XX__) +# define ARCHITECTURE_ID "TMS320C28x" + +# elif defined(__TMS320C6X__) || defined(_TMS320C6X) +# define ARCHITECTURE_ID "TMS320C6x" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#else +# define ARCHITECTURE_ID +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number. */ +#ifdef COMPILER_VERSION +char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]"; + +/* Construct a string literal encoding the version number components. 
*/ +#elif defined(COMPILER_VERSION_MAJOR) +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the internal version number. */ +#ifdef COMPILER_VERSION_INTERNAL +char const info_version_internal[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', + 'i','n','t','e','r','n','a','l','[', + COMPILER_VERSION_INTERNAL,']','\0'}; +#elif defined(COMPILER_VERSION_INTERNAL_STR) +char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]"; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + +#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG) && _MSVC_LANG < 201403L +# if defined(__INTEL_CXX11_MODE__) +# if defined(__cpp_aggregate_nsdmi) +# define CXX_STD 201402L +# else +# define CXX_STD 201103L +# endif +# else +# define CXX_STD 199711L +# endif +#elif defined(_MSC_VER) && defined(_MSVC_LANG) +# define CXX_STD _MSVC_LANG +#else +# define CXX_STD __cplusplus +#endif + +const char* info_language_standard_default = "INFO" ":" "standard_default[" +#if CXX_STD > 202002L + "23" +#elif CXX_STD > 201703L + "20" +#elif CXX_STD >= 201703L + "17" +#elif CXX_STD >= 201402L + "14" +#elif CXX_STD >= 201103L + "11" +#else + "98" +#endif +"]"; + +const char* info_language_extensions_default = "INFO" ":" "extensions_default[" +/* !defined(_MSC_VER) to exclude Clang's MSVC compatibility mode. */ +#if (defined(__clang__) || defined(__GNUC__) || \ + defined(__TI_COMPILER_VERSION__)) && \ + !defined(__STRICT_ANSI__) && !defined(_MSC_VER) + "ON" +#else + "OFF" +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef COMPILER_VERSION_INTERNAL + require += info_version_internal[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) + require += info_cray[argc]; +#endif + require += info_language_standard_default[argc]; + require += info_language_extensions_default[argc]; + (void)argv; + return require; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out 
b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out new file mode 100755 index 000000000..9944be481 Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake new file mode 100644 index 000000000..6e0330d60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeDirectoryInformation.cmake @@ -0,0 +1,16 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.22 + +# Relative path conversion top directories. +set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA") +set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build") + +# Force unix paths in dependencies. +set(CMAKE_FORCE_UNIX_PATHS 1) + + +# The C and CXX include file regular expressions for this directory. +set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") +set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") +set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) +set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeOutput.log b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeOutput.log new file mode 100644 index 000000000..44029d7c0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeOutput.log @@ -0,0 +1,497 @@ +The system is: Linux - 5.15.90.1-microsoft-standard-WSL2 - x86_64 +Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. 
+Compiler: /usr/bin/cc +Build flags: +Id flags: + +The output was: +0 + + +Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out" + +The C compiler identification is GNU, found in "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdC/a.out" + +Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. +Compiler: /usr/bin/c++ +Build flags: +Id flags: + +The output was: +0 + + +Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out" + +The CXX compiler identification is GNU, found in "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out" + +Detecting C compiler ABI info compiled with the following output: +Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_c6496/fast && /usr/bin/gmake -f CMakeFiles/cmTC_c6496.dir/build.make CMakeFiles/cmTC_c6496.dir/build +gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o +/usr/bin/cc -v -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c +Using built-in specs. 
+COLLECT_GCC=/usr/bin/cc +OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa +OFFLOAD_TARGET_DEFAULT=1 +Target: x86_64-linux-gnu +Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2 +Thread model: posix +Supported LTO compression algorithms: zlib zstd +gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) +COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/' + /usr/lib/gcc/x86_64-linux-gnu/11/cc1 -quiet -v -imultiarch x86_64-linux-gnu /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c -quiet -dumpdir CMakeFiles/cmTC_c6496.dir/ -dumpbase CMakeCCompilerABI.c.c -dumpbase-ext .c -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong 
-Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccP9g5du.s +GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu) + compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP + +GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 +ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu" +ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed" +ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include" +#include "..." search starts here: +#include <...> search starts here: + /usr/lib/gcc/x86_64-linux-gnu/11/include + /usr/local/include + /usr/include/x86_64-linux-gnu + /usr/include +End of search list. +GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu) + compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP + +GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 +Compiler executable checksum: 50eaa2331df977b8016186198deb2d18 +COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/' + as -v --64 -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o /tmp/ccP9g5du.s +GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38 +COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/ +LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/ +COLLECT_GCC_OPTIONS='-v' '-o' 
'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.' +Linking C executable cmTC_c6496 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c6496.dir/link.txt --verbose=1 +/usr/bin/cc -v CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -o cmTC_c6496 +Using built-in specs. +COLLECT_GCC=/usr/bin/cc +COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper +OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa +OFFLOAD_TARGET_DEFAULT=1 +Target: x86_64-linux-gnu +Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2 +Thread model: posix +Supported LTO compression algorithms: zlib zstd +gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
+COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/ +LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/ +COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6496' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6496.' + /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccAtPUmK.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_c6496 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -lgcc --push-state --as-needed -lgcc_s --pop-state -lc -lgcc --push-state --as-needed -lgcc_s --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o +COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6496' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6496.' 
+gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' + + + +Parsed C implicit include dir info from above output: rv=done + found start of include info + found start of implicit include info + add: [/usr/lib/gcc/x86_64-linux-gnu/11/include] + add: [/usr/local/include] + add: [/usr/include/x86_64-linux-gnu] + add: [/usr/include] + end of search list found + collapse include dir [/usr/lib/gcc/x86_64-linux-gnu/11/include] ==> [/usr/lib/gcc/x86_64-linux-gnu/11/include] + collapse include dir [/usr/local/include] ==> [/usr/local/include] + collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu] + collapse include dir [/usr/include] ==> [/usr/include] + implicit include dirs: [/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include] + + +Parsed C implicit link information from above output: + link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)] + ignore line: [Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp] + ignore line: [] + ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_c6496/fast && /usr/bin/gmake -f CMakeFiles/cmTC_c6496.dir/build.make CMakeFiles/cmTC_c6496.dir/build] + ignore line: [gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'] + ignore line: [Building C object CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o] + ignore line: [/usr/bin/cc -v -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c] + ignore line: [Using built-in specs.] 
+ ignore line: [COLLECT_GCC=/usr/bin/cc] + ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa] + ignore line: [OFFLOAD_TARGET_DEFAULT=1] + ignore line: [Target: x86_64-linux-gnu] + ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2] + ignore line: [Thread model: posix] + ignore line: [Supported LTO compression algorithms: zlib zstd] + ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/'] + ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1 -quiet -v -imultiarch x86_64-linux-gnu /usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c -quiet -dumpdir 
CMakeFiles/cmTC_c6496.dir/ -dumpbase CMakeCCompilerABI.c.c -dumpbase-ext .c -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccP9g5du.s] + ignore line: [GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)] + ignore line: [ compiled by GNU C version 11.4.0 GMP version 6.2.1 MPFR version 4.1.0 MPC version 1.2.1 isl version isl-0.24-GMP] + ignore line: [] + ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072] + ignore line: [ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"] + ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"] + ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"] + ignore line: [#include "..." search starts here:] + ignore line: [#include <...> search starts here:] + ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/include] + ignore line: [ /usr/local/include] + ignore line: [ /usr/include/x86_64-linux-gnu] + ignore line: [ /usr/include] + ignore line: [End of search list.] 
+ ignore line: [GNU C17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)] + ignore line: [ compiled by GNU C version 11.4.0 GMP version 6.2.1 MPFR version 4.1.0 MPC version 1.2.1 isl version isl-0.24-GMP] + ignore line: [] + ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072] + ignore line: [Compiler executable checksum: 50eaa2331df977b8016186198deb2d18] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/'] + ignore line: [ as -v --64 -o CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o /tmp/ccP9g5du.s] + ignore line: [GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38] + ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/] + ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o' '-c' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.'] + ignore line: [Linking C executable cmTC_c6496] + ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c6496.dir/link.txt --verbose=1] + ignore line: [/usr/bin/cc -v CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -o cmTC_c6496 ] + ignore line: [Using built-in specs.] 
+ ignore line: [COLLECT_GCC=/usr/bin/cc] + ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] + ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa] + ignore line: [OFFLOAD_TARGET_DEFAULT=1] + ignore line: [Target: x86_64-linux-gnu] + ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2] + ignore line: [Thread model: posix] + ignore line: [Supported LTO compression algorithms: zlib zstd] + ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ] + ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/] + ignore line: 
[LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6496' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6496.'] + link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccAtPUmK.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_c6496 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. 
CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o -lgcc --push-state --as-needed -lgcc_s --pop-state -lc -lgcc --push-state --as-needed -lgcc_s --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/collect2] ==> ignore + arg [-plugin] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so] ==> ignore + arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] ==> ignore + arg [-plugin-opt=-fresolution=/tmp/ccAtPUmK.res] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [-plugin-opt=-pass-through=-lc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [--build-id] ==> ignore + arg [--eh-frame-hdr] ==> ignore + arg [-m] ==> ignore + arg [elf_x86_64] ==> ignore + arg [--hash-style=gnu] ==> ignore + arg [--as-needed] ==> ignore + arg [-dynamic-linker] ==> ignore + arg [/lib64/ld-linux-x86-64.so.2] ==> ignore + arg [-pie] ==> ignore + arg [-znow] ==> ignore + arg [-zrelro] ==> ignore + arg [-o] ==> ignore + arg [cmTC_c6496] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] + arg [-L/lib/x86_64-linux-gnu] ==> 
dir [/lib/x86_64-linux-gnu] + arg [-L/lib/../lib] ==> dir [/lib/../lib] + arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu] + arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..] + arg [CMakeFiles/cmTC_c6496.dir/CMakeCCompilerABI.c.o] ==> ignore + arg [-lgcc] ==> lib [gcc] + arg [--push-state] ==> ignore + arg [--as-needed] ==> ignore + arg [-lgcc_s] ==> lib [gcc_s] + arg [--pop-state] ==> ignore + arg [-lc] ==> lib [c] + arg [-lgcc] ==> lib [gcc] + arg [--push-state] ==> ignore + arg [--as-needed] ==> ignore + arg [-lgcc_s] ==> lib [gcc_s] + arg [--pop-state] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] + collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> [/usr/lib/x86_64-linux-gnu/Scrt1.o] + collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> [/usr/lib/x86_64-linux-gnu/crti.o] + collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> [/usr/lib/x86_64-linux-gnu/crtn.o] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11] ==> [/usr/lib/gcc/x86_64-linux-gnu/11] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> [/usr/lib] + collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu] + collapse library dir [/lib/../lib] ==> [/lib] + collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/../lib] ==> [/usr/lib] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..] 
==> [/usr/lib] + implicit libs: [gcc;gcc_s;c;gcc;gcc_s] + implicit objs: [/usr/lib/x86_64-linux-gnu/Scrt1.o;/usr/lib/x86_64-linux-gnu/crti.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o;/usr/lib/x86_64-linux-gnu/crtn.o] + implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib] + implicit fwks: [] + + +Detecting CXX compiler ABI info compiled with the following output: +Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_871bc/fast && /usr/bin/gmake -f CMakeFiles/cmTC_871bc.dir/build.make CMakeFiles/cmTC_871bc.dir/build +gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o +/usr/bin/c++ -v -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp +Using built-in specs. 
+COLLECT_GCC=/usr/bin/c++ +OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa +OFFLOAD_TARGET_DEFAULT=1 +Target: x86_64-linux-gnu +Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2 +Thread model: posix +Supported LTO compression algorithms: zlib zstd +gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) +COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/' + /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_871bc.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version 
-fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/cczm4rAC.s +GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu) + compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP + +GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 +ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11" +ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu" +ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed" +ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include" +#include "..." search starts here: +#include <...> search starts here: + /usr/include/c++/11 + /usr/include/x86_64-linux-gnu/c++/11 + /usr/include/c++/11/backward + /usr/lib/gcc/x86_64-linux-gnu/11/include + /usr/local/include + /usr/include/x86_64-linux-gnu + /usr/include +End of search list. 
+GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu) + compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP + +GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 +Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f +COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/' + as -v --64 -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o /tmp/cczm4rAC.s +GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38 +COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/ +LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/ +COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.' +Linking CXX executable cmTC_871bc +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_871bc.dir/link.txt --verbose=1 +/usr/bin/c++ -v CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_871bc +Using built-in specs. 
+COLLECT_GCC=/usr/bin/c++ +COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper +OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa +OFFLOAD_TARGET_DEFAULT=1 +Target: x86_64-linux-gnu +Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2 +Thread model: posix +Supported LTO compression algorithms: zlib zstd +gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) +COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/ 
+LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/ +COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_871bc' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_871bc.' + /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccvdFTTw.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_871bc /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o +COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_871bc' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_871bc.' 
+gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' + + + +Parsed CXX implicit include dir info from above output: rv=done + found start of include info + found start of implicit include info + add: [/usr/include/c++/11] + add: [/usr/include/x86_64-linux-gnu/c++/11] + add: [/usr/include/c++/11/backward] + add: [/usr/lib/gcc/x86_64-linux-gnu/11/include] + add: [/usr/local/include] + add: [/usr/include/x86_64-linux-gnu] + add: [/usr/include] + end of search list found + collapse include dir [/usr/include/c++/11] ==> [/usr/include/c++/11] + collapse include dir [/usr/include/x86_64-linux-gnu/c++/11] ==> [/usr/include/x86_64-linux-gnu/c++/11] + collapse include dir [/usr/include/c++/11/backward] ==> [/usr/include/c++/11/backward] + collapse include dir [/usr/lib/gcc/x86_64-linux-gnu/11/include] ==> [/usr/lib/gcc/x86_64-linux-gnu/11/include] + collapse include dir [/usr/local/include] ==> [/usr/local/include] + collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu] + collapse include dir [/usr/include] ==> [/usr/include] + implicit include dirs: [/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include] + + +Parsed CXX implicit link information from above output: + link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)] + ignore line: [Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp] + ignore line: [] + ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_871bc/fast && /usr/bin/gmake -f CMakeFiles/cmTC_871bc.dir/build.make CMakeFiles/cmTC_871bc.dir/build] + ignore line: [gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp'] + ignore line: [Building CXX object 
CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o] + ignore line: [/usr/bin/c++ -v -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp] + ignore line: [Using built-in specs.] + ignore line: [COLLECT_GCC=/usr/bin/c++] + ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa] + ignore line: [OFFLOAD_TARGET_DEFAULT=1] + ignore line: [Target: x86_64-linux-gnu] + ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2] + ignore line: [Thread model: posix] + ignore line: [Supported LTO compression algorithms: zlib zstd] + ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' 
'-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/'] + ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_871bc.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/cczm4rAC.s] + ignore line: [GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)] + ignore line: [ compiled by GNU C version 11.4.0 GMP version 6.2.1 MPFR version 4.1.0 MPC version 1.2.1 isl version isl-0.24-GMP] + ignore line: [] + ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072] + ignore line: [ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11"] + ignore line: [ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"] + ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"] + ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86_64-linux-gnu/include"] + ignore line: [#include "..." search starts here:] + ignore line: [#include <...> search starts here:] + ignore line: [ /usr/include/c++/11] + ignore line: [ /usr/include/x86_64-linux-gnu/c++/11] + ignore line: [ /usr/include/c++/11/backward] + ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/include] + ignore line: [ /usr/local/include] + ignore line: [ /usr/include/x86_64-linux-gnu] + ignore line: [ /usr/include] + ignore line: [End of search list.] 
+ ignore line: [GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)] + ignore line: [ compiled by GNU C version 11.4.0 GMP version 6.2.1 MPFR version 4.1.0 MPC version 1.2.1 isl version isl-0.24-GMP] + ignore line: [] + ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072] + ignore line: [Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/'] + ignore line: [ as -v --64 -o CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o /tmp/cczm4rAC.s] + ignore line: [GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38] + ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/] + ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.'] + ignore line: [Linking CXX executable cmTC_871bc] + ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_871bc.dir/link.txt --verbose=1] + ignore line: [/usr/bin/c++ -v CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_871bc ] + ignore line: [Using built-in specs.] 
+ ignore line: [COLLECT_GCC=/usr/bin/c++] + ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] + ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa] + ignore line: [OFFLOAD_TARGET_DEFAULT=1] + ignore line: [Target: x86_64-linux-gnu] + ignore line: [Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c ada c++ go brig d fortran objc obj-c++ m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32 m64 mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-nvptx/usr amdgcn-amdhsa=/build/gcc-11-XeT9lY/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2] + ignore line: [Thread model: posix] + ignore line: [Supported LTO compression algorithms: zlib zstd] + ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ] + ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/] + ignore line: 
[LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/] + ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_871bc' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_871bc.'] + link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccvdFTTw.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_871bc /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. 
CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/collect2] ==> ignore + arg [-plugin] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so] ==> ignore + arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] ==> ignore + arg [-plugin-opt=-fresolution=/tmp/ccvdFTTw.res] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [-plugin-opt=-pass-through=-lc] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore + arg [-plugin-opt=-pass-through=-lgcc] ==> ignore + arg [--build-id] ==> ignore + arg [--eh-frame-hdr] ==> ignore + arg [-m] ==> ignore + arg [elf_x86_64] ==> ignore + arg [--hash-style=gnu] ==> ignore + arg [--as-needed] ==> ignore + arg [-dynamic-linker] ==> ignore + arg [/lib64/ld-linux-x86-64.so.2] ==> ignore + arg [-pie] ==> ignore + arg [-znow] ==> ignore + arg [-zrelro] ==> ignore + arg [-o] ==> ignore + arg [cmTC_871bc] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] + arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu] + arg [-L/lib/../lib] ==> dir 
[/lib/../lib] + arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu] + arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib] + arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..] + arg [CMakeFiles/cmTC_871bc.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore + arg [-lstdc++] ==> lib [stdc++] + arg [-lm] ==> lib [m] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [-lc] ==> lib [c] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o] + arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] + collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> [/usr/lib/x86_64-linux-gnu/Scrt1.o] + collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> [/usr/lib/x86_64-linux-gnu/crti.o] + collapse obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o] ==> [/usr/lib/x86_64-linux-gnu/crtn.o] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11] ==> [/usr/lib/gcc/x86_64-linux-gnu/11] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib] ==> [/usr/lib] + collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu] + collapse library dir [/lib/../lib] ==> [/lib] + collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/usr/lib/../lib] ==> [/usr/lib] + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..] 
==> [/usr/lib] + implicit libs: [stdc++;m;gcc_s;gcc;c;gcc_s;gcc] + implicit objs: [/usr/lib/x86_64-linux-gnu/Scrt1.o;/usr/lib/x86_64-linux-gnu/crti.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o;/usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o;/usr/lib/x86_64-linux-gnu/crtn.o] + implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib] + implicit fwks: [] + + +Determining if the include file pthread.h exists passed with the following output: +Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_924ae/fast && /usr/bin/gmake -f CMakeFiles/cmTC_924ae.dir/build.make CMakeFiles/cmTC_924ae.dir/build +gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_924ae.dir/CheckIncludeFile.c.o +/usr/bin/cc -fPIC -o CMakeFiles/cmTC_924ae.dir/CheckIncludeFile.c.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp/CheckIncludeFile.c +Linking C executable cmTC_924ae +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_924ae.dir/link.txt --verbose=1 +/usr/bin/cc -fPIC CMakeFiles/cmTC_924ae.dir/CheckIncludeFile.c.o -o cmTC_924ae +gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' + + + +Performing C SOURCE FILE Test CMAKE_HAVE_LIBC_PTHREAD succeeded with the following output: +Change Dir: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_31011/fast && /usr/bin/gmake -f CMakeFiles/cmTC_31011.dir/build.make CMakeFiles/cmTC_31011.dir/build +gmake[1]: Entering directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' +Building C object CMakeFiles/cmTC_31011.dir/src.c.o +/usr/bin/cc -DCMAKE_HAVE_LIBC_PTHREAD -fPIC -o CMakeFiles/cmTC_31011.dir/src.c.o -c 
/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp/src.c +Linking C executable cmTC_31011 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_31011.dir/link.txt --verbose=1 +/usr/bin/cc -fPIC CMakeFiles/cmTC_31011.dir/src.c.o -o cmTC_31011 +gmake[1]: Leaving directory '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeTmp' + + +Source file was: +#include + +static void* test_func(void* data) +{ + return data; +} + +int main(void) +{ + pthread_t thread; + pthread_create(&thread, NULL, test_func, NULL); + pthread_detach(thread); + pthread_cancel(thread); + pthread_join(thread, NULL); + pthread_atfork(NULL, NULL, NULL); + pthread_exit(NULL); + + return 0; +} + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeRuleHashes.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeRuleHashes.txt new file mode 100644 index 000000000..17b99f247 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/CMakeRuleHashes.txt @@ -0,0 +1,5 @@ +# Hashes of file build rules. +26f0f991a0077627a6b7828f81498ad5 CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o +55db74aac8a85fdaacfdffb72e447185 CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o +3f710a41fdff272f63aa0a841d96d42f CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o +f81070b22b3ab370c5639d1f7d659f2d CMakeFiles/cudasift.dir/cudasift_intermediate_link.o diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile.cmake new file mode 100644 index 000000000..9c4baa2ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile.cmake @@ -0,0 +1,149 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22 + +# The generator used is: +set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") + +# The top level Makefile was generated from the following files: +set(CMAKE_MAKEFILE_DEPENDS + "CMakeCache.txt" + "../CMakeLists.txt" + "CMakeFiles/3.22.1/CMakeCCompiler.cmake" + "CMakeFiles/3.22.1/CMakeCXXCompiler.cmake" + "CMakeFiles/3.22.1/CMakeSystem.cmake" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend" + "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen" + "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend" + "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVConfig-version.cmake" + "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVConfig.cmake" + "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVModules-release.cmake" + "/usr/lib/x86_64-linux-gnu/cmake/opencv4/OpenCVModules.cmake" + "/usr/share/cmake-3.22/Modules/CMakeCCompiler.cmake.in" + "/usr/share/cmake-3.22/Modules/CMakeCCompilerABI.c" + "/usr/share/cmake-3.22/Modules/CMakeCInformation.cmake" + "/usr/share/cmake-3.22/Modules/CMakeCXXCompiler.cmake.in" + "/usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp" + "/usr/share/cmake-3.22/Modules/CMakeCXXInformation.cmake" + "/usr/share/cmake-3.22/Modules/CMakeCommonLanguageInclude.cmake" + "/usr/share/cmake-3.22/Modules/CMakeCompilerIdDetection.cmake" + "/usr/share/cmake-3.22/Modules/CMakeDetermineCCompiler.cmake" + "/usr/share/cmake-3.22/Modules/CMakeDetermineCXXCompiler.cmake" + "/usr/share/cmake-3.22/Modules/CMakeDetermineCompileFeatures.cmake" + "/usr/share/cmake-3.22/Modules/CMakeDetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/CMakeDetermineCompilerABI.cmake" + "/usr/share/cmake-3.22/Modules/CMakeDetermineCompilerId.cmake" + 
"/usr/share/cmake-3.22/Modules/CMakeDetermineSystem.cmake" + "/usr/share/cmake-3.22/Modules/CMakeFindBinUtils.cmake" + "/usr/share/cmake-3.22/Modules/CMakeGenericSystem.cmake" + "/usr/share/cmake-3.22/Modules/CMakeInitializeConfigs.cmake" + "/usr/share/cmake-3.22/Modules/CMakeLanguageInformation.cmake" + "/usr/share/cmake-3.22/Modules/CMakeParseImplicitIncludeInfo.cmake" + "/usr/share/cmake-3.22/Modules/CMakeParseImplicitLinkInfo.cmake" + "/usr/share/cmake-3.22/Modules/CMakeParseLibraryArchitecture.cmake" + "/usr/share/cmake-3.22/Modules/CMakeSystem.cmake.in" + "/usr/share/cmake-3.22/Modules/CMakeSystemSpecificInformation.cmake" + "/usr/share/cmake-3.22/Modules/CMakeSystemSpecificInitialize.cmake" + "/usr/share/cmake-3.22/Modules/CMakeTestCCompiler.cmake" + "/usr/share/cmake-3.22/Modules/CMakeTestCXXCompiler.cmake" + "/usr/share/cmake-3.22/Modules/CMakeTestCompilerCommon.cmake" + "/usr/share/cmake-3.22/Modules/CMakeUnixFindMake.cmake" + "/usr/share/cmake-3.22/Modules/CheckCSourceCompiles.cmake" + "/usr/share/cmake-3.22/Modules/CheckIncludeFile.c.in" + "/usr/share/cmake-3.22/Modules/CheckIncludeFile.cmake" + "/usr/share/cmake-3.22/Modules/CheckLibraryExists.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/ADSP-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/ARMCC-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/ARMClang-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/AppleClang-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Borland-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Bruce-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/CMakeCommonCompilerMacros.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Clang-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Clang-DetermineCompilerInternal.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Comeau-CXX-DetermineCompiler.cmake" + 
"/usr/share/cmake-3.22/Modules/Compiler/Compaq-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Compaq-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Cray-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Embarcadero-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Fujitsu-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/FujitsuClang-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/GHS-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/GNU-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/GNU-C.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/GNU-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/GNU-CXX.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/GNU-FindBinUtils.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/GNU.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/HP-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/HP-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/IAR-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/IBMCPP-C-DetermineVersionInternal.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/IBMCPP-CXX-DetermineVersionInternal.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Intel-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/IntelLLVM-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/MSVC-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/NVHPC-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/NVIDIA-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/OpenWatcom-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/PGI-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/PathScale-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/SCO-DetermineCompiler.cmake" + 
"/usr/share/cmake-3.22/Modules/Compiler/SDCC-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/SunPro-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/SunPro-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/TI-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/TinyCC-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/VisualAge-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/VisualAge-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/Watcom-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/XL-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/XL-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/XLClang-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/XLClang-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/zOS-C-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/Compiler/zOS-CXX-DetermineCompiler.cmake" + "/usr/share/cmake-3.22/Modules/FindCUDA.cmake" + "/usr/share/cmake-3.22/Modules/FindCUDA/run_nvcc.cmake" + "/usr/share/cmake-3.22/Modules/FindCUDA/select_compute_arch.cmake" + "/usr/share/cmake-3.22/Modules/FindPackageHandleStandardArgs.cmake" + "/usr/share/cmake-3.22/Modules/FindPackageMessage.cmake" + "/usr/share/cmake-3.22/Modules/FindThreads.cmake" + "/usr/share/cmake-3.22/Modules/Internal/CheckSourceCompiles.cmake" + "/usr/share/cmake-3.22/Modules/Internal/FeatureTesting.cmake" + "/usr/share/cmake-3.22/Modules/Platform/Linux-Determine-CXX.cmake" + "/usr/share/cmake-3.22/Modules/Platform/Linux-GNU-C.cmake" + "/usr/share/cmake-3.22/Modules/Platform/Linux-GNU-CXX.cmake" + "/usr/share/cmake-3.22/Modules/Platform/Linux-GNU.cmake" + "/usr/share/cmake-3.22/Modules/Platform/Linux.cmake" + "/usr/share/cmake-3.22/Modules/Platform/UnixPaths.cmake" + ) + +# The corresponding makefile is: +set(CMAKE_MAKEFILE_OUTPUTS + "Makefile" + 
"CMakeFiles/cmake.check_cache" + ) + +# Byproducts of CMake generate step: +set(CMAKE_MAKEFILE_PRODUCTS + "CMakeFiles/3.22.1/CMakeSystem.cmake" + "CMakeFiles/3.22.1/CMakeCCompiler.cmake" + "CMakeFiles/3.22.1/CMakeCXXCompiler.cmake" + "CMakeFiles/3.22.1/CMakeCCompiler.cmake" + "CMakeFiles/3.22.1/CMakeCXXCompiler.cmake" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen" + "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake" + "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake" + "CMakeFiles/CMakeDirectoryInformation.cmake" + ) + +# Dependency information for all targets: +set(CMAKE_DEPEND_INFO_FILES + "CMakeFiles/cudasift.dir/DependInfo.cmake" + ) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile2 b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile2 new file mode 100644 index 000000000..5b83d3e00 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/Makefile2 @@ -0,0 +1,112 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.22 + +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + +# Disable VCS-based implicit rules. +% : %,v + +# Disable VCS-based implicit rules. +% : RCS/% + +# Disable VCS-based implicit rules. +% : RCS/%,v + +# Disable VCS-based implicit rules. +% : SCCS/s.% + +# Disable VCS-based implicit rules. 
+% : s.% + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Command-line flag to silence nested $(MAKE). +$(VERBOSE)MAKESILENT = -s + +#Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E rm -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build + +#============================================================================= +# Directory level rules for the build root directory + +# The main recursive "all" target. +all: CMakeFiles/cudasift.dir/all +.PHONY : all + +# The main recursive "preinstall" target. +preinstall: +.PHONY : preinstall + +# The main recursive "clean" target. +clean: CMakeFiles/cudasift.dir/clean +.PHONY : clean + +#============================================================================= +# Target rules for target CMakeFiles/cudasift.dir + +# All Build rule for target. +CMakeFiles/cudasift.dir/all: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/depend + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=1,2,3,4,5,6,7,8 "Built target cudasift" +.PHONY : CMakeFiles/cudasift.dir/all + +# Build rule for subdir invocation for target. 
+CMakeFiles/cudasift.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles 8 + $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/cudasift.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles 0 +.PHONY : CMakeFiles/cudasift.dir/rule + +# Convenience name for target. +cudasift: CMakeFiles/cudasift.dir/rule +.PHONY : cudasift + +# clean rule for target. +CMakeFiles/cudasift.dir/clean: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/clean +.PHONY : CMakeFiles/cudasift.dir/clean + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/TargetDirectories.txt b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/TargetDirectories.txt new file mode 100644 index 000000000..88efc46e1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/TargetDirectories.txt @@ -0,0 +1,7 @@ +/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir +/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/edit_cache.dir +/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/rebuild_cache.dir +/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/list_install_components.dir +/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/install.dir 
+/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/install/local.dir +/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/install/strip.dir diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cmake.check_cache b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cmake.check_cache new file mode 100644 index 000000000..3dccd7317 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cmake.check_cache @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake new file mode 100644 index 000000000..059d1e524 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake @@ -0,0 +1,21 @@ + +# Consider dependencies only in project. +set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) + +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + ) + +# The set of dependency files which are needed: +set(CMAKE_DEPENDS_DEPENDENCY_FILES + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp" "CMakeFiles/cudasift.dir/geomFuncs.cpp.o" "gcc" "CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp" "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o" "gcc" "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp" "CMakeFiles/cudasift.dir/mainSift.cpp.o" "gcc" "CMakeFiles/cudasift.dir/mainSift.cpp.o.d" + ) + +# Targets to which this target links. 
+set(CMAKE_TARGET_LINKED_INFO_FILES + ) + +# Fortran module output directory. +set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/build.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/build.make new file mode 100644 index 000000000..8ae41e034 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/build.make @@ -0,0 +1,237 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.22 + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + +# Disable VCS-based implicit rules. +% : %,v + +# Disable VCS-based implicit rules. +% : RCS/% + +# Disable VCS-based implicit rules. +% : RCS/%,v + +# Disable VCS-based implicit rules. +% : SCCS/s.% + +# Disable VCS-based implicit rules. +% : s.% + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Command-line flag to silence nested $(MAKE). +$(VERBOSE)MAKESILENT = -s + +#Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E rm -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA + +# The top-level build directory on which CMake was run. 
+CMAKE_BINARY_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build + +# Include any dependencies generated for this target. +include CMakeFiles/cudasift.dir/depend.make +# Include any dependencies generated by the compiler for this target. +include CMakeFiles/cudasift.dir/compiler_depend.make + +# Include the progress variables for this target. +include CMakeFiles/cudasift.dir/progress.make + +# Include the compile flags for this target's objects. +include CMakeFiles/cudasift.dir/flags.make + +CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend +CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake +CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o: ../cudaImage.cu + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building NVCC (Device) object CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o" + cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -E make_directory /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//. 
+ cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o.cubin.txt -P /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.cmake + +CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend +CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake +CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o: ../cudaSiftH.cu + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Building NVCC (Device) object CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o" + cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -E make_directory /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//. 
+ cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o.cubin.txt -P /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.cmake + +CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend +CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake +CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o: ../matching.cu + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_3) "Building NVCC (Device) object CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o" + cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -E make_directory /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//. 
+ cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir && /usr/bin/cmake -D verbose:BOOL=$(VERBOSE) -D build_configuration:STRING= -D generated_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -D generated_cubin_file:STRING=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o.cubin.txt -P /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.cmake + +CMakeFiles/cudasift.dir/cudasift_intermediate_link.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o +CMakeFiles/cudasift.dir/cudasift_intermediate_link.o: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o +CMakeFiles/cudasift.dir/cudasift_intermediate_link.o: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --blue --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_4) "Building NVCC intermediate link file CMakeFiles/cudasift.dir/cudasift_intermediate_link.o" + /usr/local/cuda/bin/nvcc -arch=sm_80 -m64 -ccbin /usr/bin/cc -dlink /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/./cudasift_intermediate_link.o + +CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: CMakeFiles/cudasift.dir/flags.make +CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: 
/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp +CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: CMakeFiles/cudasift.dir/compiler_depend.ts + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_5) "Building CXX object CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -MD -MT CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o -MF CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d -o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp + +CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp > CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i + +CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp -o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s + 
+CMakeFiles/cudasift.dir/geomFuncs.cpp.o: CMakeFiles/cudasift.dir/flags.make +CMakeFiles/cudasift.dir/geomFuncs.cpp.o: ../geomFuncs.cpp +CMakeFiles/cudasift.dir/geomFuncs.cpp.o: CMakeFiles/cudasift.dir/compiler_depend.ts + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_6) "Building CXX object CMakeFiles/cudasift.dir/geomFuncs.cpp.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -MD -MT CMakeFiles/cudasift.dir/geomFuncs.cpp.o -MF CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d -o CMakeFiles/cudasift.dir/geomFuncs.cpp.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp + +CMakeFiles/cudasift.dir/geomFuncs.cpp.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cudasift.dir/geomFuncs.cpp.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp > CMakeFiles/cudasift.dir/geomFuncs.cpp.i + +CMakeFiles/cudasift.dir/geomFuncs.cpp.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cudasift.dir/geomFuncs.cpp.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp -o CMakeFiles/cudasift.dir/geomFuncs.cpp.s + +CMakeFiles/cudasift.dir/mainSift.cpp.o: CMakeFiles/cudasift.dir/flags.make +CMakeFiles/cudasift.dir/mainSift.cpp.o: ../mainSift.cpp +CMakeFiles/cudasift.dir/mainSift.cpp.o: CMakeFiles/cudasift.dir/compiler_depend.ts + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_7) "Building CXX object CMakeFiles/cudasift.dir/mainSift.cpp.o" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) 
-MD -MT CMakeFiles/cudasift.dir/mainSift.cpp.o -MF CMakeFiles/cudasift.dir/mainSift.cpp.o.d -o CMakeFiles/cudasift.dir/mainSift.cpp.o -c /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp + +CMakeFiles/cudasift.dir/mainSift.cpp.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/cudasift.dir/mainSift.cpp.i" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp > CMakeFiles/cudasift.dir/mainSift.cpp.i + +CMakeFiles/cudasift.dir/mainSift.cpp.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/cudasift.dir/mainSift.cpp.s" + /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp -o CMakeFiles/cudasift.dir/mainSift.cpp.s + +# Object files for target cudasift +cudasift_OBJECTS = \ +"CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o" \ +"CMakeFiles/cudasift.dir/geomFuncs.cpp.o" \ +"CMakeFiles/cudasift.dir/mainSift.cpp.o" + +# External object files for target cudasift +cudasift_EXTERNAL_OBJECTS = \ +"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o" \ +"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o" \ +"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o" \ +"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o" + +cudasift: CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o +cudasift: CMakeFiles/cudasift.dir/geomFuncs.cpp.o +cudasift: CMakeFiles/cudasift.dir/mainSift.cpp.o +cudasift: 
CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o +cudasift: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o +cudasift: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o +cudasift: CMakeFiles/cudasift.dir/cudasift_intermediate_link.o +cudasift: CMakeFiles/cudasift.dir/build.make +cudasift: /usr/local/cuda/lib64/libcudart_static.a +cudasift: /usr/lib/x86_64-linux-gnu/librt.a +cudasift: /usr/local/cuda/lib64/libcudadevrt.a +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_stitching.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_alphamat.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_aruco.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_barcode.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_bgsegm.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_bioinspired.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_ccalib.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dnn_objdetect.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dnn_superres.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dpm.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_face.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_freetype.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_fuzzy.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_hdf.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_hfs.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_img_hash.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_intensity_transform.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_line_descriptor.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_mcc.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_quality.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_rapid.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_reg.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_rgbd.so.4.5.4d +cudasift: 
/usr/lib/x86_64-linux-gnu/libopencv_saliency.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_shape.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_stereo.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_structured_light.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_superres.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_surface_matching.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_tracking.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_videostab.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_viz.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_wechat_qrcode.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_xobjdetect.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_xphoto.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_highgui.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_datasets.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_plot.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_text.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_ml.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_phase_unwrapping.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_optflow.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_ximgproc.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_video.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_videoio.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_imgcodecs.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_objdetect.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_calib3d.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_dnn.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_features2d.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_flann.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_photo.so.4.5.4d +cudasift: /usr/lib/x86_64-linux-gnu/libopencv_imgproc.so.4.5.4d +cudasift: 
/usr/lib/x86_64-linux-gnu/libopencv_core.so.4.5.4d +cudasift: CMakeFiles/cudasift.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_8) "Linking CXX executable cudasift" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/cudasift.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/cudasift.dir/build: cudasift +.PHONY : CMakeFiles/cudasift.dir/build + +CMakeFiles/cudasift.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/cudasift.dir/cmake_clean.cmake +.PHONY : CMakeFiles/cudasift.dir/clean + +CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o +CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o +CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o +CMakeFiles/cudasift.dir/depend: CMakeFiles/cudasift.dir/cudasift_intermediate_link.o + cd /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/cudasift.dir/depend + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cmake_clean.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cmake_clean.cmake new file mode 100644 index 000000000..29fb8f06c --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cmake_clean.cmake @@ -0,0 +1,19 @@ +file(REMOVE_RECURSE + 
"CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o" + "CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o" + "CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o" + "CMakeFiles/cudasift.dir/cudasift_intermediate_link.o" + "CMakeFiles/cudasift.dir/geomFuncs.cpp.o" + "CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d" + "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o" + "CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d" + "CMakeFiles/cudasift.dir/mainSift.cpp.o" + "CMakeFiles/cudasift.dir/mainSift.cpp.o.d" + "cudasift" + "cudasift.pdb" +) + +# Per-language clean rules from dependency scanning. +foreach(lang CXX) + include(CMakeFiles/cudasift.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.make new file mode 100644 index 000000000..33c8a9a40 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.make @@ -0,0 +1,2 @@ +# Empty compiler generated dependencies file for cudasift. +# This may be replaced when dependencies are built. diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.ts b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.ts new file mode 100644 index 000000000..587836585 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/compiler_depend.ts @@ -0,0 +1,2 @@ +# CMAKE generated file: DO NOT EDIT! +# Timestamp file for compiler generated dependencies management for cudasift. 
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o new file mode 100644 index 000000000..34b81727b Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake new file mode 100644 index 000000000..36ab0c6b1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake @@ -0,0 +1,314 @@ +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +########################################################################## +# This file runs the nvcc commands to produce the desired output file along with +# the dependency file needed by CMake to compute dependencies. In addition the +# file checks the output of each command and if the command fails it deletes the +# output files. + +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# +# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or +# RelWithDebInfo, but it should match one of the +# entries in CUDA_HOST_FLAGS. This is the build +# configuration used when compiling the code. If +# blank or unspecified Debug is assumed as this is +# what CMake does. +# +# generated_file:STRING=<> File to generate. This argument must be passed in. +# +# generated_cubin_file:STRING=<> File to generate. This argument must be passed +# in if build_cubin is true. 
+ +cmake_policy(PUSH) +cmake_policy(SET CMP0007 NEW) +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(CMAKE_COMMAND "/usr/bin/cmake") # path +set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.depend") # path +set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path +set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path +set(build_cubin OFF) # bool +set(CUDA_HOST_COMPILER "/usr/bin/cc") # path +# We won't actually use these variables for now, but we need to set this, in +# order to force this file to be run again if it changes. 
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path +set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o.cubin.txt") # path + +set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path +set(CUDA_NVCC_FLAGS ;; -arch=sm_80) # list +# Build specific configuration flags +set(CUDA_NVCC_FLAGS_DEBUG ; ) +set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) +set(CUDA_NVCC_FLAGS_RELEASE ; ) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) +set(nvcc_flags -m64) # list +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA;/usr/local/cuda/include;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/
usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4]==]) # list (needs to be in lua quotes to address backslashes) +string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") +set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). +set(format_flag "-dc") # string +set(cuda_language_flag ) # list + +# Clean up list of include directories and add -I flags +list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) +set(CUDA_NVCC_INCLUDE_ARGS) +foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") +endforeach() + +# Clean up list of compile definitions, add -D flags, and append to nvcc_flags +list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) +foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) + list(APPEND nvcc_flags "-D${def}") +endforeach() + +if(build_cubin AND NOT generated_cubin_file) + message(FATAL_ERROR "You must specify generated_cubin_file on the command line") +endif() + +# This is the list of host compilation flags. It C or CXX should already have +# been chosen by FindCUDA.cmake. +set(CMAKE_HOST_FLAGS -O3 -msse2 ) +set(CMAKE_HOST_FLAGS_DEBUG -g) +set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) + +# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler +set(nvcc_host_compiler_flags "") +# If we weren't given a build_configuration, use Debug. +if(NOT build_configuration) + set(build_configuration Debug) +endif() +string(TOUPPER "${build_configuration}" build_configuration) +#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") +foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 
+ string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") +endforeach() +if (nvcc_host_compiler_flags) + set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) +endif() +#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") +# Add the build specific configuration flags +list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) + +# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority +list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) +list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) + set(CCBIN -ccbin "${CCBIN}") + else() + set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") + endif() +endif() + +# cuda_execute_process - Executes a command with optional command echo and status message. +# +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# +# CUDA_result - return value from running the command +# +# Make this a macro instead of a function, so that things like RESULT_VARIABLE +# and other return variables are present after executing the process. +macro(cuda_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Now we need to build up our command string. We are accounting for quotes + # and spaces, anything else is left up to the user to fix if they want to + # copy and paste a runnable command line. + set(cuda_execute_process_string) + foreach(arg ${ARGN}) + # If there are quotes, escape them, so they come through. 
+ string(REPLACE "\"" "\\\"" arg ${arg}) + # Args with spaces need quotes around them to get them to be parsed as a single argument. + if(arg MATCHES " ") + list(APPEND cuda_execute_process_string "\"${arg}\"") + else() + list(APPEND cuda_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) +endmacro() + +# Delete the target file +cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + +# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag +# for dependency generation and hope for the best. +set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") +set(CUDA_VERSION 12.2) +if(CUDA_VERSION VERSION_LESS "3.0") + # Note that this will remove all occurrences of -G. + list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") +endif() + +# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This +# can cause incorrect dependencies when #including files based on this macro which is +# defined in the generating passes of nvcc invocation. We will go ahead and manually +# define this for now until a future version fixes this bug. +set(CUDACC_DEFINE -D__CUDACC__) + +# Generate the dependency file +cuda_execute_process( + "Generating dependency file: ${NVCC_generated_dependency_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + -M + ${CUDACC_DEFINE} + "${source_file}" + -o "${NVCC_generated_dependency_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${depends_CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file. Don't put the +# quotes just around the filenames for the input_file and output_file variables. 
+# CMake will pass the quotes through and not be able to find the file. +cuda_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" + -P "${CUDA_make2cmake}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +cuda_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +cuda_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the code +cuda_execute_process( + "Generating ${generated_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${cuda_language_flag} + ${format_flag} -o "${generated_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + # Since nvcc can sometimes leave half done files make sure that we delete the output file. + cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() + +# Cubin resource report commands. +if( build_cubin ) + # Run with -cubin to produce resource usage report. 
+ cuda_execute_process( + "Generating ${generated_cubin_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${CUDA_NVCC_FLAGS} + ${nvcc_flags} + ${CCBIN} + ${nvcc_host_compiler_flags} + -DNVCC + -cubin + -o "${generated_cubin_file}" + ${CUDA_NVCC_INCLUDE_ARGS} + ) + + # Execute the parser script. + cuda_execute_process( + "Executing the parser script" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:STRING=${generated_cubin_file}" + -P "${CUDA_parse_cubin}" + ) + +endif() + +cmake_policy(POP) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen new file mode 100644 index 000000000..67c13bf13 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.cmake.pre-gen @@ -0,0 +1,314 @@ +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +########################################################################## +# This file runs the nvcc commands to produce the desired output file along with +# the dependency file needed by CMake to compute dependencies. In addition the +# file checks the output of each command and if the command fails it deletes the +# output files. + +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# +# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or +# RelWithDebInfo, but it should match one of the +# entries in CUDA_HOST_FLAGS. This is the build +# configuration used when compiling the code. If +# blank or unspecified Debug is assumed as this is +# what CMake does. +# +# generated_file:STRING=<> File to generate. This argument must be passed in. +# +# generated_cubin_file:STRING=<> File to generate. This argument must be passed +# in if build_cubin is true. 
+ +cmake_policy(PUSH) +cmake_policy(SET CMP0007 NEW) +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(CMAKE_COMMAND "/usr/bin/cmake") # path +set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaImage.cu.o.depend") # path +set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path +set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path +set(build_cubin OFF) # bool +set(CUDA_HOST_COMPILER "/usr/bin/cc") # path +# We won't actually use these variables for now, but we need to set this, in +# order to force this file to be run again if it changes. 
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path +set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o.cubin.txt") # path + +set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path +set(CUDA_NVCC_FLAGS ;; -arch=sm_80) # list +# Build specific configuration flags +set(CUDA_NVCC_FLAGS_DEBUG ; ) +set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) +set(CUDA_NVCC_FLAGS_RELEASE ; ) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) +set(nvcc_flags -m64) # list +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;$]==]) # list (needs to be in lua quotes to address backslashes) +string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") +set(CUDA_NVCC_COMPILE_DEFINITIONS [==[$]==]) # list (needs to be in lua quotes see #16510 ). +set(format_flag "-dc") # string +set(cuda_language_flag ) # list + +# Clean up list of include directories and add -I flags +list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) +set(CUDA_NVCC_INCLUDE_ARGS) +foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") +endforeach() + +# Clean up list of compile definitions, add -D flags, and append to nvcc_flags +list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) +foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) + list(APPEND nvcc_flags "-D${def}") +endforeach() + +if(build_cubin AND NOT generated_cubin_file) + message(FATAL_ERROR "You must specify generated_cubin_file on the command line") +endif() + +# This is the list of host compilation flags. It C or CXX should already have +# been chosen by FindCUDA.cmake. 
+set(CMAKE_HOST_FLAGS -O3 -msse2 ) +set(CMAKE_HOST_FLAGS_DEBUG -g) +set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) + +# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler +set(nvcc_host_compiler_flags "") +# If we weren't given a build_configuration, use Debug. +if(NOT build_configuration) + set(build_configuration Debug) +endif() +string(TOUPPER "${build_configuration}" build_configuration) +#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") +foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") +endforeach() +if (nvcc_host_compiler_flags) + set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) +endif() +#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") +# Add the build specific configuration flags +list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) + +# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority +list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) +list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) + set(CCBIN -ccbin "${CCBIN}") + else() + set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") + endif() +endif() + +# cuda_execute_process - Executes a command with optional command echo and status message. 
+# +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# +# CUDA_result - return value from running the command +# +# Make this a macro instead of a function, so that things like RESULT_VARIABLE +# and other return variables are present after executing the process. +macro(cuda_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Now we need to build up our command string. We are accounting for quotes + # and spaces, anything else is left up to the user to fix if they want to + # copy and paste a runnable command line. + set(cuda_execute_process_string) + foreach(arg ${ARGN}) + # If there are quotes, escape them, so they come through. + string(REPLACE "\"" "\\\"" arg ${arg}) + # Args with spaces need quotes around them to get them to be parsed as a single argument. + if(arg MATCHES " ") + list(APPEND cuda_execute_process_string "\"${arg}\"") + else() + list(APPEND cuda_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) +endmacro() + +# Delete the target file +cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + +# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag +# for dependency generation and hope for the best. +set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") +set(CUDA_VERSION 12.2) +if(CUDA_VERSION VERSION_LESS "3.0") + # Note that this will remove all occurrences of -G. 
+ list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") +endif() + +# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This +# can cause incorrect dependencies when #including files based on this macro which is +# defined in the generating passes of nvcc invocation. We will go ahead and manually +# define this for now until a future version fixes this bug. +set(CUDACC_DEFINE -D__CUDACC__) + +# Generate the dependency file +cuda_execute_process( + "Generating dependency file: ${NVCC_generated_dependency_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + -M + ${CUDACC_DEFINE} + "${source_file}" + -o "${NVCC_generated_dependency_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${depends_CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file. Don't put the +# quotes just around the filenames for the input_file and output_file variables. +# CMake will pass the quotes through and not be able to find the file. 
+cuda_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" + -P "${CUDA_make2cmake}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +cuda_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +cuda_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the code +cuda_execute_process( + "Generating ${generated_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${cuda_language_flag} + ${format_flag} -o "${generated_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + # Since nvcc can sometimes leave half done files make sure that we delete the output file. + cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() + +# Cubin resource report commands. +if( build_cubin ) + # Run with -cubin to produce resource usage report. 
+ cuda_execute_process( + "Generating ${generated_cubin_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${CUDA_NVCC_FLAGS} + ${nvcc_flags} + ${CCBIN} + ${nvcc_host_compiler_flags} + -DNVCC + -cubin + -o "${generated_cubin_file}" + ${CUDA_NVCC_INCLUDE_ARGS} + ) + + # Execute the parser script. + cuda_execute_process( + "Executing the parser script" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:STRING=${generated_cubin_file}" + -P "${CUDA_parse_cubin}" + ) + +endif() + +cmake_policy(POP) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend new file mode 100644 index 000000000..3cb252554 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o.depend @@ -0,0 +1,4 @@ +# Generated by: make2cmake.cmake +SET(CUDA_NVCC_DEPEND + ) + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o new file mode 100644 index 000000000..34b81727b Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake new file mode 100644 index 000000000..a1dfc99a0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake @@ -0,0 +1,314 @@ +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# +# 
Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +########################################################################## +# This file runs the nvcc commands to produce the desired output file along with +# the dependency file needed by CMake to compute dependencies. In addition the +# file checks the output of each command and if the command fails it deletes the +# output files. + +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# +# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or +# RelWithDebInfo, but it should match one of the +# entries in CUDA_HOST_FLAGS. 
This is the build +# configuration used when compiling the code. If +# blank or unspecified Debug is assumed as this is +# what CMake does. +# +# generated_file:STRING=<> File to generate. This argument must be passed in. +# +# generated_cubin_file:STRING=<> File to generate. This argument must be passed +# in if build_cubin is true. + +cmake_policy(PUSH) +cmake_policy(SET CMP0007 NEW) +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(CMAKE_COMMAND "/usr/bin/cmake") # path +set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.depend") # path +set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path +set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path +set(build_cubin OFF) # bool +set(CUDA_HOST_COMPILER "/usr/bin/cc") # path +# We won't actually use these variables for now, but we need to set this, in +# order to force this file to be run again if it changes. 
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path +set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o.cubin.txt") # path + +set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path +set(CUDA_NVCC_FLAGS ;; -arch=sm_80) # list +# Build specific configuration flags +set(CUDA_NVCC_FLAGS_DEBUG ; ) +set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) +set(CUDA_NVCC_FLAGS_RELEASE ; ) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) +set(nvcc_flags -m64) # list +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA;/usr/local/cuda/include;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/
usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4]==]) # list (needs to be in lua quotes to address backslashes) +string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") +set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). +set(format_flag "-dc") # string +set(cuda_language_flag ) # list + +# Clean up list of include directories and add -I flags +list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) +set(CUDA_NVCC_INCLUDE_ARGS) +foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") +endforeach() + +# Clean up list of compile definitions, add -D flags, and append to nvcc_flags +list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) +foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) + list(APPEND nvcc_flags "-D${def}") +endforeach() + +if(build_cubin AND NOT generated_cubin_file) + message(FATAL_ERROR "You must specify generated_cubin_file on the command line") +endif() + +# This is the list of host compilation flags. It C or CXX should already have +# been chosen by FindCUDA.cmake. +set(CMAKE_HOST_FLAGS -O3 -msse2 ) +set(CMAKE_HOST_FLAGS_DEBUG -g) +set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) + +# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler +set(nvcc_host_compiler_flags "") +# If we weren't given a build_configuration, use Debug. +if(NOT build_configuration) + set(build_configuration Debug) +endif() +string(TOUPPER "${build_configuration}" build_configuration) +#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") +foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 
+ string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") +endforeach() +if (nvcc_host_compiler_flags) + set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) +endif() +#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") +# Add the build specific configuration flags +list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) + +# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority +list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) +list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) + set(CCBIN -ccbin "${CCBIN}") + else() + set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") + endif() +endif() + +# cuda_execute_process - Executes a command with optional command echo and status message. +# +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# +# CUDA_result - return value from running the command +# +# Make this a macro instead of a function, so that things like RESULT_VARIABLE +# and other return variables are present after executing the process. +macro(cuda_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Now we need to build up our command string. We are accounting for quotes + # and spaces, anything else is left up to the user to fix if they want to + # copy and paste a runnable command line. + set(cuda_execute_process_string) + foreach(arg ${ARGN}) + # If there are quotes, escape them, so they come through. 
+ string(REPLACE "\"" "\\\"" arg ${arg}) + # Args with spaces need quotes around them to get them to be parsed as a single argument. + if(arg MATCHES " ") + list(APPEND cuda_execute_process_string "\"${arg}\"") + else() + list(APPEND cuda_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) +endmacro() + +# Delete the target file +cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + +# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag +# for dependency generation and hope for the best. +set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") +set(CUDA_VERSION 12.2) +if(CUDA_VERSION VERSION_LESS "3.0") + # Note that this will remove all occurrences of -G. + list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") +endif() + +# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This +# can cause incorrect dependencies when #including files based on this macro which is +# defined in the generating passes of nvcc invocation. We will go ahead and manually +# define this for now until a future version fixes this bug. +set(CUDACC_DEFINE -D__CUDACC__) + +# Generate the dependency file +cuda_execute_process( + "Generating dependency file: ${NVCC_generated_dependency_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + -M + ${CUDACC_DEFINE} + "${source_file}" + -o "${NVCC_generated_dependency_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${depends_CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file. Don't put the +# quotes just around the filenames for the input_file and output_file variables. 
+# CMake will pass the quotes through and not be able to find the file. +cuda_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" + -P "${CUDA_make2cmake}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +cuda_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +cuda_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the code +cuda_execute_process( + "Generating ${generated_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${cuda_language_flag} + ${format_flag} -o "${generated_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + # Since nvcc can sometimes leave half done files make sure that we delete the output file. + cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() + +# Cubin resource report commands. +if( build_cubin ) + # Run with -cubin to produce resource usage report. 
+ cuda_execute_process( + "Generating ${generated_cubin_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${CUDA_NVCC_FLAGS} + ${nvcc_flags} + ${CCBIN} + ${nvcc_host_compiler_flags} + -DNVCC + -cubin + -o "${generated_cubin_file}" + ${CUDA_NVCC_INCLUDE_ARGS} + ) + + # Execute the parser script. + cuda_execute_process( + "Executing the parser script" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:STRING=${generated_cubin_file}" + -P "${CUDA_parse_cubin}" + ) + +endif() + +cmake_policy(POP) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen new file mode 100644 index 000000000..a7f671a8f --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.cmake.pre-gen @@ -0,0 +1,314 @@ +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +########################################################################## +# This file runs the nvcc commands to produce the desired output file along with +# the dependency file needed by CMake to compute dependencies. In addition the +# file checks the output of each command and if the command fails it deletes the +# output files. + +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# +# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or +# RelWithDebInfo, but it should match one of the +# entries in CUDA_HOST_FLAGS. This is the build +# configuration used when compiling the code. If +# blank or unspecified Debug is assumed as this is +# what CMake does. +# +# generated_file:STRING=<> File to generate. This argument must be passed in. +# +# generated_cubin_file:STRING=<> File to generate. This argument must be passed +# in if build_cubin is true. 
+ +cmake_policy(PUSH) +cmake_policy(SET CMP0007 NEW) +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(CMAKE_COMMAND "/usr/bin/cmake") # path +set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_cudaSiftH.cu.o.depend") # path +set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path +set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path +set(build_cubin OFF) # bool +set(CUDA_HOST_COMPILER "/usr/bin/cc") # path +# We won't actually use these variables for now, but we need to set this, in +# order to force this file to be run again if it changes. 
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path +set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o.cubin.txt") # path + +set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path +set(CUDA_NVCC_FLAGS ;; -arch=sm_80) # list +# Build specific configuration flags +set(CUDA_NVCC_FLAGS_DEBUG ; ) +set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) +set(CUDA_NVCC_FLAGS_RELEASE ; ) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) +set(nvcc_flags -m64) # list +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;$]==]) # list (needs to be in lua quotes to address backslashes) +string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") +set(CUDA_NVCC_COMPILE_DEFINITIONS [==[$]==]) # list (needs to be in lua quotes see #16510 ). +set(format_flag "-dc") # string +set(cuda_language_flag ) # list + +# Clean up list of include directories and add -I flags +list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) +set(CUDA_NVCC_INCLUDE_ARGS) +foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") +endforeach() + +# Clean up list of compile definitions, add -D flags, and append to nvcc_flags +list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) +foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) + list(APPEND nvcc_flags "-D${def}") +endforeach() + +if(build_cubin AND NOT generated_cubin_file) + message(FATAL_ERROR "You must specify generated_cubin_file on the command line") +endif() + +# This is the list of host compilation flags. It C or CXX should already have +# been chosen by FindCUDA.cmake. 
+set(CMAKE_HOST_FLAGS -O3 -msse2 ) +set(CMAKE_HOST_FLAGS_DEBUG -g) +set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) + +# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler +set(nvcc_host_compiler_flags "") +# If we weren't given a build_configuration, use Debug. +if(NOT build_configuration) + set(build_configuration Debug) +endif() +string(TOUPPER "${build_configuration}" build_configuration) +#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") +foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") +endforeach() +if (nvcc_host_compiler_flags) + set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) +endif() +#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") +# Add the build specific configuration flags +list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) + +# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority +list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) +list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) + set(CCBIN -ccbin "${CCBIN}") + else() + set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") + endif() +endif() + +# cuda_execute_process - Executes a command with optional command echo and status message. 
+# +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# +# CUDA_result - return value from running the command +# +# Make this a macro instead of a function, so that things like RESULT_VARIABLE +# and other return variables are present after executing the process. +macro(cuda_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Now we need to build up our command string. We are accounting for quotes + # and spaces, anything else is left up to the user to fix if they want to + # copy and paste a runnable command line. + set(cuda_execute_process_string) + foreach(arg ${ARGN}) + # If there are quotes, escape them, so they come through. + string(REPLACE "\"" "\\\"" arg ${arg}) + # Args with spaces need quotes around them to get them to be parsed as a single argument. + if(arg MATCHES " ") + list(APPEND cuda_execute_process_string "\"${arg}\"") + else() + list(APPEND cuda_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) +endmacro() + +# Delete the target file +cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + +# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag +# for dependency generation and hope for the best. +set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") +set(CUDA_VERSION 12.2) +if(CUDA_VERSION VERSION_LESS "3.0") + # Note that this will remove all occurrences of -G. 
+ list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") +endif() + +# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This +# can cause incorrect dependencies when #including files based on this macro which is +# defined in the generating passes of nvcc invocation. We will go ahead and manually +# define this for now until a future version fixes this bug. +set(CUDACC_DEFINE -D__CUDACC__) + +# Generate the dependency file +cuda_execute_process( + "Generating dependency file: ${NVCC_generated_dependency_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + -M + ${CUDACC_DEFINE} + "${source_file}" + -o "${NVCC_generated_dependency_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${depends_CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file. Don't put the +# quotes just around the filenames for the input_file and output_file variables. +# CMake will pass the quotes through and not be able to find the file. 
+cuda_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" + -P "${CUDA_make2cmake}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +cuda_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +cuda_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the code +cuda_execute_process( + "Generating ${generated_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${cuda_language_flag} + ${format_flag} -o "${generated_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + # Since nvcc can sometimes leave half done files make sure that we delete the output file. + cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() + +# Cubin resource report commands. +if( build_cubin ) + # Run with -cubin to produce resource usage report. 
+ cuda_execute_process( + "Generating ${generated_cubin_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${CUDA_NVCC_FLAGS} + ${nvcc_flags} + ${CCBIN} + ${nvcc_host_compiler_flags} + -DNVCC + -cubin + -o "${generated_cubin_file}" + ${CUDA_NVCC_INCLUDE_ARGS} + ) + + # Execute the parser script. + cuda_execute_process( + "Executing the parser script" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:STRING=${generated_cubin_file}" + -P "${CUDA_parse_cubin}" + ) + +endif() + +cmake_policy(POP) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend new file mode 100644 index 000000000..3cb252554 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o.depend @@ -0,0 +1,4 @@ +# Generated by: make2cmake.cmake +SET(CUDA_NVCC_DEPEND + ) + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o new file mode 100644 index 000000000..34b81727b Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake new file mode 100644 index 000000000..c763b5c39 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake @@ -0,0 +1,314 @@ +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# +# Copyright (c) 
2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +########################################################################## +# This file runs the nvcc commands to produce the desired output file along with +# the dependency file needed by CMake to compute dependencies. In addition the +# file checks the output of each command and if the command fails it deletes the +# output files. + +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# +# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or +# RelWithDebInfo, but it should match one of the +# entries in CUDA_HOST_FLAGS. 
This is the build +# configuration used when compiling the code. If +# blank or unspecified Debug is assumed as this is +# what CMake does. +# +# generated_file:STRING=<> File to generate. This argument must be passed in. +# +# generated_cubin_file:STRING=<> File to generate. This argument must be passed +# in if build_cubin is true. + +cmake_policy(PUSH) +cmake_policy(SET CMP0007 NEW) +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(CMAKE_COMMAND "/usr/bin/cmake") # path +set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.depend") # path +set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path +set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path +set(build_cubin OFF) # bool +set(CUDA_HOST_COMPILER "/usr/bin/cc") # path +# We won't actually use these variables for now, but we need to set this, in +# order to force this file to be run again if it changes. 
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path +set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o.cubin.txt") # path + +set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path +set(CUDA_NVCC_FLAGS ;; -arch=sm_80) # list +# Build specific configuration flags +set(CUDA_NVCC_FLAGS_DEBUG ; ) +set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) +set(CUDA_NVCC_FLAGS_RELEASE ; ) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) +set(nvcc_flags -m64) # list +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common;/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA;/usr/local/cuda/include;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/us
r/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4;/usr/include/opencv4]==]) # list (needs to be in lua quotes to address backslashes) +string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") +set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). +set(format_flag "-dc") # string +set(cuda_language_flag ) # list + +# Clean up list of include directories and add -I flags +list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) +set(CUDA_NVCC_INCLUDE_ARGS) +foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") +endforeach() + +# Clean up list of compile definitions, add -D flags, and append to nvcc_flags +list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) +foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) + list(APPEND nvcc_flags "-D${def}") +endforeach() + +if(build_cubin AND NOT generated_cubin_file) + message(FATAL_ERROR "You must specify generated_cubin_file on the command line") +endif() + +# This is the list of host compilation flags. It C or CXX should already have +# been chosen by FindCUDA.cmake. +set(CMAKE_HOST_FLAGS -O3 -msse2 ) +set(CMAKE_HOST_FLAGS_DEBUG -g) +set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) + +# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler +set(nvcc_host_compiler_flags "") +# If we weren't given a build_configuration, use Debug. +if(NOT build_configuration) + set(build_configuration Debug) +endif() +string(TOUPPER "${build_configuration}" build_configuration) +#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") +foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 
+ string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") +endforeach() +if (nvcc_host_compiler_flags) + set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) +endif() +#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") +# Add the build specific configuration flags +list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) + +# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority +list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) +list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) + set(CCBIN -ccbin "${CCBIN}") + else() + set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") + endif() +endif() + +# cuda_execute_process - Executes a command with optional command echo and status message. +# +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# +# CUDA_result - return value from running the command +# +# Make this a macro instead of a function, so that things like RESULT_VARIABLE +# and other return variables are present after executing the process. +macro(cuda_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Now we need to build up our command string. We are accounting for quotes + # and spaces, anything else is left up to the user to fix if they want to + # copy and paste a runnable command line. + set(cuda_execute_process_string) + foreach(arg ${ARGN}) + # If there are quotes, escape them, so they come through. 
+ string(REPLACE "\"" "\\\"" arg ${arg}) + # Args with spaces need quotes around them to get them to be parsed as a single argument. + if(arg MATCHES " ") + list(APPEND cuda_execute_process_string "\"${arg}\"") + else() + list(APPEND cuda_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) +endmacro() + +# Delete the target file +cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + +# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag +# for dependency generation and hope for the best. +set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") +set(CUDA_VERSION 12.2) +if(CUDA_VERSION VERSION_LESS "3.0") + # Note that this will remove all occurrences of -G. + list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") +endif() + +# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This +# can cause incorrect dependencies when #including files based on this macro which is +# defined in the generating passes of nvcc invocation. We will go ahead and manually +# define this for now until a future version fixes this bug. +set(CUDACC_DEFINE -D__CUDACC__) + +# Generate the dependency file +cuda_execute_process( + "Generating dependency file: ${NVCC_generated_dependency_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + -M + ${CUDACC_DEFINE} + "${source_file}" + -o "${NVCC_generated_dependency_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${depends_CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file. Don't put the +# quotes just around the filenames for the input_file and output_file variables. 
+# CMake will pass the quotes through and not be able to find the file. +cuda_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" + -P "${CUDA_make2cmake}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +cuda_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +cuda_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the code +cuda_execute_process( + "Generating ${generated_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${cuda_language_flag} + ${format_flag} -o "${generated_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + # Since nvcc can sometimes leave half done files make sure that we delete the output file. + cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() + +# Cubin resource report commands. +if( build_cubin ) + # Run with -cubin to produce resource usage report. 
+ cuda_execute_process( + "Generating ${generated_cubin_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${CUDA_NVCC_FLAGS} + ${nvcc_flags} + ${CCBIN} + ${nvcc_host_compiler_flags} + -DNVCC + -cubin + -o "${generated_cubin_file}" + ${CUDA_NVCC_INCLUDE_ARGS} + ) + + # Execute the parser script. + cuda_execute_process( + "Executing the parser script" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:STRING=${generated_cubin_file}" + -P "${CUDA_parse_cubin}" + ) + +endif() + +cmake_policy(POP) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen new file mode 100644 index 000000000..13ac4f235 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.cmake.pre-gen @@ -0,0 +1,314 @@ +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +########################################################################## +# This file runs the nvcc commands to produce the desired output file along with +# the dependency file needed by CMake to compute dependencies. In addition the +# file checks the output of each command and if the command fails it deletes the +# output files. + +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Describe each step +# +# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or +# RelWithDebInfo, but it should match one of the +# entries in CUDA_HOST_FLAGS. This is the build +# configuration used when compiling the code. If +# blank or unspecified Debug is assumed as this is +# what CMake does. +# +# generated_file:STRING=<> File to generate. This argument must be passed in. +# +# generated_cubin_file:STRING=<> File to generate. This argument must be passed +# in if build_cubin is true. 
+ +cmake_policy(PUSH) +cmake_policy(SET CMP0007 NEW) +if(NOT generated_file) + message(FATAL_ERROR "You must specify generated_file on the command line") +endif() + +# Set these up as variables to make reading the generated file easier +set(CMAKE_COMMAND "/usr/bin/cmake") # path +set(source_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu") # path +set(NVCC_generated_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.NVCC-depend") # path +set(cmake_dependency_file "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//cudasift_generated_matching.cu.o.depend") # path +set(CUDA_make2cmake "/usr/share/cmake-3.22/Modules/FindCUDA/make2cmake.cmake") # path +set(CUDA_parse_cubin "/usr/share/cmake-3.22/Modules/FindCUDA/parse_cubin.cmake") # path +set(build_cubin OFF) # bool +set(CUDA_HOST_COMPILER "/usr/bin/cc") # path +# We won't actually use these variables for now, but we need to set this, in +# order to force this file to be run again if it changes. 
+set(generated_file_path "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//.") # path +set(generated_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o") # path +set(generated_cubin_file_internal "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o.cubin.txt") # path + +set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda/bin/nvcc") # path +set(CUDA_NVCC_FLAGS ;; -arch=sm_80) # list +# Build specific configuration flags +set(CUDA_NVCC_FLAGS_DEBUG ; ) +set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) +set(CUDA_NVCC_FLAGS_RELEASE ; ) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) +set(nvcc_flags -m64) # list +set(CUDA_NVCC_INCLUDE_DIRS [==[/usr/local/cuda/include;$]==]) # list (needs to be in lua quotes to address backslashes) +string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}") +set(CUDA_NVCC_COMPILE_DEFINITIONS [==[$]==]) # list (needs to be in lua quotes see #16510 ). +set(format_flag "-dc") # string +set(cuda_language_flag ) # list + +# Clean up list of include directories and add -I flags +list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) +set(CUDA_NVCC_INCLUDE_ARGS) +foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") +endforeach() + +# Clean up list of compile definitions, add -D flags, and append to nvcc_flags +list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) +foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) + list(APPEND nvcc_flags "-D${def}") +endforeach() + +if(build_cubin AND NOT generated_cubin_file) + message(FATAL_ERROR "You must specify generated_cubin_file on the command line") +endif() + +# This is the list of host compilation flags. It C or CXX should already have +# been chosen by FindCUDA.cmake. 
+set(CMAKE_HOST_FLAGS -O3 -msse2 ) +set(CMAKE_HOST_FLAGS_DEBUG -g) +set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) +set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) + +# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler +set(nvcc_host_compiler_flags "") +# If we weren't given a build_configuration, use Debug. +if(NOT build_configuration) + set(build_configuration Debug) +endif() +string(TOUPPER "${build_configuration}" build_configuration) +#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") +foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") +endforeach() +if (nvcc_host_compiler_flags) + set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) +endif() +#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") +# Add the build specific configuration flags +list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) + +# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority +list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) +list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) + set(CCBIN -ccbin "${CCBIN}") + else() + set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") + endif() +endif() + +# cuda_execute_process - Executes a command with optional command echo and status message. 
+# +# status - Status message to print if verbose is true +# command - COMMAND argument from the usual execute_process argument structure +# ARGN - Remaining arguments are the command with arguments +# +# CUDA_result - return value from running the command +# +# Make this a macro instead of a function, so that things like RESULT_VARIABLE +# and other return variables are present after executing the process. +macro(cuda_execute_process status command) + set(_command ${command}) + if(NOT "x${_command}" STREQUAL "xCOMMAND") + message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") + endif() + if(verbose) + execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) + # Now we need to build up our command string. We are accounting for quotes + # and spaces, anything else is left up to the user to fix if they want to + # copy and paste a runnable command line. + set(cuda_execute_process_string) + foreach(arg ${ARGN}) + # If there are quotes, escape them, so they come through. + string(REPLACE "\"" "\\\"" arg ${arg}) + # Args with spaces need quotes around them to get them to be parsed as a single argument. + if(arg MATCHES " ") + list(APPEND cuda_execute_process_string "\"${arg}\"") + else() + list(APPEND cuda_execute_process_string ${arg}) + endif() + endforeach() + # Echo the command + execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) + endif() + # Run the command + execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) +endmacro() + +# Delete the target file +cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + +# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag +# for dependency generation and hope for the best. +set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") +set(CUDA_VERSION 12.2) +if(CUDA_VERSION VERSION_LESS "3.0") + # Note that this will remove all occurrences of -G. 
+ list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") +endif() + +# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This +# can cause incorrect dependencies when #including files based on this macro which is +# defined in the generating passes of nvcc invocation. We will go ahead and manually +# define this for now until a future version fixes this bug. +set(CUDACC_DEFINE -D__CUDACC__) + +# Generate the dependency file +cuda_execute_process( + "Generating dependency file: ${NVCC_generated_dependency_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + -M + ${CUDACC_DEFINE} + "${source_file}" + -o "${NVCC_generated_dependency_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${depends_CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the cmake readable dependency file to a temp file. Don't put the +# quotes just around the filenames for the input_file and output_file variables. +# CMake will pass the quotes through and not be able to find the file. 
+cuda_execute_process( + "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" + -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" + -P "${CUDA_make2cmake}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Copy the file if it is different +cuda_execute_process( + "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Delete the temporary file +cuda_execute_process( + "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" + ) + +if(CUDA_result) + message(FATAL_ERROR "Error generating ${generated_file}") +endif() + +# Generate the code +cuda_execute_process( + "Generating ${generated_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${cuda_language_flag} + ${format_flag} -o "${generated_file}" + ${CCBIN} + ${nvcc_flags} + ${nvcc_host_compiler_flags} + ${CUDA_NVCC_FLAGS} + -DNVCC + ${CUDA_NVCC_INCLUDE_ARGS} + ) + +if(CUDA_result) + # Since nvcc can sometimes leave half done files make sure that we delete the output file. + cuda_execute_process( + "Removing ${generated_file}" + COMMAND "${CMAKE_COMMAND}" -E rm -f "${generated_file}" + ) + message(FATAL_ERROR "Error generating file ${generated_file}") +else() + if(verbose) + message("Generated ${generated_file} successfully.") + endif() +endif() + +# Cubin resource report commands. +if( build_cubin ) + # Run with -cubin to produce resource usage report. 
+ cuda_execute_process( + "Generating ${generated_cubin_file}" + COMMAND "${CUDA_NVCC_EXECUTABLE}" + "${source_file}" + ${CUDA_NVCC_FLAGS} + ${nvcc_flags} + ${CCBIN} + ${nvcc_host_compiler_flags} + -DNVCC + -cubin + -o "${generated_cubin_file}" + ${CUDA_NVCC_INCLUDE_ARGS} + ) + + # Execute the parser script. + cuda_execute_process( + "Executing the parser script" + COMMAND "${CMAKE_COMMAND}" + -D "input_file:STRING=${generated_cubin_file}" + -P "${CUDA_parse_cubin}" + ) + +endif() + +cmake_policy(POP) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend new file mode 100644 index 000000000..3cb252554 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o.depend @@ -0,0 +1,4 @@ +# Generated by: make2cmake.cmake +SET(CUDA_NVCC_DEPEND + ) + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o new file mode 100644 index 000000000..34b81727b Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/cudasift_intermediate_link.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/depend.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/depend.make new file mode 100644 index 000000000..175fcce75 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/depend.make @@ -0,0 +1,2 @@ +# Empty dependencies file for cudasift. +# This may be replaced when dependencies are built. 
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/flags.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/flags.make new file mode 100644 index 000000000..966c980d0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.22 + +# compile CXX with /usr/bin/c++ +CXX_DEFINES = + +CXX_INCLUDES = -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4 + +CXX_FLAGS = -O3 -msse2 -std=gnu++17 + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o new file mode 100644 index 000000000..8381a70ff Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d new file mode 100644 index 000000000..0f547060b --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/geomFuncs.cpp.o.d @@ -0,0 +1,267 @@ +CMakeFiles/cudasift.dir/geomFuncs.cpp.o: \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp \ + /usr/include/stdc-predef.h /usr/include/c++/11/iostream \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h \ + /usr/include/features.h /usr/include/features-time64.h \ + /usr/include/x86_64-linux-gnu/bits/wordsize.h \ + /usr/include/x86_64-linux-gnu/bits/timesize.h \ + 
/usr/include/x86_64-linux-gnu/sys/cdefs.h \ + /usr/include/x86_64-linux-gnu/bits/long-double.h \ + /usr/include/x86_64-linux-gnu/gnu/stubs.h \ + /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h \ + /usr/include/c++/11/pstl/pstl_config.h /usr/include/c++/11/ostream \ + /usr/include/c++/11/ios /usr/include/c++/11/iosfwd \ + /usr/include/c++/11/bits/stringfwd.h \ + /usr/include/c++/11/bits/memoryfwd.h /usr/include/c++/11/bits/postypes.h \ + /usr/include/c++/11/cwchar /usr/include/wchar.h \ + /usr/include/x86_64-linux-gnu/bits/libc-header-start.h \ + /usr/include/x86_64-linux-gnu/bits/floatn.h \ + /usr/include/x86_64-linux-gnu/bits/floatn-common.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h \ + /usr/include/x86_64-linux-gnu/bits/wchar.h \ + /usr/include/x86_64-linux-gnu/bits/types/wint_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/locale_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h \ + /usr/include/x86_64-linux-gnu/bits/wchar2.h \ + /usr/include/c++/11/exception /usr/include/c++/11/bits/exception.h \ + /usr/include/c++/11/bits/exception_ptr.h \ + /usr/include/c++/11/bits/exception_defines.h \ + /usr/include/c++/11/bits/cxxabi_init_exception.h \ + /usr/include/c++/11/typeinfo /usr/include/c++/11/bits/hash_bytes.h \ + /usr/include/c++/11/new /usr/include/c++/11/bits/move.h \ + /usr/include/c++/11/type_traits \ + /usr/include/c++/11/bits/nested_exception.h \ + /usr/include/c++/11/bits/char_traits.h \ + /usr/include/c++/11/bits/stl_algobase.h \ + /usr/include/c++/11/bits/functexcept.h \ + /usr/include/c++/11/bits/cpp_type_traits.h \ + /usr/include/c++/11/ext/type_traits.h \ + /usr/include/c++/11/ext/numeric_traits.h \ 
+ /usr/include/c++/11/bits/stl_pair.h \ + /usr/include/c++/11/bits/stl_iterator_base_types.h \ + /usr/include/c++/11/bits/stl_iterator_base_funcs.h \ + /usr/include/c++/11/bits/concept_check.h \ + /usr/include/c++/11/debug/assertions.h \ + /usr/include/c++/11/bits/stl_iterator.h \ + /usr/include/c++/11/bits/ptr_traits.h /usr/include/c++/11/debug/debug.h \ + /usr/include/c++/11/bits/predefined_ops.h /usr/include/c++/11/cstdint \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stdint.h /usr/include/stdint.h \ + /usr/include/x86_64-linux-gnu/bits/types.h \ + /usr/include/x86_64-linux-gnu/bits/typesizes.h \ + /usr/include/x86_64-linux-gnu/bits/time64.h \ + /usr/include/x86_64-linux-gnu/bits/stdint-intn.h \ + /usr/include/x86_64-linux-gnu/bits/stdint-uintn.h \ + /usr/include/c++/11/bits/localefwd.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++locale.h \ + /usr/include/c++/11/clocale /usr/include/locale.h \ + /usr/include/x86_64-linux-gnu/bits/locale.h /usr/include/c++/11/cctype \ + /usr/include/ctype.h /usr/include/x86_64-linux-gnu/bits/endian.h \ + /usr/include/x86_64-linux-gnu/bits/endianness.h \ + /usr/include/c++/11/bits/ios_base.h /usr/include/c++/11/ext/atomicity.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/gthr.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/gthr-default.h \ + /usr/include/pthread.h /usr/include/sched.h \ + /usr/include/x86_64-linux-gnu/bits/types/time_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h \ + /usr/include/x86_64-linux-gnu/bits/sched.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_sched_param.h \ + /usr/include/x86_64-linux-gnu/bits/cpu-set.h /usr/include/time.h \ + /usr/include/x86_64-linux-gnu/bits/time.h \ + /usr/include/x86_64-linux-gnu/bits/timex.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h \ + /usr/include/x86_64-linux-gnu/bits/types/clock_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_tm.h \ + /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h \ + 
/usr/include/x86_64-linux-gnu/bits/types/timer_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h \ + /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \ + /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h \ + /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h \ + /usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h \ + /usr/include/x86_64-linux-gnu/bits/struct_mutex.h \ + /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h \ + /usr/include/x86_64-linux-gnu/bits/setjmp.h \ + /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct___jmp_buf_tag.h \ + /usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/atomic_word.h \ + /usr/include/x86_64-linux-gnu/sys/single_threaded.h \ + /usr/include/c++/11/bits/locale_classes.h /usr/include/c++/11/string \ + /usr/include/c++/11/bits/allocator.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++allocator.h \ + /usr/include/c++/11/ext/new_allocator.h \ + /usr/include/c++/11/bits/ostream_insert.h \ + /usr/include/c++/11/bits/cxxabi_forced.h \ + /usr/include/c++/11/bits/stl_function.h \ + /usr/include/c++/11/backward/binders.h \ + /usr/include/c++/11/bits/range_access.h \ + /usr/include/c++/11/initializer_list \ + /usr/include/c++/11/bits/basic_string.h \ + /usr/include/c++/11/ext/alloc_traits.h \ + /usr/include/c++/11/bits/alloc_traits.h \ + /usr/include/c++/11/bits/stl_construct.h /usr/include/c++/11/string_view \ + /usr/include/c++/11/bits/functional_hash.h \ + /usr/include/c++/11/bits/string_view.tcc \ + /usr/include/c++/11/ext/string_conversions.h /usr/include/c++/11/cstdlib \ + /usr/include/stdlib.h /usr/include/x86_64-linux-gnu/bits/waitflags.h \ + /usr/include/x86_64-linux-gnu/bits/waitstatus.h \ + /usr/include/x86_64-linux-gnu/sys/types.h /usr/include/endian.h \ + /usr/include/x86_64-linux-gnu/bits/byteswap.h \ + /usr/include/x86_64-linux-gnu/bits/uintn-identity.h \ + 
/usr/include/x86_64-linux-gnu/sys/select.h \ + /usr/include/x86_64-linux-gnu/bits/select.h \ + /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h \ + /usr/include/x86_64-linux-gnu/bits/select2.h /usr/include/alloca.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib-float.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib.h \ + /usr/include/c++/11/bits/std_abs.h /usr/include/c++/11/cstdio \ + /usr/include/stdio.h /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h \ + /usr/include/x86_64-linux-gnu/bits/stdio_lim.h \ + /usr/include/x86_64-linux-gnu/bits/stdio.h \ + /usr/include/x86_64-linux-gnu/bits/stdio2.h /usr/include/c++/11/cerrno \ + /usr/include/errno.h /usr/include/x86_64-linux-gnu/bits/errno.h \ + /usr/include/linux/errno.h /usr/include/x86_64-linux-gnu/asm/errno.h \ + /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \ + /usr/include/x86_64-linux-gnu/bits/types/error_t.h \ + /usr/include/c++/11/bits/charconv.h \ + /usr/include/c++/11/bits/basic_string.tcc \ + /usr/include/c++/11/bits/locale_classes.tcc \ + /usr/include/c++/11/system_error \ + /usr/include/x86_64-linux-gnu/c++/11/bits/error_constants.h \ + /usr/include/c++/11/stdexcept /usr/include/c++/11/streambuf \ + /usr/include/c++/11/bits/streambuf.tcc \ + /usr/include/c++/11/bits/basic_ios.h \ + /usr/include/c++/11/bits/locale_facets.h /usr/include/c++/11/cwctype \ + /usr/include/wctype.h /usr/include/x86_64-linux-gnu/bits/wctype-wchar.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_base.h \ + /usr/include/c++/11/bits/streambuf_iterator.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_inline.h \ + /usr/include/c++/11/bits/locale_facets.tcc \ + /usr/include/c++/11/bits/basic_ios.tcc \ + /usr/include/c++/11/bits/ostream.tcc 
/usr/include/c++/11/istream \ + /usr/include/c++/11/bits/istream.tcc /usr/include/c++/11/cmath \ + /usr/include/math.h /usr/include/x86_64-linux-gnu/bits/math-vector.h \ + /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h \ + /usr/include/x86_64-linux-gnu/bits/flt-eval-method.h \ + /usr/include/x86_64-linux-gnu/bits/fp-logb.h \ + /usr/include/x86_64-linux-gnu/bits/fp-fast.h \ + /usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h \ + /usr/include/x86_64-linux-gnu/bits/mathcalls.h \ + /usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h \ + /usr/include/x86_64-linux-gnu/bits/iscanonical.h \ + /usr/include/c++/11/bits/specfun.h /usr/include/c++/11/limits \ + /usr/include/c++/11/tr1/gamma.tcc \ + /usr/include/c++/11/tr1/special_function_util.h \ + /usr/include/c++/11/tr1/bessel_function.tcc \ + /usr/include/c++/11/tr1/beta_function.tcc \ + /usr/include/c++/11/tr1/ell_integral.tcc \ + /usr/include/c++/11/tr1/exp_integral.tcc \ + /usr/include/c++/11/tr1/hypergeometric.tcc \ + /usr/include/c++/11/tr1/legendre_function.tcc \ + /usr/include/c++/11/tr1/modified_bessel_func.tcc \ + /usr/include/c++/11/tr1/poly_hermite.tcc \ + /usr/include/c++/11/tr1/poly_laguerre.tcc \ + /usr/include/c++/11/tr1/riemann_zeta.tcc \ + /usr/include/opencv4/opencv2/core/core.hpp \ + /usr/include/opencv4/opencv2/core.hpp \ + /usr/include/opencv4/opencv2/core/cvdef.h \ + /usr/include/opencv4/opencv2/core/version.hpp \ + /usr/include/opencv4/opencv2/core/hal/interface.h \ + /usr/include/c++/11/cstddef \ + /usr/include/opencv4/opencv2/core/cv_cpu_dispatch.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/emmintrin.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/xmmintrin.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/mmintrin.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/mm_malloc.h \ + /usr/include/c++/11/stdlib.h /usr/include/c++/11/array \ + /usr/include/c++/11/utility /usr/include/c++/11/bits/stl_relops.h \ + /usr/include/opencv4/opencv2/core/base.hpp \ + 
/usr/include/opencv4/opencv2/opencv_modules.hpp \ + /usr/include/c++/11/climits \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/limits.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/syslimits.h \ + /usr/include/limits.h /usr/include/x86_64-linux-gnu/bits/posix1_lim.h \ + /usr/include/x86_64-linux-gnu/bits/local_lim.h \ + /usr/include/linux/limits.h \ + /usr/include/x86_64-linux-gnu/bits/posix2_lim.h \ + /usr/include/x86_64-linux-gnu/bits/xopen_lim.h \ + /usr/include/x86_64-linux-gnu/bits/uio_lim.h \ + /usr/include/c++/11/algorithm /usr/include/c++/11/bits/stl_algo.h \ + /usr/include/c++/11/bits/algorithmfwd.h \ + /usr/include/c++/11/bits/stl_heap.h \ + /usr/include/c++/11/bits/stl_tempbuf.h \ + /usr/include/c++/11/bits/uniform_int_dist.h \ + /usr/include/c++/11/pstl/glue_algorithm_defs.h \ + /usr/include/c++/11/functional /usr/include/c++/11/tuple \ + /usr/include/c++/11/bits/uses_allocator.h \ + /usr/include/c++/11/bits/invoke.h /usr/include/c++/11/bits/refwrap.h \ + /usr/include/c++/11/bits/std_function.h \ + /usr/include/c++/11/unordered_map \ + /usr/include/c++/11/ext/aligned_buffer.h \ + /usr/include/c++/11/bits/hashtable.h \ + /usr/include/c++/11/bits/hashtable_policy.h \ + /usr/include/c++/11/bits/enable_special_members.h \ + /usr/include/c++/11/bits/node_handle.h \ + /usr/include/c++/11/bits/unordered_map.h \ + /usr/include/c++/11/bits/erase_if.h /usr/include/c++/11/vector \ + /usr/include/c++/11/bits/stl_uninitialized.h \ + /usr/include/c++/11/bits/stl_vector.h \ + /usr/include/c++/11/bits/stl_bvector.h \ + /usr/include/c++/11/bits/vector.tcc \ + /usr/include/c++/11/pstl/execution_defs.h \ + /usr/include/opencv4/opencv2/core/cvstd.hpp /usr/include/c++/11/cstring \ + /usr/include/string.h /usr/include/strings.h \ + /usr/include/x86_64-linux-gnu/bits/strings_fortified.h \ + /usr/include/x86_64-linux-gnu/bits/string_fortified.h \ + /usr/include/opencv4/opencv2/core/cvstd_wrapper.hpp \ + /usr/include/c++/11/memory \ + 
/usr/include/c++/11/bits/stl_raw_storage_iter.h \ + /usr/include/c++/11/bits/align.h /usr/include/c++/11/bit \ + /usr/include/c++/11/bits/unique_ptr.h \ + /usr/include/c++/11/bits/shared_ptr.h \ + /usr/include/c++/11/bits/shared_ptr_base.h \ + /usr/include/c++/11/bits/allocated_ptr.h \ + /usr/include/c++/11/ext/concurrence.h \ + /usr/include/c++/11/bits/shared_ptr_atomic.h \ + /usr/include/c++/11/bits/atomic_base.h \ + /usr/include/c++/11/bits/atomic_lockfree_defines.h \ + /usr/include/c++/11/backward/auto_ptr.h \ + /usr/include/c++/11/pstl/glue_memory_defs.h \ + /usr/include/opencv4/opencv2/core/neon_utils.hpp \ + /usr/include/opencv4/opencv2/core/vsx_utils.hpp /usr/include/assert.h \ + /usr/include/opencv4/opencv2/core/check.hpp \ + /usr/include/opencv4/opencv2/core/traits.hpp \ + /usr/include/opencv4/opencv2/core/matx.hpp \ + /usr/include/opencv4/opencv2/core/saturate.hpp \ + /usr/include/opencv4/opencv2/core/fast_math.hpp \ + /usr/include/opencv4/opencv2/core/types.hpp /usr/include/c++/11/cfloat \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/float.h \ + /usr/include/opencv4/opencv2/core/mat.hpp \ + /usr/include/opencv4/opencv2/core/bufferpool.hpp \ + /usr/include/opencv4/opencv2/core/mat.inl.hpp \ + /usr/include/opencv4/opencv2/core/persistence.hpp \ + /usr/include/opencv4/opencv2/core/operations.hpp \ + /usr/include/opencv4/opencv2/core/cvstd.inl.hpp \ + /usr/include/c++/11/complex /usr/include/c++/11/sstream \ + /usr/include/c++/11/bits/sstream.tcc \ + /usr/include/opencv4/opencv2/core/utility.hpp /usr/include/c++/11/mutex \ + /usr/include/c++/11/chrono /usr/include/c++/11/ratio \ + /usr/include/c++/11/ctime /usr/include/c++/11/bits/parse_numbers.h \ + /usr/include/c++/11/bits/std_mutex.h \ + /usr/include/c++/11/bits/unique_lock.h \ + /usr/include/opencv4/opencv2/core/optim.hpp \ + /usr/include/opencv4/opencv2/core/ovx.hpp \ + /usr/include/opencv4/opencv2/core/cvdef.h \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h \ + 
/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.h diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o new file mode 100644 index 000000000..220855fcd Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d new file mode 100644 index 000000000..e610eadb3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/home/chenshe1/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o.d @@ -0,0 +1,154 @@ +CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp \ + /usr/include/stdc-predef.h /usr/include/c++/11/iostream \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h \ + /usr/include/features.h /usr/include/features-time64.h \ + /usr/include/x86_64-linux-gnu/bits/wordsize.h \ + /usr/include/x86_64-linux-gnu/bits/timesize.h \ + /usr/include/x86_64-linux-gnu/sys/cdefs.h \ + /usr/include/x86_64-linux-gnu/bits/long-double.h \ + /usr/include/x86_64-linux-gnu/gnu/stubs.h \ + /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h \ + /usr/include/c++/11/pstl/pstl_config.h /usr/include/c++/11/ostream \ + 
/usr/include/c++/11/ios /usr/include/c++/11/iosfwd \ + /usr/include/c++/11/bits/stringfwd.h \ + /usr/include/c++/11/bits/memoryfwd.h /usr/include/c++/11/bits/postypes.h \ + /usr/include/c++/11/cwchar /usr/include/wchar.h \ + /usr/include/x86_64-linux-gnu/bits/libc-header-start.h \ + /usr/include/x86_64-linux-gnu/bits/floatn.h \ + /usr/include/x86_64-linux-gnu/bits/floatn-common.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h \ + /usr/include/x86_64-linux-gnu/bits/wchar.h \ + /usr/include/x86_64-linux-gnu/bits/types/wint_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/locale_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h \ + /usr/include/x86_64-linux-gnu/bits/wchar2.h \ + /usr/include/c++/11/exception /usr/include/c++/11/bits/exception.h \ + /usr/include/c++/11/bits/exception_ptr.h \ + /usr/include/c++/11/bits/exception_defines.h \ + /usr/include/c++/11/bits/cxxabi_init_exception.h \ + /usr/include/c++/11/typeinfo /usr/include/c++/11/bits/hash_bytes.h \ + /usr/include/c++/11/new /usr/include/c++/11/bits/move.h \ + /usr/include/c++/11/type_traits \ + /usr/include/c++/11/bits/nested_exception.h \ + /usr/include/c++/11/bits/char_traits.h \ + /usr/include/c++/11/bits/stl_algobase.h \ + /usr/include/c++/11/bits/functexcept.h \ + /usr/include/c++/11/bits/cpp_type_traits.h \ + /usr/include/c++/11/ext/type_traits.h \ + /usr/include/c++/11/ext/numeric_traits.h \ + /usr/include/c++/11/bits/stl_pair.h \ + /usr/include/c++/11/bits/stl_iterator_base_types.h \ + /usr/include/c++/11/bits/stl_iterator_base_funcs.h \ + /usr/include/c++/11/bits/concept_check.h \ + /usr/include/c++/11/debug/assertions.h \ + /usr/include/c++/11/bits/stl_iterator.h \ + /usr/include/c++/11/bits/ptr_traits.h 
/usr/include/c++/11/debug/debug.h \ + /usr/include/c++/11/bits/predefined_ops.h /usr/include/c++/11/cstdint \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stdint.h /usr/include/stdint.h \ + /usr/include/x86_64-linux-gnu/bits/types.h \ + /usr/include/x86_64-linux-gnu/bits/typesizes.h \ + /usr/include/x86_64-linux-gnu/bits/time64.h \ + /usr/include/x86_64-linux-gnu/bits/stdint-intn.h \ + /usr/include/x86_64-linux-gnu/bits/stdint-uintn.h \ + /usr/include/c++/11/bits/localefwd.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++locale.h \ + /usr/include/c++/11/clocale /usr/include/locale.h \ + /usr/include/x86_64-linux-gnu/bits/locale.h /usr/include/c++/11/cctype \ + /usr/include/ctype.h /usr/include/x86_64-linux-gnu/bits/endian.h \ + /usr/include/x86_64-linux-gnu/bits/endianness.h \ + /usr/include/c++/11/bits/ios_base.h /usr/include/c++/11/ext/atomicity.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/gthr.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/gthr-default.h \ + /usr/include/pthread.h /usr/include/sched.h \ + /usr/include/x86_64-linux-gnu/bits/types/time_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h \ + /usr/include/x86_64-linux-gnu/bits/sched.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_sched_param.h \ + /usr/include/x86_64-linux-gnu/bits/cpu-set.h /usr/include/time.h \ + /usr/include/x86_64-linux-gnu/bits/time.h \ + /usr/include/x86_64-linux-gnu/bits/timex.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h \ + /usr/include/x86_64-linux-gnu/bits/types/clock_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_tm.h \ + /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/timer_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h \ + /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \ + /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h \ + /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h \ + 
/usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h \ + /usr/include/x86_64-linux-gnu/bits/struct_mutex.h \ + /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h \ + /usr/include/x86_64-linux-gnu/bits/setjmp.h \ + /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct___jmp_buf_tag.h \ + /usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/atomic_word.h \ + /usr/include/x86_64-linux-gnu/sys/single_threaded.h \ + /usr/include/c++/11/bits/locale_classes.h /usr/include/c++/11/string \ + /usr/include/c++/11/bits/allocator.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++allocator.h \ + /usr/include/c++/11/ext/new_allocator.h \ + /usr/include/c++/11/bits/ostream_insert.h \ + /usr/include/c++/11/bits/cxxabi_forced.h \ + /usr/include/c++/11/bits/stl_function.h \ + /usr/include/c++/11/backward/binders.h \ + /usr/include/c++/11/bits/range_access.h \ + /usr/include/c++/11/initializer_list \ + /usr/include/c++/11/bits/basic_string.h \ + /usr/include/c++/11/ext/alloc_traits.h \ + /usr/include/c++/11/bits/alloc_traits.h \ + /usr/include/c++/11/bits/stl_construct.h /usr/include/c++/11/string_view \ + /usr/include/c++/11/bits/functional_hash.h \ + /usr/include/c++/11/bits/string_view.tcc \ + /usr/include/c++/11/ext/string_conversions.h /usr/include/c++/11/cstdlib \ + /usr/include/stdlib.h /usr/include/x86_64-linux-gnu/bits/waitflags.h \ + /usr/include/x86_64-linux-gnu/bits/waitstatus.h \ + /usr/include/x86_64-linux-gnu/sys/types.h /usr/include/endian.h \ + /usr/include/x86_64-linux-gnu/bits/byteswap.h \ + /usr/include/x86_64-linux-gnu/bits/uintn-identity.h \ + /usr/include/x86_64-linux-gnu/sys/select.h \ + /usr/include/x86_64-linux-gnu/bits/select.h \ + /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h \ + /usr/include/x86_64-linux-gnu/bits/select2.h /usr/include/alloca.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \ + 
/usr/include/x86_64-linux-gnu/bits/stdlib-float.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib.h \ + /usr/include/c++/11/bits/std_abs.h /usr/include/c++/11/cstdio \ + /usr/include/stdio.h /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h \ + /usr/include/x86_64-linux-gnu/bits/stdio_lim.h \ + /usr/include/x86_64-linux-gnu/bits/stdio.h \ + /usr/include/x86_64-linux-gnu/bits/stdio2.h /usr/include/c++/11/cerrno \ + /usr/include/errno.h /usr/include/x86_64-linux-gnu/bits/errno.h \ + /usr/include/linux/errno.h /usr/include/x86_64-linux-gnu/asm/errno.h \ + /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \ + /usr/include/x86_64-linux-gnu/bits/types/error_t.h \ + /usr/include/c++/11/bits/charconv.h \ + /usr/include/c++/11/bits/basic_string.tcc \ + /usr/include/c++/11/bits/locale_classes.tcc \ + /usr/include/c++/11/system_error \ + /usr/include/x86_64-linux-gnu/c++/11/bits/error_constants.h \ + /usr/include/c++/11/stdexcept /usr/include/c++/11/streambuf \ + /usr/include/c++/11/bits/streambuf.tcc \ + /usr/include/c++/11/bits/basic_ios.h \ + /usr/include/c++/11/bits/locale_facets.h /usr/include/c++/11/cwctype \ + /usr/include/wctype.h /usr/include/x86_64-linux-gnu/bits/wctype-wchar.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_base.h \ + /usr/include/c++/11/bits/streambuf_iterator.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_inline.h \ + /usr/include/c++/11/bits/locale_facets.tcc \ + /usr/include/c++/11/bits/basic_ios.tcc \ + /usr/include/c++/11/bits/ostream.tcc /usr/include/c++/11/istream \ + /usr/include/c++/11/bits/istream.tcc \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.h diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/link.txt 
b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/link.txt new file mode 100644 index 000000000..5d4781220 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ -O3 -msse2 CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o CMakeFiles/cudasift.dir/geomFuncs.cpp.o CMakeFiles/cudasift.dir/mainSift.cpp.o CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o CMakeFiles/cudasift.dir/cudasift_intermediate_link.o -o cudasift /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudadevrt.a /usr/lib/x86_64-linux-gnu/libopencv_stitching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_alphamat.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_aruco.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_barcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bgsegm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bioinspired.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ccalib.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dpm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_face.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_freetype.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_fuzzy.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hdf.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hfs.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_img_hash.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_intensity_transform.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_line_descriptor.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_mcc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_quality.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rapid.so.4.5.4d 
/usr/lib/x86_64-linux-gnu/libopencv_reg.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rgbd.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_saliency.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_shape.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_stereo.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_structured_light.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_surface_matching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_tracking.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videostab.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_viz.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_wechat_qrcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xobjdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xphoto.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_highgui.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_datasets.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_plot.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_text.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ml.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_phase_unwrapping.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_optflow.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ximgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_video.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videoio.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgcodecs.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_calib3d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_features2d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_flann.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_photo.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_core.so.4.5.4d diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o 
new file mode 100644 index 000000000..7a888fa16 Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o.d b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o.d new file mode 100644 index 000000000..30d1de48f --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/mainSift.cpp.o.d @@ -0,0 +1,298 @@ +CMakeFiles/cudasift.dir/mainSift.cpp.o: \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp \ + /usr/include/stdc-predef.h /usr/include/c++/11/iostream \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h \ + /usr/include/features.h /usr/include/features-time64.h \ + /usr/include/x86_64-linux-gnu/bits/wordsize.h \ + /usr/include/x86_64-linux-gnu/bits/timesize.h \ + /usr/include/x86_64-linux-gnu/sys/cdefs.h \ + /usr/include/x86_64-linux-gnu/bits/long-double.h \ + /usr/include/x86_64-linux-gnu/gnu/stubs.h \ + /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h \ + /usr/include/c++/11/pstl/pstl_config.h /usr/include/c++/11/ostream \ + /usr/include/c++/11/ios /usr/include/c++/11/iosfwd \ + /usr/include/c++/11/bits/stringfwd.h \ + /usr/include/c++/11/bits/memoryfwd.h /usr/include/c++/11/bits/postypes.h \ + /usr/include/c++/11/cwchar /usr/include/wchar.h \ + /usr/include/x86_64-linux-gnu/bits/libc-header-start.h \ + /usr/include/x86_64-linux-gnu/bits/floatn.h \ + /usr/include/x86_64-linux-gnu/bits/floatn-common.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h \ + /usr/include/x86_64-linux-gnu/bits/wchar.h \ + /usr/include/x86_64-linux-gnu/bits/types/wint_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/mbstate_t.h \ 
+ /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/locale_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h \ + /usr/include/x86_64-linux-gnu/bits/wchar2.h \ + /usr/include/c++/11/exception /usr/include/c++/11/bits/exception.h \ + /usr/include/c++/11/bits/exception_ptr.h \ + /usr/include/c++/11/bits/exception_defines.h \ + /usr/include/c++/11/bits/cxxabi_init_exception.h \ + /usr/include/c++/11/typeinfo /usr/include/c++/11/bits/hash_bytes.h \ + /usr/include/c++/11/new /usr/include/c++/11/bits/move.h \ + /usr/include/c++/11/type_traits \ + /usr/include/c++/11/bits/nested_exception.h \ + /usr/include/c++/11/bits/char_traits.h \ + /usr/include/c++/11/bits/stl_algobase.h \ + /usr/include/c++/11/bits/functexcept.h \ + /usr/include/c++/11/bits/cpp_type_traits.h \ + /usr/include/c++/11/ext/type_traits.h \ + /usr/include/c++/11/ext/numeric_traits.h \ + /usr/include/c++/11/bits/stl_pair.h \ + /usr/include/c++/11/bits/stl_iterator_base_types.h \ + /usr/include/c++/11/bits/stl_iterator_base_funcs.h \ + /usr/include/c++/11/bits/concept_check.h \ + /usr/include/c++/11/debug/assertions.h \ + /usr/include/c++/11/bits/stl_iterator.h \ + /usr/include/c++/11/bits/ptr_traits.h /usr/include/c++/11/debug/debug.h \ + /usr/include/c++/11/bits/predefined_ops.h /usr/include/c++/11/cstdint \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/stdint.h /usr/include/stdint.h \ + /usr/include/x86_64-linux-gnu/bits/types.h \ + /usr/include/x86_64-linux-gnu/bits/typesizes.h \ + /usr/include/x86_64-linux-gnu/bits/time64.h \ + /usr/include/x86_64-linux-gnu/bits/stdint-intn.h \ + /usr/include/x86_64-linux-gnu/bits/stdint-uintn.h \ + /usr/include/c++/11/bits/localefwd.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/c++locale.h \ + /usr/include/c++/11/clocale /usr/include/locale.h \ + /usr/include/x86_64-linux-gnu/bits/locale.h 
/usr/include/c++/11/cctype \ + /usr/include/ctype.h /usr/include/x86_64-linux-gnu/bits/endian.h \ + /usr/include/x86_64-linux-gnu/bits/endianness.h \ + /usr/include/c++/11/bits/ios_base.h /usr/include/c++/11/ext/atomicity.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/gthr.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/gthr-default.h \ + /usr/include/pthread.h /usr/include/sched.h \ + /usr/include/x86_64-linux-gnu/bits/types/time_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h \ + /usr/include/x86_64-linux-gnu/bits/sched.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_sched_param.h \ + /usr/include/x86_64-linux-gnu/bits/cpu-set.h /usr/include/time.h \ + /usr/include/x86_64-linux-gnu/bits/time.h \ + /usr/include/x86_64-linux-gnu/bits/timex.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h \ + /usr/include/x86_64-linux-gnu/bits/types/clock_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_tm.h \ + /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/timer_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h \ + /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \ + /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h \ + /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h \ + /usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h \ + /usr/include/x86_64-linux-gnu/bits/struct_mutex.h \ + /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h \ + /usr/include/x86_64-linux-gnu/bits/setjmp.h \ + /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct___jmp_buf_tag.h \ + /usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/atomic_word.h \ + /usr/include/x86_64-linux-gnu/sys/single_threaded.h \ + /usr/include/c++/11/bits/locale_classes.h /usr/include/c++/11/string \ + /usr/include/c++/11/bits/allocator.h \ + 
/usr/include/x86_64-linux-gnu/c++/11/bits/c++allocator.h \ + /usr/include/c++/11/ext/new_allocator.h \ + /usr/include/c++/11/bits/ostream_insert.h \ + /usr/include/c++/11/bits/cxxabi_forced.h \ + /usr/include/c++/11/bits/stl_function.h \ + /usr/include/c++/11/backward/binders.h \ + /usr/include/c++/11/bits/range_access.h \ + /usr/include/c++/11/initializer_list \ + /usr/include/c++/11/bits/basic_string.h \ + /usr/include/c++/11/ext/alloc_traits.h \ + /usr/include/c++/11/bits/alloc_traits.h \ + /usr/include/c++/11/bits/stl_construct.h /usr/include/c++/11/string_view \ + /usr/include/c++/11/bits/functional_hash.h \ + /usr/include/c++/11/bits/string_view.tcc \ + /usr/include/c++/11/ext/string_conversions.h /usr/include/c++/11/cstdlib \ + /usr/include/stdlib.h /usr/include/x86_64-linux-gnu/bits/waitflags.h \ + /usr/include/x86_64-linux-gnu/bits/waitstatus.h \ + /usr/include/x86_64-linux-gnu/sys/types.h /usr/include/endian.h \ + /usr/include/x86_64-linux-gnu/bits/byteswap.h \ + /usr/include/x86_64-linux-gnu/bits/uintn-identity.h \ + /usr/include/x86_64-linux-gnu/sys/select.h \ + /usr/include/x86_64-linux-gnu/bits/select.h \ + /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h \ + /usr/include/x86_64-linux-gnu/bits/select2.h /usr/include/alloca.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib-float.h \ + /usr/include/x86_64-linux-gnu/bits/stdlib.h \ + /usr/include/c++/11/bits/std_abs.h /usr/include/c++/11/cstdio \ + /usr/include/stdio.h /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h \ + /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h \ + /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h \ + /usr/include/x86_64-linux-gnu/bits/stdio_lim.h \ + /usr/include/x86_64-linux-gnu/bits/stdio.h \ + /usr/include/x86_64-linux-gnu/bits/stdio2.h /usr/include/c++/11/cerrno \ + /usr/include/errno.h /usr/include/x86_64-linux-gnu/bits/errno.h \ + 
/usr/include/linux/errno.h /usr/include/x86_64-linux-gnu/asm/errno.h \ + /usr/include/asm-generic/errno.h /usr/include/asm-generic/errno-base.h \ + /usr/include/x86_64-linux-gnu/bits/types/error_t.h \ + /usr/include/c++/11/bits/charconv.h \ + /usr/include/c++/11/bits/basic_string.tcc \ + /usr/include/c++/11/bits/locale_classes.tcc \ + /usr/include/c++/11/system_error \ + /usr/include/x86_64-linux-gnu/c++/11/bits/error_constants.h \ + /usr/include/c++/11/stdexcept /usr/include/c++/11/streambuf \ + /usr/include/c++/11/bits/streambuf.tcc \ + /usr/include/c++/11/bits/basic_ios.h \ + /usr/include/c++/11/bits/locale_facets.h /usr/include/c++/11/cwctype \ + /usr/include/wctype.h /usr/include/x86_64-linux-gnu/bits/wctype-wchar.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_base.h \ + /usr/include/c++/11/bits/streambuf_iterator.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/ctype_inline.h \ + /usr/include/c++/11/bits/locale_facets.tcc \ + /usr/include/c++/11/bits/basic_ios.tcc \ + /usr/include/c++/11/bits/ostream.tcc /usr/include/c++/11/istream \ + /usr/include/c++/11/bits/istream.tcc /usr/include/c++/11/cmath \ + /usr/include/math.h /usr/include/x86_64-linux-gnu/bits/math-vector.h \ + /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h \ + /usr/include/x86_64-linux-gnu/bits/flt-eval-method.h \ + /usr/include/x86_64-linux-gnu/bits/fp-logb.h \ + /usr/include/x86_64-linux-gnu/bits/fp-fast.h \ + /usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h \ + /usr/include/x86_64-linux-gnu/bits/mathcalls.h \ + /usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h \ + /usr/include/x86_64-linux-gnu/bits/iscanonical.h \ + /usr/include/c++/11/bits/specfun.h /usr/include/c++/11/limits \ + /usr/include/c++/11/tr1/gamma.tcc \ + /usr/include/c++/11/tr1/special_function_util.h \ + /usr/include/c++/11/tr1/bessel_function.tcc \ + /usr/include/c++/11/tr1/beta_function.tcc \ + /usr/include/c++/11/tr1/ell_integral.tcc \ + /usr/include/c++/11/tr1/exp_integral.tcc \ + 
/usr/include/c++/11/tr1/hypergeometric.tcc \ + /usr/include/c++/11/tr1/legendre_function.tcc \ + /usr/include/c++/11/tr1/modified_bessel_func.tcc \ + /usr/include/c++/11/tr1/poly_hermite.tcc \ + /usr/include/c++/11/tr1/poly_laguerre.tcc \ + /usr/include/c++/11/tr1/riemann_zeta.tcc /usr/include/c++/11/iomanip \ + /usr/include/c++/11/locale \ + /usr/include/c++/11/bits/locale_facets_nonio.h /usr/include/c++/11/ctime \ + /usr/include/x86_64-linux-gnu/c++/11/bits/time_members.h \ + /usr/include/x86_64-linux-gnu/c++/11/bits/messages_members.h \ + /usr/include/libintl.h /usr/include/c++/11/bits/codecvt.h \ + /usr/include/c++/11/bits/locale_facets_nonio.tcc \ + /usr/include/c++/11/bits/locale_conv.h \ + /usr/include/c++/11/bits/unique_ptr.h /usr/include/c++/11/utility \ + /usr/include/c++/11/bits/stl_relops.h /usr/include/c++/11/tuple \ + /usr/include/c++/11/array /usr/include/c++/11/bits/uses_allocator.h \ + /usr/include/c++/11/bits/invoke.h \ + /usr/include/c++/11/bits/quoted_string.h /usr/include/c++/11/sstream \ + /usr/include/c++/11/bits/sstream.tcc /usr/local/cuda/include/cuda.h \ + /usr/include/c++/11/stdlib.h /usr/local/cuda/include/cuda_runtime.h \ + /usr/local/cuda/include/crt/host_config.h \ + /usr/local/cuda/include/builtin_types.h \ + /usr/local/cuda/include/device_types.h \ + /usr/local/cuda/include/crt/host_defines.h \ + /usr/local/cuda/include/driver_types.h \ + /usr/local/cuda/include/vector_types.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/limits.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/syslimits.h \ + /usr/include/limits.h /usr/include/x86_64-linux-gnu/bits/posix1_lim.h \ + /usr/include/x86_64-linux-gnu/bits/local_lim.h \ + /usr/include/linux/limits.h \ + /usr/include/x86_64-linux-gnu/bits/posix2_lim.h \ + /usr/include/x86_64-linux-gnu/bits/xopen_lim.h \ + /usr/include/x86_64-linux-gnu/bits/uio_lim.h \ + /usr/local/cuda/include/surface_types.h \ + /usr/local/cuda/include/texture_types.h \ + /usr/local/cuda/include/library_types.h \ + 
/usr/local/cuda/include/channel_descriptor.h \ + /usr/local/cuda/include/cuda_runtime_api.h \ + /usr/local/cuda/include/cuda_device_runtime_api.h \ + /usr/local/cuda/include/driver_functions.h \ + /usr/local/cuda/include/vector_functions.h \ + /usr/local/cuda/include/vector_functions.hpp \ + /usr/include/opencv4/opencv2/core/core.hpp \ + /usr/include/opencv4/opencv2/core.hpp \ + /usr/include/opencv4/opencv2/core/cvdef.h \ + /usr/include/opencv4/opencv2/core/version.hpp \ + /usr/include/opencv4/opencv2/core/hal/interface.h \ + /usr/include/c++/11/cstddef \ + /usr/include/opencv4/opencv2/core/cv_cpu_dispatch.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/emmintrin.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/xmmintrin.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/mmintrin.h \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/mm_malloc.h \ + /usr/include/opencv4/opencv2/core/base.hpp \ + /usr/include/opencv4/opencv2/opencv_modules.hpp \ + /usr/include/c++/11/climits /usr/include/c++/11/algorithm \ + /usr/include/c++/11/bits/stl_algo.h \ + /usr/include/c++/11/bits/algorithmfwd.h \ + /usr/include/c++/11/bits/stl_heap.h \ + /usr/include/c++/11/bits/stl_tempbuf.h \ + /usr/include/c++/11/bits/uniform_int_dist.h \ + /usr/include/c++/11/pstl/glue_algorithm_defs.h \ + /usr/include/c++/11/functional /usr/include/c++/11/bits/refwrap.h \ + /usr/include/c++/11/bits/std_function.h \ + /usr/include/c++/11/unordered_map \ + /usr/include/c++/11/ext/aligned_buffer.h \ + /usr/include/c++/11/bits/hashtable.h \ + /usr/include/c++/11/bits/hashtable_policy.h \ + /usr/include/c++/11/bits/enable_special_members.h \ + /usr/include/c++/11/bits/node_handle.h \ + /usr/include/c++/11/bits/unordered_map.h \ + /usr/include/c++/11/bits/erase_if.h /usr/include/c++/11/vector \ + /usr/include/c++/11/bits/stl_uninitialized.h \ + /usr/include/c++/11/bits/stl_vector.h \ + /usr/include/c++/11/bits/stl_bvector.h \ + /usr/include/c++/11/bits/vector.tcc \ + /usr/include/c++/11/pstl/execution_defs.h \ + 
/usr/include/opencv4/opencv2/core/cvstd.hpp /usr/include/c++/11/cstring \ + /usr/include/string.h /usr/include/strings.h \ + /usr/include/x86_64-linux-gnu/bits/strings_fortified.h \ + /usr/include/x86_64-linux-gnu/bits/string_fortified.h \ + /usr/include/opencv4/opencv2/core/cvstd_wrapper.hpp \ + /usr/include/c++/11/memory \ + /usr/include/c++/11/bits/stl_raw_storage_iter.h \ + /usr/include/c++/11/bits/align.h /usr/include/c++/11/bit \ + /usr/include/c++/11/bits/shared_ptr.h \ + /usr/include/c++/11/bits/shared_ptr_base.h \ + /usr/include/c++/11/bits/allocated_ptr.h \ + /usr/include/c++/11/ext/concurrence.h \ + /usr/include/c++/11/bits/shared_ptr_atomic.h \ + /usr/include/c++/11/bits/atomic_base.h \ + /usr/include/c++/11/bits/atomic_lockfree_defines.h \ + /usr/include/c++/11/backward/auto_ptr.h \ + /usr/include/c++/11/pstl/glue_memory_defs.h \ + /usr/include/opencv4/opencv2/core/neon_utils.hpp \ + /usr/include/opencv4/opencv2/core/vsx_utils.hpp /usr/include/assert.h \ + /usr/include/opencv4/opencv2/core/check.hpp \ + /usr/include/opencv4/opencv2/core/traits.hpp \ + /usr/include/opencv4/opencv2/core/matx.hpp \ + /usr/include/opencv4/opencv2/core/saturate.hpp \ + /usr/include/opencv4/opencv2/core/fast_math.hpp \ + /usr/include/opencv4/opencv2/core/types.hpp /usr/include/c++/11/cfloat \ + /usr/lib/gcc/x86_64-linux-gnu/11/include/float.h \ + /usr/include/opencv4/opencv2/core/mat.hpp \ + /usr/include/opencv4/opencv2/core/bufferpool.hpp \ + /usr/include/opencv4/opencv2/core/mat.inl.hpp \ + /usr/include/opencv4/opencv2/core/persistence.hpp \ + /usr/include/opencv4/opencv2/core/operations.hpp \ + /usr/include/opencv4/opencv2/core/cvstd.inl.hpp \ + /usr/include/c++/11/complex \ + /usr/include/opencv4/opencv2/core/utility.hpp /usr/include/c++/11/mutex \ + /usr/include/c++/11/chrono /usr/include/c++/11/ratio \ + /usr/include/c++/11/bits/parse_numbers.h \ + /usr/include/c++/11/bits/std_mutex.h \ + /usr/include/c++/11/bits/unique_lock.h \ + 
/usr/include/opencv4/opencv2/core/optim.hpp \ + /usr/include/opencv4/opencv2/core/ovx.hpp \ + /usr/include/opencv4/opencv2/core/cvdef.h \ + /usr/include/opencv4/opencv2/highgui/highgui.hpp \ + /usr/include/opencv4/opencv2/highgui.hpp \ + /usr/include/opencv4/opencv2/imgcodecs.hpp \ + /usr/include/opencv4/opencv2/videoio.hpp \ + /usr/include/opencv4/opencv2/imgproc/imgproc.hpp \ + /usr/include/opencv4/opencv2/imgproc.hpp \ + /usr/include/opencv4/opencv2/imgproc/segmentation.hpp \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common/Utility.h \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.h \ + /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/progress.make b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/progress.make new file mode 100644 index 000000000..5b293683d --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/progress.make @@ -0,0 +1,9 @@ +CMAKE_PROGRESS_1 = 1 +CMAKE_PROGRESS_2 = 2 +CMAKE_PROGRESS_3 = 3 +CMAKE_PROGRESS_4 = 4 +CMAKE_PROGRESS_5 = 5 +CMAKE_PROGRESS_6 = 6 +CMAKE_PROGRESS_7 = 7 +CMAKE_PROGRESS_8 = 8 + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/progress.marks b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/progress.marks new file mode 100644 index 000000000..45a4fb75d --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/progress.marks @@ -0,0 +1 @@ +8 diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/Makefile b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/Makefile new file mode 100644 index 000000000..ef43b3964 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/Makefile @@ -0,0 +1,284 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 3.22 + +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + +# Disable VCS-based implicit rules. +% : %,v + +# Disable VCS-based implicit rules. +% : RCS/% + +# Disable VCS-based implicit rules. +% : RCS/%,v + +# Disable VCS-based implicit rules. +% : SCCS/s.% + +# Disable VCS-based implicit rules. +% : s.% + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Command-line flag to silence nested $(MAKE). +$(VERBOSE)MAKESILENT = -s + +#Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E rm -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." + /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 
+.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." + /usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache +.PHONY : rebuild_cache/fast + +# Special rule for the target list_install_components +list_install_components: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\"" +.PHONY : list_install_components + +# Special rule for the target list_install_components +list_install_components/fast: list_install_components +.PHONY : list_install_components/fast + +# Special rule for the target install +install: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." + /usr/bin/cmake -P cmake_install.cmake +.PHONY : install + +# Special rule for the target install +install/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..." + /usr/bin/cmake -P cmake_install.cmake +.PHONY : install/fast + +# Special rule for the target install/local +install/local: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." + /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local + +# Special rule for the target install/local +install/local/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..." 
+ /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake +.PHONY : install/local/fast + +# Special rule for the target install/strip +install/strip: preinstall + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." + /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip + +# Special rule for the target install/strip +install/strip/fast: preinstall/fast + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..." + /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake +.PHONY : install/strip/fast + +# The main all target +all: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build//CMakeFiles/progress.marks + $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 all + $(CMAKE_COMMAND) -E cmake_progress_start /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 clean +.PHONY : clean + +# The main clean target +clean/fast: clean +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +#============================================================================= +# Target rules for targets named cudasift + +# Build rule for target. +cudasift: cmake_check_build_system + $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 cudasift +.PHONY : cudasift + +# fast build rule for target. 
+cudasift/fast: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/build +.PHONY : cudasift/fast + +geomFuncs.o: geomFuncs.cpp.o +.PHONY : geomFuncs.o + +# target to build an object file +geomFuncs.cpp.o: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/geomFuncs.cpp.o +.PHONY : geomFuncs.cpp.o + +geomFuncs.i: geomFuncs.cpp.i +.PHONY : geomFuncs.i + +# target to preprocess a source file +geomFuncs.cpp.i: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/geomFuncs.cpp.i +.PHONY : geomFuncs.cpp.i + +geomFuncs.s: geomFuncs.cpp.s +.PHONY : geomFuncs.s + +# target to generate assembly for a file +geomFuncs.cpp.s: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/geomFuncs.cpp.s +.PHONY : geomFuncs.cpp.s + +home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o: home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o +.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o + +# target to build an object file +home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o +.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o + +home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.i: home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i +.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.i + +# target to preprocess a source file +home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i +.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.i + 
+home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.s: home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s +.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.s + +# target to generate assembly for a file +home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s +.PHONY : home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.s + +mainSift.o: mainSift.cpp.o +.PHONY : mainSift.o + +# target to build an object file +mainSift.cpp.o: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/mainSift.cpp.o +.PHONY : mainSift.cpp.o + +mainSift.i: mainSift.cpp.i +.PHONY : mainSift.i + +# target to preprocess a source file +mainSift.cpp.i: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/mainSift.cpp.i +.PHONY : mainSift.cpp.i + +mainSift.s: mainSift.cpp.s +.PHONY : mainSift.s + +# target to generate assembly for a file +mainSift.cpp.s: + $(MAKE) $(MAKESILENT) -f CMakeFiles/cudasift.dir/build.make CMakeFiles/cudasift.dir/mainSift.cpp.s +.PHONY : mainSift.cpp.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... install" + @echo "... install/local" + @echo "... install/strip" + @echo "... list_install_components" + @echo "... rebuild_cache" + @echo "... cudasift" + @echo "... geomFuncs.o" + @echo "... geomFuncs.i" + @echo "... geomFuncs.s" + @echo "... home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o" + @echo "... home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.i" + @echo "... home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.s" + @echo "... 
mainSift.o" + @echo "... mainSift.i" + @echo "... mainSift.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cmake_install.cmake b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cmake_install.cmake new file mode 100644 index 000000000..b83980cb5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cmake_install.cmake @@ -0,0 +1,79 @@ +# Install script for directory: /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/local") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "1") +endif() + +# Is this installation the result of a crosscompile? 
+if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +# Set default install directory permissions. +if(NOT DEFINED CMAKE_OBJDUMP) + set(CMAKE_OBJDUMP "/usr/bin/objdump") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/." TYPE FILE FILES + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.h" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common/Utility.cpp" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/CMakeLists.txt" + ) +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT) + file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/data" TYPE FILE FILES + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/data/left.pgm" + "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/data/righ.pgm" + ) +endif() + +if(CMAKE_INSTALL_COMPONENT) + set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") +else() + set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") +endif() + +string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT + "${CMAKE_INSTALL_MANIFEST_FILES}") +file(WRITE 
"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/${CMAKE_INSTALL_MANIFEST}" + "${CMAKE_INSTALL_MANIFEST_CONTENT}") diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/compile_commands.json b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/compile_commands.json new file mode 100644 index 000000000..4a2149538 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/compile_commands.json @@ -0,0 +1,40 @@ +[ + { + "command": "nvcc --cuda-gpu-arch=sm_80 -m64 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir/./cudasift_intermediate_link.o -D__CUDACC__=1", + "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build" + }, + { + "command": "nvcc -c -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_matching.cu.o -m64 -O3 -msse2 --cuda-gpu-arch=sm_80 -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/include/opencv4 -D__CUDACC__=1 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu", + "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir", + "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu" + }, + { + "command": "nvcc -c -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaImage.cu.o -m64 -O3 -msse2 --cuda-gpu-arch=sm_80 -DNVCC -I/usr/local/cuda/include 
-I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/include/opencv4 -D__CUDACC__=1 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu", + "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir", + "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu" + }, + { + "command": "nvcc -c -o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir//./cudasift_generated_cudaSiftH.cu.o -m64 -O3 -msse2 --cuda-gpu-arch=sm_80 -DNVCC -I/usr/local/cuda/include -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/include/opencv4 -D__CUDACC__=1 /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu", + "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build/CMakeFiles/cudasift.dir", + "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu" + }, + { + "command": "c++ -c -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4 -O3 -msse2 -std=gnu++17 -o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp", + "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build", + "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp" + }, + { + "command": "c++ -c -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4 -O3 -msse2 -std=gnu++17 -o CMakeFiles/cudasift.dir/geomFuncs.cpp.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp", + "directory": 
"/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build", + "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp" + }, + { + "command": "c++ -c -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/../common -I/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA -I/usr/local/cuda/include -isystem /usr/include/opencv4 -O3 -msse2 -std=gnu++17 -o CMakeFiles/cudasift.dir/mainSift.cpp.o /home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp", + "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build", + "file": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp" + }, + { + "command": "ld -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccK090v3.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -o cudasift /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o CMakeFiles/cudasift.dir/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp.o CMakeFiles/cudasift.dir/geomFuncs.cpp.o CMakeFiles/cudasift.dir/mainSift.cpp.o CMakeFiles/cudasift.dir/cudasift_generated_cudaImage.cu.o CMakeFiles/cudasift.dir/cudasift_generated_cudaSiftH.cu.o CMakeFiles/cudasift.dir/cudasift_generated_matching.cu.o CMakeFiles/cudasift.dir/cudasift_intermediate_link.o /usr/local/cuda/lib64/libcudart_static.a /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudadevrt.a /usr/lib/x86_64-linux-gnu/libopencv_stitching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_alphamat.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_aruco.so.4.5.4d 
/usr/lib/x86_64-linux-gnu/libopencv_barcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bgsegm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_bioinspired.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ccalib.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dpm.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_face.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_freetype.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_fuzzy.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hdf.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_hfs.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_img_hash.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_intensity_transform.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_line_descriptor.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_mcc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_quality.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rapid.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_reg.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_rgbd.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_saliency.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_shape.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_stereo.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_structured_light.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_superres.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_surface_matching.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_tracking.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videostab.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_viz.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_wechat_qrcode.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xobjdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_xphoto.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_highgui.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_datasets.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_plot.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_text.so.4.5.4d 
/usr/lib/x86_64-linux-gnu/libopencv_ml.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_phase_unwrapping.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_optflow.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_ximgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_video.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_videoio.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgcodecs.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_objdetect.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_calib3d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_dnn.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_features2d.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_flann.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_photo.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_imgproc.so.4.5.4d /usr/lib/x86_64-linux-gnu/libopencv_core.so.4.5.4d /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build" + } +] \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cudasift b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cudasift new file mode 100644 index 000000000..34b81727b Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/build/cudasift differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.cu new file mode 100644 index 000000000..e12f15093 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.cu @@ -0,0 +1,107 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files 
(the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include +#include + +#include "cudautils.h" +#include "cudaImage.h" + +int iDivUp(int a, int b) { return (a % b != 0) ? (a / b + 1) : (a / b); } +int iDivDown(int a, int b) { return a / b; } +int iAlignUp(int a, int b) { return (a % b != 0) ? 
(a - a % b + b) : a; } +int iAlignDown(int a, int b) { return a - a % b; } + +void CudaImage::Allocate(int w, int h, int p, bool host, float &totTime, float *devmem, float *hostmem) +{ + width = w; + height = h; + pitch = p; + d_data = devmem; + h_data = hostmem; + t_data = NULL; + if (devmem == NULL) + { +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMallocPitch((void **)&d_data, (size_t *)&pitch, (size_t)(sizeof(float) * width), (size_t)height)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + std::cout << "Allocate Time is " << std::chrono::duration(stop_malloc - start_malloc).count() << " us" << std::endl; + totTime += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + pitch /= sizeof(float); + if (d_data == NULL) + printf("Failed to allocate device data\n"); + d_internalAlloc = true; + } + if (host && hostmem == NULL) + { + h_data = (float *)malloc(sizeof(float) * pitch * height); + h_internalAlloc = true; + } +} + +CudaImage::CudaImage() : width(0), height(0), pitch(0), d_data(NULL), h_data(NULL), t_data(NULL), d_internalAlloc(false), h_internalAlloc(false) +{ +} + +CudaImage::~CudaImage() +{ + if (d_internalAlloc && d_data != NULL) + safeCall(cudaFree(d_data)); + d_data = NULL; + if (h_internalAlloc && h_data != NULL) + free(h_data); + h_data = NULL; + if (t_data != NULL) + safeCall(cudaFreeArray((cudaArray *)t_data)); + t_data = NULL; +} + +double CudaImage::Download(float &totTime) +{ + double downloadTime = 0.0; + int p = sizeof(float) * pitch; + if (d_data != NULL && h_data != NULL) + { +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy2D(d_data, p, h_data, sizeof(float) * width, sizeof(float) * width, height, cudaMemcpyHostToDevice)); + // safeCall(cudaMemcpy(d_data, h_data, sizeof(float) * width * height, cudaMemcpyHostToDevice)); + 
safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); + downloadTime = std::chrono::duration(stop_memcpy - start_memcpy).count(); + std::cout << "Download Time is " << downloadTime << " us" << std::endl; +#endif + } + return downloadTime; +} \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.h new file mode 100644 index 000000000..737446686 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaImage.h @@ -0,0 +1,38 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +#ifndef CUDAIMAGE_H +#define CUDAIMAGE_H + +class CudaImage +{ +public: + int width, height; + int pitch; + float *h_data; + float *d_data; + float *t_data; + bool d_internalAlloc; + bool h_internalAlloc; + +public: + CudaImage(); + CudaImage(const CudaImage&) = delete; + CudaImage& operator=(const CudaImage&) = delete; + ~CudaImage(); + void Allocate(int width, int height, int pitch, bool withHost, float &totTime, float *devMem = NULL, float *hostMem = NULL); + double Download(float &totTime); + double Readback(); + double InitTexture(); + double CopyToTexture(CudaImage &dst, bool host); +}; + +int iDivUp(int a, int b); +int iDivDown(int a, int b); +int iAlignUp(int a, int b); +int iAlignDown(int a, int b); +void StartTimer(unsigned int *hTimer); +double StopTimer(unsigned int hTimer); + +#endif // CUDAIMAGE_H diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSift.h new file mode 100644 index 000000000..b49f6c503 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSift.h @@ -0,0 +1,48 @@ +#ifndef CUDASIFT_H 
+#define CUDASIFT_H + +#include "cudaImage.h" + +typedef struct +{ + float xpos; + float ypos; + float scale; + float sharpness; + float edgeness; + float orientation; + float score; + float ambiguity; + int match; + float match_xpos; + float match_ypos; + float match_error; + float subsampling; + float empty[3]; + float data[128]; +} SiftPoint; + +typedef struct +{ + int numPts; // Number of available Sift points + int maxPts; // Number of allocated Sift points +#ifdef MANAGEDMEM + SiftPoint *m_data; // Managed data +#else + SiftPoint *h_data; // Host (CPU) data + SiftPoint *d_data; // Device (GPU) data +#endif +} SiftData; + +void InitCuda(int devNum = 0); +float *AllocSiftTempMemory(int width, int height, int numOctaves, float &totTime, bool scaleUp = false); +void FreeSiftTempMemory(float *memoryTmp); +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, + float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0); +void InitSiftData(SiftData &data, float &totTime, int num = 1024, bool host = false, bool dev = true); +void FreeSiftData(SiftData &data); +void PrintSiftData(SiftData &data); +double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime); +double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f); + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu new file mode 100644 index 000000000..db018dd14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu @@ -0,0 +1,2263 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// 
Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include "cudautils.h" +#include "cudaSiftD.h" +#include "cudaSift.h" + +/////////////////////////////////////////////////////////////////////////////// +// Kernel configuration +/////////////////////////////////////////////////////////////////////////////// + +__constant__ int d_MaxNumPoints; +__device__ unsigned int d_PointCounter[8 * 2 + 1]; +__constant__ float d_ScaleDownKernel[5]; +__constant__ float d_LowPassKernel[2 * LOWPASS_R + 1]; +__constant__ float d_LaplaceKernel[8 * 12 * 16]; + +/////////////////////////////////////////////////////////////////////////////// +// Lowpass filter and subsample image +/////////////////////////////////////////////////////////////////////////////// +__global__ void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + __shared__ float brows[BH * BW]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * SCALEDOWN_W + tx; + const int yp = blockIdx.y * SCALEDOWN_H + ty; + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + const int xl = min(width - 1, max(0, xp - 2)); + const int yl = min(height - 1, max(0, yp - 2)); + if (xp < (width + 4) && yp < (height + 4)) + { + float v = d_Data[yl * pitch + xl]; + brows[BW * ty + tx] = k0 * (v + ShiftDown(v, 4)) + k1 * (ShiftDown(v, 1) + ShiftDown(v, 3)) + k2 * ShiftDown(v, 2); + } + __syncthreads(); + const int xs = blockIdx.x * W2 + tx; + const int ys = blockIdx.y * H2 + ty; + if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW]; + } +} + +__global__ void ScaleDownDense(float *d_Result, float 
*d_Data, int width, int pitch, int height, int newpitch) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + __shared__ float irows[BH * BW]; + __shared__ float brows[BH * W2]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * SCALEDOWN_W + tx; + const int yp = blockIdx.y * SCALEDOWN_H + ty; + const int xl = min(width - 1, max(0, xp - 2)); + const int yl = min(height - 1, max(0, yp - 2)); + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + if (xp < (width + 4) && yp < (height + 4)) + irows[BW * ty + tx] = d_Data[yl * pitch + xl]; + __syncthreads(); + if (yp < (height + 4) && tx < W2) + { + float *ptr = &irows[BW * ty + 2 * tx]; + brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2]; + } + __syncthreads(); + const int xs = blockIdx.x * W2 + tx; + const int ys = blockIdx.y * H2 + ty; + if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[W2 * (ty * 2) + tx]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2]; + } +} + +__global__ void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) +{ + __shared__ float inrow[SCALEDOWN_W + 4]; + __shared__ float brow[5 * (SCALEDOWN_W / 2)]; + __shared__ int yRead[SCALEDOWN_H + 4]; + __shared__ int yWrite[SCALEDOWN_H + 4]; +#define dx2 (SCALEDOWN_W / 2) + const int tx = threadIdx.x; + const int tx0 = tx + 0 * dx2; + const int tx1 = tx + 1 * dx2; + const int tx2 = tx + 2 * dx2; + const int tx3 = tx + 3 * dx2; + const int tx4 = tx + 4 * dx2; + const int xStart = blockIdx.x * SCALEDOWN_W; + const int yStart = blockIdx.y * SCALEDOWN_H; + const int xWrite = xStart / 2 + tx; + float k0 = d_ScaleDownKernel[0]; + float k1 = d_ScaleDownKernel[1]; + float k2 = d_ScaleDownKernel[2]; + if (tx 
< SCALEDOWN_H + 4) + { + int y = yStart + tx - 2; + y = (y < 0 ? 0 : y); + y = (y >= height ? height - 1 : y); + yRead[tx] = y * pitch; + yWrite[tx] = (yStart + tx - 4) / 2 * newpitch; + } + __syncthreads(); + int xRead = xStart + tx - 2; + xRead = (xRead < 0 ? 0 : xRead); + xRead = (xRead >= width ? width - 1 : xRead); + + int maxtx = min(dx2, width / 2 - xStart / 2); + for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5) + { + { + inrow[tx] = d_Data[yRead[dy + 0] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 4 && !(dy & 1)) + d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]); + } + __syncthreads(); + } + if (dy < (SCALEDOWN_H + 3)) + { + inrow[tx] = d_Data[yRead[dy + 1] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 3 && (dy & 1)) + d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]); + } + __syncthreads(); + } + if (dy < (SCALEDOWN_H + 2)) + { + inrow[tx] = d_Data[yRead[dy + 2] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 2 && !(dy & 1)) + d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]); + } + __syncthreads(); + } + if (dy < (SCALEDOWN_H + 1)) + { + inrow[tx] = d_Data[yRead[dy + 3] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 1 && (dy & 1)) + d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + 
brow[tx2]) + k1 * (brow[tx4] + brow[tx1]); + } + __syncthreads(); + } + if (dy < SCALEDOWN_H) + { + inrow[tx] = d_Data[yRead[dy + 4] + xRead]; + __syncthreads(); + if (tx < dx2 && xWrite < width / 2) + { + brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (!(dy & 1)) + d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]); + } + __syncthreads(); + } + } +} + +__global__ void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) +{ + const int tx = threadIdx.x; + const int ty = threadIdx.y; + int x = blockIdx.x * SCALEUP_W + 2 * tx; + int y = blockIdx.y * SCALEUP_H + 2 * ty; + if (x < 2 * width && y < 2 * height) + { + int xl = blockIdx.x * (SCALEUP_W / 2) + tx; + int yu = blockIdx.y * (SCALEUP_H / 2) + ty; + int xr = min(xl + 1, width - 1); + int yd = min(yu + 1, height - 1); + float vul = d_Data[yu * pitch + xl]; + float vur = d_Data[yu * pitch + xr]; + float vdl = d_Data[yd * pitch + xl]; + float vdr = d_Data[yd * pitch + xr]; + d_Result[(y + 0) * newpitch + x + 0] = vul; + d_Result[(y + 0) * newpitch + x + 1] = 0.50f * (vul + vur); + d_Result[(y + 1) * newpitch + x + 0] = 0.50f * (vul + vdl); + d_Result[(y + 1) * newpitch + x + 1] = 0.25f * (vul + vur + vdl + vdr); + } +} + +__global__ void ExtractSiftDescriptors(cudaTextureObject_t texObj, SiftPoint *d_sift, int fstPts, float subsampling) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int tx = threadIdx.x; // 0 -> 16 + const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + const int bx = blockIdx.x + fstPts; // 0 -> numPts + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa 
-sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + 
float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } +} + +__device__ float FastAtan2(float y, float x) +{ + float absx = abs(x); + float absy = abs(y); + float a = __fdiv_rn(min(absx, absy), max(absx, absy)); + float s = a * a; + float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a; + r = (absy > absx ? 1.57079637f - r : r); + r = (x < 0 ? 3.14159274f - r : r); + r = (y < 0 ? 
-r : r); + return r; +} + +// __global__ void ExtractSiftDescriptorsCONSTNew(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave) +__global__ void ExtractSiftDescriptorsCONSTNew(float *texObj, int pitch, SiftPoint *d_sift, float subsampling, int octave) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int tx = threadIdx.x; // 0 -> 16 + const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + if (ty == 0) + gauss[tx] = __expf(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints); + // if (tx==0 && ty==0) + // printf("%d %d %d %d\n", octave, fstPts, min(d_PointCounter[2*octave], d_MaxNumPoints), totPts); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = __sinf(theta); // cosa -sina + float cosa = __cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + + // float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + // tex2D(texObj, xpos - cosa, ypos - sina); + // float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + // tex2D(texObj, xpos + sina, ypos - cosa); + + int xi1 = xpos + cosa; + int yi1 = ypos + sina; + + int xi2 = xpos - cosa; + int yi2 = ypos - sina; + + float dx = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + xi2); + + xi1 = xpos - sina; + yi1 = ypos + cosa; + + xi2 = xpos + sina; + yi2 = ypos - cosa; + + float dy = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + 
xi2); + + float grad = gauss[y] * gauss[tx] * __fsqrt_rn(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * 
rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + __syncthreads(); + } +} + +__global__ void ExtractSiftDescriptorsCONST(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int tx = threadIdx.x; // 0 -> 16 + const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints); + // if (tx==0 && ty==0) + // printf("%d %d %d %d\n", octave, fstPts, min(d_PointCounter[2*octave], d_MaxNumPoints), totPts); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa -sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - 
verf; + int angi = angf; + int angp = (angi < 7 ? angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + __syncthreads(); + } +} + +__global__ void ExtractSiftDescriptorsOld(cudaTextureObject_t texObj, SiftPoint *d_sift, int fstPts, float subsampling) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float 
sums[128]; + + const int tx = threadIdx.x; // 0 -> 16 + const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + const int bx = blockIdx.x + fstPts; // 0 -> numPts + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa -sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + if (idx < 64) + sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64]; + __syncthreads(); + if (idx < 32) + sums[idx] = sums[idx] + sums[idx + 32]; + __syncthreads(); + if (idx < 16) + sums[idx] = sums[idx] + sums[idx + 16]; + __syncthreads(); + if (idx < 8) + sums[idx] = sums[idx] + sums[idx + 8]; + __syncthreads(); + if (idx < 4) + sums[idx] = sums[idx] + sums[idx + 4]; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + buffer[idx] = buffer[idx] * rsqrtf(tsum1); + + if (buffer[idx] > 0.2f) + buffer[idx] = 0.2f; + __syncthreads(); + if (idx < 64) + sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64]; + __syncthreads(); + if (idx < 32) + sums[idx] = sums[idx] + sums[idx + 32]; + __syncthreads(); + if (idx < 16) + sums[idx] = sums[idx] + sums[idx + 16]; + __syncthreads(); + if (idx < 8) + sums[idx] = sums[idx] + sums[idx + 8]; + __syncthreads(); + if (idx < 4) + sums[idx] = sums[idx] 
+ sums[idx + 4]; + __syncthreads(); + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + + float *desc = d_sift[bx].data; + desc[idx] = buffer[idx] * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } +} + +__device__ void ExtractSiftDescriptor(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave, int bx) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int idx = threadIdx.x; + const int tx = idx & 15; // 0 -> 16 + const int ty = idx / 16; // 0 -> 8 + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa -sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + __syncthreads(); +} + +__global__ void RescalePositions(SiftPoint *d_sift, int numPts, float scale) +{ + int num = blockIdx.x * blockDim.x + threadIdx.x; + if (num < numPts) + { + d_sift[num].xpos *= scale; + d_sift[num].ypos *= scale; + d_sift[num].scale *= scale; + } +} + 
+__global__ void ComputeOrientations(cudaTextureObject_t texObj, SiftPoint *d_Sift, int fstPts) +{ + __shared__ float hist[64]; + __shared__ float gauss[11]; + const int tx = threadIdx.x; + const int bx = blockIdx.x + fstPts; + float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + __syncthreads(); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = tex2D(texObj, xf + 1.0, yf) - tex2D(texObj, xf - 1.0, yf); + float dy = tex2D(texObj, xf, yf + 1.0) - tex2D(texObj, xf, yf - 1.0); + int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sqrtf(dx * dx + dy * dy); + atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + __syncthreads(); + if (tx < 32) + { + float v = hist[32 + tx]; + if(x1p < 32 && x1m < 32) + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? 
peak + 32.0f : peak); + if (maxval2 > 0.8f * maxval1) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = atomicInc(d_PointCounter, 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } +} + +// With constant number of blocks +__global__ void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d_Sift, int octave) +{ +#define RAD 9 +#define WID (2 * RAD + 1) +#define LEN 32 //%%%% Note: Lowe suggests 36, not 32 + __shared__ float img[WID][WID], tmp[WID][WID]; + __shared__ float hist[2 * LEN]; + __shared__ float gaussx[WID], gaussy[WID]; + const int tx = threadIdx.x; + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + float sc = d_Sift[bx].scale; + for (int i = tx; i < 2 * LEN; i += blockDim.x) + hist[i] = 0.0f; + float xp = d_Sift[bx].xpos; + float yp = d_Sift[bx].ypos; + int xi = (int)xp; + int yi = (int)yp; + float xf = xp - xi; + float yf = yp - yi; + for (int i = tx; i < WID * WID; i += blockDim.x) + { + int y = i / WID; + int x = i - y * WID; + int xp = max(min(x - RAD + xi, w - 1), 0); + int yp = max(min(y - RAD + yi, h - 1), 0); + img[y][x] = image[yp * p + xp]; + } + float fac[5]; + fac[1] = fac[3] = (sc > 0.5f ? __expf(-1.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f); + fac[0] = fac[4] = (sc > 0.5f ? 
__expf(-4.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f); + fac[2] = 1.0f; + float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0 + if (tx < WID) + { + gaussx[tx] = __expf(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf)); + gaussy[tx] = __expf(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf)); + } + __syncthreads(); + for (int i = tx; i < (WID - 4) * WID; i += blockDim.x) + { + int y = i / WID; + int x = i - y * WID; + y += 2; + tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) + + fac[0] * (img[y - 2][x] + img[y + 2][x]); + } + __syncthreads(); + for (int i = tx; i < (WID - 4) * (WID - 4); i += blockDim.x) + { + int y = i / (WID - 4); + int x = i - y * (WID - 4); + x += 2; + y += 2; + img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) + + fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]); + } + __syncthreads(); + for (int i = tx; i < (WID - 6) * (WID - 6); i += blockDim.x) + { + int y = i / (WID - 6); + int x = i - y * (WID - 6); + x += 3; + y += 3; + float dx = img[y][x + 1] - img[y][x - 1]; + float dy = img[y + 1][x] - img[y - 1][x]; + int bin = (int)((LEN / 2) * atan2f(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) % LEN; + float grad = __fsqrt_rn(dx * dx + dy * dy); + atomicAdd(&hist[LEN + bin], grad * gaussx[x] * gaussy[y]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1); + int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1); + int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2); + int x2p = (tx < (LEN - 2) ? tx + 2 : tx - LEN + 2); + if (tx < LEN) + { + hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) + + 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]); + hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + + 0.0f * (hist[x2m] + hist[x2p]); + float val = hist[tx + LEN]; + hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? 
val : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < LEN; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[LEN + ((i1 + 1) % LEN)]; + float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN; + atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1 && true) + { + float val1 = hist[LEN + ((i2 + 1) % LEN)]; + float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = sc; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? 
peak + LEN : peak) / LEN; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + } +#undef RAD +#undef WID +#undef LEN +} + +// With constant number of blocks +__global__ void ComputeOrientationsCONST(cudaTextureObject_t texObj, SiftPoint *d_Sift, int octave) +{ + __shared__ float hist[64]; + __shared__ float gauss[11]; + const int tx = threadIdx.x; + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + __syncthreads(); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = tex2D(texObj, xf + 1.0, yf) - tex2D(texObj, xf - 1.0, yf); + float dy = tex2D(texObj, xf, yf + 1.0) - tex2D(texObj, xf, yf - 1.0); + int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sqrtf(dx * dx + dy * dy); + atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + __syncthreads(); + if (tx < 32) + { + float v = hist[32 + tx]; + if(x1m < 32) + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? 
v : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1 && true) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? 
peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + __syncthreads(); + } +} + +// With constant number of blocks +__global__ void OrientAndExtractCONST(cudaTextureObject_t texObj, SiftPoint *d_Sift, float subsampling, int octave) +{ + __shared__ float hist[64]; + __shared__ float gauss[11]; + __shared__ unsigned int idx; //%%%% + const int tx = threadIdx.x; + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + __syncthreads(); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = tex2D(texObj, xf + 1.0, yf) - tex2D(texObj, xf - 1.0, yf); + float dy = tex2D(texObj, xf, yf + 1.0) - tex2D(texObj, xf, yf - 1.0); + int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sqrtf(dx * dx + dy * dy); + atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + __syncthreads(); + if (tx < 32) + { + float v = hist[32 + tx]; + if(x1m < 32) + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? 
v : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + idx = 0xffffffff; //%%%% + atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); //%%%% + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? 
peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + __syncthreads(); + ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, bx); //%%%% + if (idx < d_MaxNumPoints) //%%%% + ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, idx); //%%%% + } +} + +/////////////////////////////////////////////////////////////////////////////// +// Subtract two images (multi-scale version) +/////////////////////////////////////////////////////////////////////////////// + +// __global__ void FindPointsMultiTest(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +// { +// #define MEMWID (MINMAX_W + 2) +// __shared__ unsigned int cnt; +// __shared__ unsigned short points[3 * MEMWID]; + +// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && threadIdx.y == 0) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); +// } +// int tx = threadIdx.x; +// int ty = threadIdx.y; +// if (tx == 0 && ty == 0) +// cnt = 0; +// __syncthreads(); + +// int ypos = MINMAX_H * blockIdx.y + ty; +// if (ypos >= height) +// return; +// int block = blockIdx.x / NUM_SCALES; +// int scale = blockIdx.x - NUM_SCALES * block; +// int minx = block * MINMAX_W; +// int maxx = min(minx + MINMAX_W, width); +// int xpos = minx + tx; +// int size = pitch * height; +// int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + +// float maxv = fabs(d_Data0[ptr + ypos * pitch + 1 * size]); +// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W)); + +// if (Shuffle(maxv, 0) > thresh) +// { +// int yptr1 = ptr + ypos * pitch; +// int 
yptr0 = ptr + max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// float ymin1 = fminf(fminf(d10, d11), d12); +// float ymax1 = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// float ymin3 = fminf(fminf(d30, d31), d32); +// float ymax3 = fmaxf(fmaxf(d30, d31), d32); +// float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3); +// float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3); + +// float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); +// float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(nmin2, ymin1), ymin3); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// } +// __syncthreads(); +// if (ty == 0 && tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; 
+// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx);
+//         d_Sift[idx].xpos = xpos + pdx;
+//         d_Sift[idx].ypos = ypos + pdy;
+//         d_Sift[idx].scale = sc;
+//         d_Sift[idx].sharpness = val + dval;
+//         d_Sift[idx].edgeness = edge;
+//         d_Sift[idx].subsampling = subsampling;
+//       }
+//     }
+//   }
+// }
+
+// Finds thresholded local extrema in a stack of image planes stored back to
+// back in d_Data0 (plane stride = pitch * height), refines each candidate to
+// sub-sample precision and appends the survivors to d_Sift.
+//
+// Work layout, as established by the index arithmetic below:
+//   * blockIdx.x encodes both a horizontal tile and a scale:
+//       tile  = blockIdx.x / NUM_SCALES, scale = blockIdx.x % NUM_SCALES.
+//   * blockIdx.y selects a band of up to MINMAX_H rows.
+//   * thread tx handles column (tile * MINMAX_W + tx - 1), clamped to
+//     [0, width - 1]; threads 0 and MINMAX_W + 1 only supply neighbour values.
+//
+// Parameters:
+//   d_Data0      planes of filtered image data; planes scale, scale + 1 and
+//                scale + 2 are read, the middle one holds the candidates.
+//   d_Sift       output array of SiftPoint records.
+//   width/pitch/height  plane width, row stride in floats, number of rows.
+//   subsampling  stored verbatim in each emitted point.
+//   lowestScale  points whose refined scale is below this are dropped.
+//   thresh       minimum absolute response of a candidate.
+//   factor       multiplies the refined scale offset inside exp2f().
+//   edgeLimit    a candidate is kept only if trace^2 < edgeLimit * det.
+//   octave       selects the d_PointCounter slots used for this launch.
+//
+// NOTE(review): every warp-level call uses a full 0xffffffff mask and the
+// shared buffer is filled without __syncthreads(), so the kernel appears to
+// assume a block of exactly one full warp (blockDim.x == 32) - confirm
+// against the launch site.
+// NOTE(review): ShiftUp / ShiftDown / Shuffle are defined elsewhere in the
+// file; they are assumed here to be lane-exchange wrappers around
+// __shfl_*_sync - confirm.
+__global__ void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+{
+#define MEMWID (MINMAX_W + 2)
+  // (x, y) pairs of the candidates found by this block.
+  __shared__ unsigned short points[2 * MEMWID];
+
+  // One thread of the whole grid raises this octave's counters to at least
+  // the value stored in slot 2 * octave - 1.
+  // NOTE(review): for octave == 0 that slot is index -1; the declaration of
+  // d_PointCounter is not visible here - confirm this read is in bounds.
+  if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+  {
+    atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+    atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+  }
+  int tx = threadIdx.x;
+  int block = blockIdx.x / NUM_SCALES;
+  int scale = blockIdx.x - NUM_SCALES * block;
+  int minx = block * MINMAX_W;
+  int maxx = min(minx + MINMAX_W, width);
+  int xpos = minx + tx;
+  int size = pitch * height;
+  // Offset of this thread's (clamped) column in plane `scale`.
+  int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+  // Pass 1: largest absolute response of this column in the middle plane.
+  int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+  float maxv = 0.0f;
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * blockIdx.y + y;
+    int yptr1 = ptr + ypos * pitch;
+    float val = d_Data0[yptr1 + 1 * size];
+    maxv = fmaxf(maxv, fabsf(val));
+  }
+  // The vote result is identical for every lane, so the whole warp leaves
+  // together when no column exceeds the threshold.
+  if (!__any_sync(0xffffffff, maxv > thresh))
+    return;
+
+  // Pass 2: bit y of ptbits is set when row y of this column is a strict
+  // extremum beyond +/- thresh.
+  unsigned int ptbits = 0;
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * blockIdx.y + y;
+    int yptr1 = ptr + ypos * pitch;
+    float d11 = d_Data0[yptr1 + 1 * size];
+    // Warp-uniform condition: rows where no lane exceeds thresh are skipped
+    // by all lanes, so the lane exchanges below are executed together.
+    if (__any_sync(0xffffffff, fabsf(d11) > thresh))
+    {
+      // Rows above/below, clamped at the image border.
+      int yptr0 = ptr + max(0, ypos - 1) * pitch;
+      int yptr2 = ptr + min(height - 1, ypos + 1) * pitch;
+      float d01 = d_Data0[yptr1];
+      float d10 = d_Data0[yptr0 + 1 * size];
+      float d12 = d_Data0[yptr2 + 1 * size];
+      float d21 = d_Data0[yptr1 + 2 * size];
+
+      // Min/max over the three rows of this column in the first plane ...
+      float d00 = d_Data0[yptr0];
+      float d02 = d_Data0[yptr2];
+      float ymin1 = fminf(fminf(d00, d01), d02);
+      float ymax1 = fmaxf(fmaxf(d00, d01), d02);
+      // ... in the third plane ...
+      float d20 = d_Data0[yptr0 + 2 * size];
+      float d22 = d_Data0[yptr2 + 2 * size];
+      float ymin3 = fminf(fminf(d20, d21), d22);
+      float ymax3 = fmaxf(fmaxf(d20, d21), d22);
+      // ... and over all nine values of this column (centre included).
+      float ymin2 = fminf(fminf(ymin1, fminf(fminf(d10, d12), d11)), ymin3);
+      float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d10, d12), d11)), ymax3);
+
+      // Column extrema of the two neighbouring lanes.
+      float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1));
+      float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1));
+      // Extrema over every neighbour of d11 (d11 itself excluded).
+      float nbrMin = fminf(fminf(nmin2, ymin1), ymin3);
+      nbrMin = fminf(fminf(nbrMin, d10), d12);
+      float nbrMax = fmaxf(fmaxf(nmax2, ymax1), ymax3);
+      nbrMax = fmaxf(fmaxf(nbrMax, d10), d12);
+
+      // Only interior lanes whose column lies inside the tile may report.
+      if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx)
+        ptbits |= (unsigned int)((d11 < fminf(-thresh, nbrMin)) | (d11 > fmaxf(thresh, nbrMax))) << y;
+    }
+  }
+
+  // Running sum of per-lane candidate counts across the warp; subtracting the
+  // lane's own count gives the first slot this lane writes to.
+  unsigned int totbits = __popc(ptbits);
+  unsigned int numbits = totbits;
+  for (int d = 1; d < 32; d <<= 1)
+  {
+    unsigned int num = ShiftUp(totbits, d);
+    if (tx >= d)
+      totbits += num;
+  }
+  int pos = totbits - numbits;
+  for (int y = 0; y < yloops; y++)
+  {
+    int ypos = MINMAX_H * blockIdx.y + y;
+    // Candidates beyond the buffer capacity are dropped.
+    if ((ptbits & (1u << y)) && pos < MEMWID)
+    {
+      points[2 * pos + 0] = xpos - 1;
+      points[2 * pos + 1] = ypos;
+      pos++;
+    }
+  }
+
+  // Each lane is about to read a slot written by a different lane. A warp
+  // shuffle alone is not documented to order shared-memory accesses, so make
+  // the writes above visible to the whole warp first.
+  __syncwarp();
+
+  // Fetch lane 31's running total, i.e. the number of candidates found.
+  totbits = Shuffle(totbits, 31);
+  if (tx < totbits)
+  {
+    // One candidate per thread, addressed in the middle plane (scale + 1).
+    int xpos = points[2 * tx + 0];
+    int ypos = points[2 * tx + 1];
+    int ptr = xpos + (ypos + (scale + 1) * height) * pitch;
+    float val = d_Data0[ptr];
+    float *data1 = &d_Data0[ptr];
+    // Second differences within the plane.
+    float dxx = 2.0f * val - data1[-1] - data1[1];
+    float dyy = 2.0f * val - data1[-pitch] - data1[pitch];
+    float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]);
+    float tra = dxx + dyy;
+    float det = dxx * dyy - dxy * dxy;
+    // Edge rejection on the trace/determinant of the in-plane differences.
+    if (tra * tra < edgeLimit * det)
+    {
+      float edge = __fdividef(tra * tra, det);
+      float dx = 0.5f * (data1[1] - data1[-1]);
+      float dy = 0.5f * (data1[pitch] - data1[-pitch]);
+      // Same pixel in the neighbouring planes.
+      float *data0 = d_Data0 + ptr - height * pitch;
+      float *data2 = d_Data0 + ptr + height * pitch;
+      float ds = 0.5f * (data0[0] - data2[0]);
+      float dss = 2.0f * val - data2[0] - data0[0];
+      float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]);
+      float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]);
+      // Cofactors and reciprocal determinant of the symmetric 3x3 matrix
+      // [dxx dxy dxs; dxy dyy dys; dxs dys dss].
+      float idxx = dyy * dss - dys * dys;
+      float idxy = dys * dxs - dxy * dss;
+      float idxs = dxy * dys - dyy * dxs;
+      float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs);
+      float idyy = dxx * dss - dxs * dxs;
+      float idys = dxy * dxs - dxx * dys;
+      float idss = dxx * dyy - dxy * dxy;
+      // Offset of the fitted extremum from the sample position.
+      float pdx = idet * (idxx * dx + idxy * dy + idxs * ds);
+      float pdy = idet * (idxy * dx + idyy * dy + idys * ds);
+      float pds = idet * (idxs * dx + idys * dy + idss * ds);
+      // If the joint fit moves more than half a sample on any axis, fall
+      // back to independent per-axis estimates.
+      if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f)
+      {
+        pdx = __fdividef(dx, dxx);
+        pdy = __fdividef(dy, dyy);
+        pds = __fdividef(ds, dss);
+      }
+      float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds);
+      int maxPts = d_MaxNumPoints;
+      float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor);
+      if (sc >= lowestScale)
+      {
+        // Repeated here before reserving a slot; presumably because the
+        // initialisation by block (0,0) is not ordered with respect to other
+        // blocks - confirm.
+        atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+        unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff);
+        // Once the output array is full, further points overwrite its last slot.
+        idx = (idx >= maxPts ? maxPts - 1 : idx);
+        d_Sift[idx].xpos = xpos + pdx;
+        d_Sift[idx].ypos = ypos + pdy;
+        d_Sift[idx].scale = sc;
+        d_Sift[idx].sharpness = val + dval;
+        d_Sift[idx].edgeness = edge;
+        d_Sift[idx].subsampling = subsampling;
+      }
+    }
+  }
+}
+
+// __global__ void FindPointsMulti(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+//   __shared__ unsigned int cnt;
+//   __shared__ unsigned short points[3 * MEMWID];
+
+
+//   if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0)
+//   {
+//     atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+//     atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+//   }
+//   int tx = threadIdx.x;
+//   int block = blockIdx.x / NUM_SCALES;
+//   int scale = blockIdx.x - NUM_SCALES * block;
+//   int minx = block * MINMAX_W;
+//   int maxx = min(minx + MINMAX_W, width);
+//   int xpos = minx + tx;
+//   int size = pitch * height;
+//   int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+//   int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H);
+//   float maxv = 0.0f;
+//   for (int y = 0; y < yloops; y++)
+//   {
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     float val = d_Data0[yptr1 + 1 * size];
+//     maxv = fmaxf(maxv, fabs(val));
+//   }
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+//   maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+//   if (Shuffle(maxv, 0) <= thresh)
+//     return;
+
+//   if (tx == 0)
+//     cnt = 0;
+//   __syncthreads();
+
+//   for (int y = 0; y < yloops; y++)
+//   {
+
+//     int ypos = MINMAX_H * blockIdx.y + y;
+//     int yptr1 = ptr + ypos * pitch;
+//     int yptr0 = ptr + max(0, ypos - 1) * pitch;
+//     int yptr2 = ptr + min(height - 1, ypos + 
1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// float ymin1 = fminf(fminf(d10, d11), d12); +// float ymax1 = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// float ymin3 = fminf(fminf(d30, d31), d32); +// float ymax3 = fmaxf(fmaxf(d30, d31), d32); +// float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3); +// float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3); + +// float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); +// float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(nmin2, ymin1), ymin3); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// } +// if (tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; +// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra 
= dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +// __global__ void FindPointsMultiOld(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +// { +// #define MEMWID (MINMAX_W + 2) +// __shared__ float ymin1[MEMWID], ymin2[MEMWID], ymin3[MEMWID]; +// __shared__ float ymax1[MEMWID], ymax2[MEMWID], ymax3[MEMWID]; +// __shared__ unsigned int cnt; +// __shared__ unsigned short points[3 * MEMWID]; + +// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); +// } +// int tx = threadIdx.x; +// int block = blockIdx.x / NUM_SCALES; +// int scale = blockIdx.x - NUM_SCALES * block; +// int minx = block * MINMAX_W; +// int maxx = min(minx + MINMAX_W, width); +// int xpos = minx + tx; +// int size = pitch * height; +// int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + +// int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); +// float maxv = 0.0f; +// for (int y = 0; y < yloops; y++) +// { +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// float val = d_Data0[yptr1 + 1 * size]; +// maxv = fmaxf(maxv, fabs(val)); +// } +// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W)); +// if (Shuffle(maxv, 0) <= thresh) +// return; + +// if (tx == 0) +// cnt = 0; +// __syncthreads(); + +// for (int y = 0; y < yloops; y++) +// { + +// int ypos = 
MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// int yptr0 = ptr + max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// ymin1[tx] = fminf(fminf(d10, d11), d12); +// ymax1[tx] = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// ymin3[tx] = fminf(fminf(d30, d31), d32); +// ymax3[tx] = fmaxf(fmaxf(d30, d31), d32); +// ymin2[tx] = fminf(fminf(ymin1[tx], fminf(fminf(d20, d22), d21)), ymin3[tx]); +// ymax2[tx] = fmaxf(fmaxf(ymax1[tx], fmaxf(fmaxf(d20, d22), d21)), ymax3[tx]); + +// __syncthreads(); + +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(fminf(ymin2[tx - 1], ymin2[tx + 1]), ymin1[tx]), ymin3[tx]); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(fmaxf(ymax2[tx - 1], ymax2[tx + 1]), ymax1[tx]), ymax3[tx]); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// __syncthreads(); +// } +// if (tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - 
data1[-pitch] - data1[pitch]; +// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +__global__ void LaplaceMultiTex(cudaTextureObject_t texObj, float *d_Result, int width, int pitch, int height, int octave) +{ + __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; + __shared__ float data2[LAPLACE_W * LAPLACE_S]; + const int tx = threadIdx.x; + const int xp = blockIdx.x * LAPLACE_W + tx; + const int yp = blockIdx.y; + const int scale = threadIdx.y; + float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; + float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float x = xp - 3.5; + float y = yp + 0.5; + sdata1[tx] = kernel[0] * tex2D(texObj, x, y) + + kernel[1] * (tex2D(texObj, x, y - 1.0) + tex2D(texObj, x, y + 1.0)) + + kernel[2] * (tex2D(texObj, x, y - 2.0) + tex2D(texObj, x, y + 2.0)) + + kernel[3] * (tex2D(texObj, x, y - 3.0) + tex2D(texObj, x, y + 3.0)) + + kernel[4] * (tex2D(texObj, x, y - 4.0) + tex2D(texObj, x, y + 4.0)); + __syncthreads(); + float *sdata2 = data2 + LAPLACE_W * scale; + if (tx < LAPLACE_W) + { + sdata2[tx] = kernel[0] * sdata1[tx + 4] + + kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) + + kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + + kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) + + kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]); + } + __syncthreads(); + if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) + d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; +} + +__global__ void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +{ + __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; + const int tx = threadIdx.x; + const int xp = blockIdx.x * LAPLACE_W + tx; + const int yp = blockIdx.y; + float *data = d_Image + max(min(xp - LAPLACE_R, 
width - 1), 0); // multiply with 4 for max func + float temp[2 * LAPLACE_R + 1]; + + float kern[LAPLACE_S][LAPLACE_R + 1]; + if (xp < (width + 2 * LAPLACE_R)) + { + for (int i = 0; i <= 2 * LAPLACE_R; i++) + temp[i] = data[max(0, min(yp + i - LAPLACE_R, height - 1)) * pitch]; + for (int scale = 0; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; + for (int i = 0; i <= LAPLACE_R; i++) + { + kern[scale][i] = kernel[i]; + } + float sum = kern[scale][0] * temp[LAPLACE_R]; +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + sum += kern[scale][j] * (temp[LAPLACE_R - j] + temp[LAPLACE_R + j]); + buf[tx] = sum; + } + } + __syncthreads(); + if (tx < LAPLACE_W && xp < (width + 2 * LAPLACE_R)) + { + int scale = 0; + float oldRes = kern[scale][0] * buff[tx + LAPLACE_R]; + +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + oldRes += kern[scale][j] * (buff[tx + LAPLACE_R - j] + buff[tx + LAPLACE_R + j]); + + for (int scale = 1; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + + float res = kern[scale][0] * buf[tx + LAPLACE_R]; + +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + res += kern[scale][j] * (buf[tx + LAPLACE_R - j] + buf[tx + LAPLACE_R + j]); + + d_Result[(scale - 1) * height * pitch + yp * pitch + xp] = res - oldRes; + oldRes = res; + } + } +} + +// __global__ void LaplaceMultiMemWide(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int xp4 = blockIdx.x * LAPLACE_W + 4 * tx; +// const int yp = blockIdx.y; +// float kern[LAPLACE_S][LAPLACE_R + 1]; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// float temp[9]; +// if (xp < (width + 2 * LAPLACE_R)) +// { +// for (int i = 0; i < 4; i++) +// 
temp[i] = data[max(0, min(yp + i - 4, height - 1)) * pitch]; +// for (int i = 4; i < 8 + 1; i++) +// temp[i] = data[min(yp + i - 4, height - 1) * pitch]; +// for (int scale = 0; scale < LAPLACE_S; scale++) +// { +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// for (int i = 0; i <= LAPLACE_R; i++) +// kern[scale][i] = kernel[LAPLACE_R - i]; +// float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// buf[tx] = kern[scale][4] * temp[4] + +// kern[scale][3] * (temp[3] + temp[5]) + kern[scale][2] * (temp[2] + temp[6]) + +// kern[scale][1] * (temp[1] + temp[7]) + kern[scale][0] * (temp[0] + temp[8]); +// } +// } +// __syncthreads(); +// if (tx < LAPLACE_W / 4 && xp4 < width) +// { +// float4 b0 = reinterpret_cast(buff)[tx + 0]; +// float4 b1 = reinterpret_cast(buff)[tx + 1]; +// float4 b2 = reinterpret_cast(buff)[tx + 2]; +// float4 old4, new4, dif4; +// old4.x = kern[0][4] * b1.x + kern[0][3] * (b0.w + b1.y) + kern[0][2] * (b0.z + b1.z) + +// kern[0][1] * (b0.y + b1.w) + kern[0][0] * (b0.x + b2.x); +// old4.y = kern[0][4] * b1.y + kern[0][3] * (b1.x + b1.z) + kern[0][2] * (b0.w + b1.w) + +// kern[0][1] * (b0.z + b2.x) + kern[0][0] * (b0.y + b2.y); +// old4.z = kern[0][4] * b1.z + kern[0][3] * (b1.y + b1.w) + kern[0][2] * (b1.x + b2.x) + +// kern[0][1] * (b0.w + b2.y) + kern[0][0] * (b0.z + b2.z); +// old4.w = kern[0][4] * b1.w + kern[0][3] * (b1.z + b2.x) + kern[0][2] * (b1.y + b2.y) + +// kern[0][1] * (b1.x + b2.z) + kern[0][0] * (b0.w + b2.w); +// for (int scale = 1; scale < LAPLACE_S; scale++) +// { +// float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float4 b0 = reinterpret_cast(buf)[tx + 0]; +// float4 b1 = reinterpret_cast(buf)[tx + 1]; +// float4 b2 = reinterpret_cast(buf)[tx + 2]; +// new4.x = kern[scale][4] * b1.x + kern[scale][3] * (b0.w + b1.y) + +// kern[scale][2] * (b0.z + b1.z) + kern[scale][1] * (b0.y + b1.w) + +// kern[scale][0] * (b0.x + b2.x); +// new4.y = kern[scale][4] * b1.y + kern[scale][3] * (b1.x + b1.z) + 
+// kern[scale][2] * (b0.w + b1.w) + kern[scale][1] * (b0.z + b2.x) + +// kern[scale][0] * (b0.y + b2.y); +// new4.z = kern[scale][4] * b1.z + kern[scale][3] * (b1.y + b1.w) + +// kern[scale][2] * (b1.x + b2.x) + kern[scale][1] * (b0.w + b2.y) + +// kern[scale][0] * (b0.z + b2.z); +// new4.w = kern[scale][4] * b1.w + kern[scale][3] * (b1.z + b2.x) + +// kern[scale][2] * (b1.y + b2.y) + kern[scale][1] * (b1.x + b2.z) + +// kern[scale][0] * (b0.w + b2.w); +// dif4.x = new4.x - old4.x; +// dif4.y = new4.y - old4.y; +// dif4.z = new4.z - old4.z; +// dif4.w = new4.w - old4.w; +// reinterpret_cast(&d_Result[(scale - 1) * height * pitch + yp * pitch + xp4])[0] = dif4; +// old4 = new4; +// } +// } +// } + +// __global__ void LaplaceMultiMemTest(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// __shared__ float data2[LAPLACE_W * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int yp = LAPLACE_H * blockIdx.y; +// const int scale = threadIdx.y; +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// int h = height - 1; +// float temp[8 + LAPLACE_H], kern[LAPLACE_R + 1]; +// for (int i = 0; i < 4; i++) +// temp[i] = data[max(0, min(yp + i - 4, h)) * pitch]; +// for (int i = 4; i < 8 + LAPLACE_H; i++) +// temp[i] = data[min(yp + i - 4, h) * pitch]; +// for (int i = 0; i <= LAPLACE_R; i++) +// kern[i] = kernel[LAPLACE_R - i]; +// for (int j = 0; j < LAPLACE_H; j++) +// { +// sdata1[tx] = kern[4] * temp[4 + j] + +// kern[3] * (temp[3 + j] + temp[5 + j]) + kern[2] * (temp[2 + j] + temp[6 + j]) + +// kern[1] * (temp[1 + j] + temp[7 + j]) + kern[0] * (temp[0 + j] + temp[8 + j]); +// __syncthreads(); +// float *sdata2 = data2 + LAPLACE_W * scale; +// if (tx < LAPLACE_W) +// { +// 
sdata2[tx] = kern[4] * sdata1[tx + 4] + +// kern[3] * (sdata1[tx + 3] + sdata1[tx + 5]) + kern[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + +// kern[1] * (sdata1[tx + 1] + sdata1[tx + 7]) + kern[0] * (sdata1[tx + 0] + sdata1[tx + 8]); +// } +// __syncthreads(); +// if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width && (yp + j) < height) +// d_Result[scale * height * pitch + (yp + j) * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; +// } +// } + +// __global__ void LaplaceMultiMemOld(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// __shared__ float data2[LAPLACE_W * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int yp = blockIdx.y; +// const int scale = threadIdx.y; +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// int h = height - 1; +// sdata1[tx] = kernel[0] * data[min(yp, h) * pitch] + +// kernel[1] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) + +// kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) + +// kernel[3] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) + +// kernel[4] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]); +// __syncthreads(); +// float *sdata2 = data2 + LAPLACE_W * scale; +// if (tx < LAPLACE_W) +// { +// sdata2[tx] = kernel[0] * sdata1[tx + 4] + +// kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) + +// kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + +// kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) + +// kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]); +// } +// __syncthreads(); +// if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) +// d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + 
LAPLACE_W]; +// } + +__global__ void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height) +{ + __shared__ float buffer[(LOWPASS_W + 2 * LOWPASS_R) * LOWPASS_H]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * LOWPASS_W + tx; + const int yp = blockIdx.y * LOWPASS_H + ty; + float *kernel = d_LowPassKernel; + float *data = d_Image + max(min(xp - 4, width - 1), 0); + float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R); + int h = height - 1; + if (yp < height) + buff[tx] = kernel[4] * data[min(yp, h) * pitch] + + kernel[3] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) + + kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) + + kernel[1] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) + + kernel[0] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]); + __syncthreads(); + if (tx < LOWPASS_W && xp < width && yp < height) + d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] + + kernel[3] * (buff[tx + 3] + buff[tx + 5]) + kernel[2] * (buff[tx + 2] + buff[tx + 6]) + + kernel[1] * (buff[tx + 1] + buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]); +} + +__global__ void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height) +{ + __shared__ float xrows[16][32]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * LOWPASS_W + tx; + const int yp = blockIdx.y * LOWPASS_H + ty; + const int N = 16; + float *k = d_LowPassKernel; + int xl = max(min(xp - 4, width - 1), 0); + for (int l = -8; l <= LOWPASS_H; l += 4) + { + if (l < LOWPASS_H) + { + int yl = max(min(yp + l + 4, height - 1), 0); + float val = d_Image[yl * pitch + xl]; + xrows[(l + 8 + ty) % N][tx] = k[4] * ShiftDown(val, 4) + + k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) + + k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) + + k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) + + k[0] * 
(ShiftDown(val, 8) + val); + } + if (l >= 4) + { + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] + + k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) + + k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) + + k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) + + k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]); + } + if (l >= 0) + __syncthreads(); + } +} + +__global__ void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height) +{ + __shared__ float xrows[16][32]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * LOWPASS_W + tx; + const int yp = blockIdx.y * LOWPASS_H + ty; + const int N = 16; + float *k = d_LowPassKernel; + int xl = max(min(xp - 4, width - 1), 0); +#pragma unroll + for (int l = -8; l < 4; l += 4) + { + int ly = l + ty; + int yl = max(min(yp + l + 4, height - 1), 0); + float val = d_Image[yl * pitch + xl]; // d_Image[yl*pitch + xl].x + val = k[4] * ShiftDown(val, 4) + + k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) + + k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) + + k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) + + k[0] * (ShiftDown(val, 8) + val); + xrows[ly + 8][tx] = val; + } + __syncthreads(); +#pragma unroll + for (int l = 4; l < LOWPASS_H; l += 4) + { + int ly = l + ty; + int yl = min(yp + l + 4, height - 1); + float val = d_Image[yl * pitch + xl]; + val = k[4] * ShiftDown(val, 4) + + k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) + + k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) + + k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) + + k[0] * (ShiftDown(val, 8) + val); + xrows[(ly + 8) % N][tx] = val; + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * (xrows[(ly - 
2) % N][tx] + xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); + __syncthreads(); + } + int ly = LOWPASS_H + ty; + int ys = yp + LOWPASS_H - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h new file mode 100644 index 000000000..52fd52aa4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftD.h @@ -0,0 +1,80 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#ifndef CUDASIFTD_H +#define CUDASIFTD_H + +#define NUM_SCALES 5 + +// Scale down thread block width +#define SCALEDOWN_W 64 // 60 + +// Scale down thread block height +#define SCALEDOWN_H 16 // 8 + +// Scale up thread block width +#define SCALEUP_W 64 + +// Scale up thread block height +#define SCALEUP_H 8 + +// Find point thread block width +#define MINMAX_W 30 // 32 + +// Find point thread block height +#define MINMAX_H 8 // 16 + +// Laplace thread block width +#define LAPLACE_W 128 // 56 + +// Laplace rows per thread +#define LAPLACE_H 4 + +// Number of laplace scales +#define LAPLACE_S (NUM_SCALES + 3) + +// Laplace filter kernel radius +#define LAPLACE_R 4 + +#define LOWPASS_W 24 // 56 +#define LOWPASS_H 32 // 16 +#define LOWPASS_R 4 + +//====================== Number of threads ====================// +// ScaleDown: SCALEDOWN_W + 4 +// LaplaceMulti: (LAPLACE_W+2*LAPLACE_R)*LAPLACE_S +// FindPointsMulti: MINMAX_W + 2 +// ComputeOrientations: 128 +// ExtractSiftDescriptors: 256 + +//====================== Number of blocks ====================// +// ScaleDown: (width/SCALEDOWN_W) * (height/SCALEDOWN_H) +// LaplceMulti: (width+2*LAPLACE_R)/LAPLACE_W * height +// FindPointsMulti: (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H) +// ComputeOrientations: numpts +// ExtractSiftDescriptors: numpts + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu new file mode 100644 index 000000000..3f1e15fbf --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu @@ -0,0 +1,593 @@ +//********************************************************// +// CUDA 
SIFT extractor by Mårten Björkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include + +#include "cudautils.h" +#include "cudaImage.h" +#include "cudaSift.h" +#include "cudaSiftD.h" +#include "cudaSiftH.h" + +#include "cudaSiftD.cu" + +void InitCuda(int devNum) +{ + int nDevices; + safeCall(cudaGetDeviceCount(&nDevices)); + if (!nDevices) + { + std::cerr << "No CUDA devices available" << std::endl; + return; + } + devNum = std::min(nDevices - 1, devNum); + deviceInit(devNum); + cudaDeviceProp prop; + safeCall(cudaGetDeviceProperties(&prop, devNum)); + printf("Device Number: %d\n", devNum); + printf(" Device name: %s\n", prop.name); + printf(" Memory Clock Rate (MHz): %d\n", prop.memoryClockRate / 1000); + printf(" Clock Freq (MHz): %d\n", prop.clockRate / 1000); + printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth); + printf(" Peak Memory Bandwidth (GB/s): %.1f\n\n", + 2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6); +} + +float *AllocSiftTempMemory(int width, int height, int numOctaves, float &time, bool scaleUp) +{ + const int nd = NUM_SCALES + 3; + int w = width * (scaleUp ? 2 : 1); + int h = height * (scaleUp ? 
2 : 1); + int p = iAlignUp(w, 128); + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = NULL; + size_t pitch; + size += sizeTmp; + +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMallocPitch((void **)&memoryTmp, &pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float))); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + return memoryTmp; +} + +void FreeSiftTempMemory(float *memoryTmp) +{ + if (memoryTmp) + safeCall(cudaFree(memoryTmp)); +} + +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, + float thresh, float &totTime, float lowestScale, bool scaleUp, float *tempMemory) +{ + unsigned int *d_PointCounterAddr; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(cudaGetSymbolAddress((void **)&d_PointCounterAddr, d_PointCounter)); + safeCall(cudaMemset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int))); + safeCall(cudaMemcpyToSymbol(d_MaxNumPoints, &siftData.maxPts, sizeof(int))); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + const int nd = NUM_SCALES + 3; + int w = img.width * (scaleUp ? 2 : 1); + int h = img.height * (scaleUp ? 
2 : 1); + int p = iAlignUp(w, 128); + int width = w, height = h; + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = tempMemory; + size += sizeTmp; + if (!tempMemory) + { + size_t pitch; +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMallocPitch((void **)&memoryTmp, &pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float))); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + } + float *memorySub = memoryTmp + sizeTmp; + + CudaImage lowImg; + lowImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memorySub); + if (!scaleUp) + { + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); +#ifdef DEVICE_TIMER + auto start_memcpy1 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpyToSymbol(d_LaplaceKernel, kernel, 8 * 12 * 16 * sizeof(float))); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy1 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy1 - start_memcpy1).count(); +#endif + LowPass(lowImg, img, fmax(initBlur, 0.001f), totTime); + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), totTime); +#ifdef DEVICE_TIMER + auto start_memcpy2 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), cudaMemcpyDeviceToHost)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy2 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy2 - start_memcpy2).count(); +#endif + 
siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); + } + else + { + CudaImage upImg; + upImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memoryTmp); + ScaleUp(upImg, img, totTime); + LowPass(lowImg, upImg, max(initBlur, 0.001f), totTime); + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); +#ifdef DEVICE_TIMER + auto start_memcpy3 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpyToSymbol(d_LaplaceKernel, kernel, 8 * 12 * 16 * sizeof(float))); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy3 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy3 - start_memcpy3).count(); +#endif + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), totTime); +#ifdef DEVICE_TIMER + auto start_memcpy4 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), cudaMemcpyDeviceToHost)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy4 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy4 - start_memcpy4).count(); +#endif + siftData.numPts = (siftData.numPts < siftData.maxPts ? 
siftData.numPts : siftData.maxPts); + RescalePositions(siftData, 0.5f, totTime); + } + + if (!tempMemory) + safeCall(cudaFree(memoryTmp)); + if (siftData.h_data) + { +#ifdef DEVICE_TIMER + auto start_memcpy5 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy(siftData.h_data, siftData.d_data, sizeof(SiftPoint) * siftData.numPts, cudaMemcpyDeviceToHost)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy5 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy5 - start_memcpy5).count(); + printf("Total time for sift extraction = %.2f us\n\n", totTime); +#endif + } + printf("Number of Points after sift extraction = %d\n\n", siftData.numPts); +} + +int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale, + float subsampling, float *memoryTmp, float *memorySub, float &totTime) +{ + int w = img.width; + int h = img.height; + if (numOctaves > 1) + { + CudaImage subImg; + int p = iAlignUp(w / 2, 128); + subImg.Allocate(w / 2, h / 2, p, false, totTime, memorySub); + ScaleDown(subImg, img, 0.5f, totTime); + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + ExtractSiftLoop(siftData, subImg, numOctaves - 1, totInitBlur, thresh, lowestScale, subsampling * 2.0f, + memoryTmp, memorySub + (h / 2) * p, totTime); + } + ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, totTime); + return 0; +} + +void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, + float lowestScale, float subsampling, float *memoryTmp, float &totTime) +{ + const int nd = NUM_SCALES + 3; + CudaImage diffImg[nd]; + int w = img.width; + int h = img.height; + int p = iAlignUp(w, 128); + for (int i = 0; i < nd - 1; i++) + diffImg[i].Allocate(w, h, p, false, totTime, memoryTmp + i * p * h); + + float baseBlur = pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); 
+ LaplaceMulti(img, diffImg, octave, totTime); + FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES, lowestScale / subsampling, subsampling, octave, totTime); + ComputeOrientations(img, siftData, octave, totTime); + ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, totTime); +} + +void InitSiftData(SiftData &data, float &time, int num, bool host, bool dev) +{ + data.numPts = 0; + data.maxPts = num; + int sz = sizeof(SiftPoint) * num; + data.h_data = NULL; + if (host) + data.h_data = (SiftPoint *)malloc(sz); + data.d_data = NULL; + if (dev) + { +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMalloc((void **)&data.d_data, sz)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + } +} + +void FreeSiftData(SiftData &data) +{ + if (data.d_data != NULL) + safeCall(cudaFree(data.d_data)); + data.d_data = NULL; + if (data.h_data != NULL) + free(data.h_data); + data.numPts = 0; + data.maxPts = 0; +} + +void PrintSiftData(SiftData &data) +{ + SiftPoint *h_data = data.h_data; + if (data.h_data == NULL) + { + h_data = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts); + safeCall(cudaMemcpy(h_data, data.d_data, sizeof(SiftPoint) * data.numPts, cudaMemcpyDeviceToHost)); + safeCall(cudaDeviceSynchronize()); + data.h_data = h_data; + } + for (int i = 0; i < data.numPts; i++) + { + printf("xpos = %.2f\n", h_data[i].xpos); + printf("ypos = %.2f\n", h_data[i].ypos); + printf("scale = %.2f\n", h_data[i].scale); + printf("sharpness = %.2f\n", h_data[i].sharpness); + printf("edgeness = %.2f\n", h_data[i].edgeness); + printf("orientation = %.2f\n", h_data[i].orientation); + printf("score = %.2f\n", h_data[i].score); + float *siftData = (float *)&h_data[i].data; + for (int j = 0; j < 8; j++) + { + if (j == 0) + printf("data = "); + else + printf(" "); + 
for (int k = 0; k < 16; k++) + if (siftData[j + 8 * k] < 0.05) + printf(" . "); + else + printf("%.2f ", siftData[j + 8 * k]); + printf("\n"); + } + } + printf("Number of available points: %d\n", data.numPts); + printf("Number of allocated points: %d\n", data.maxPts); +} + +/////////////////////////////////////////////////////////////////////////////// +// Host side master functions +/////////////////////////////////////////////////////////////////////////////// + +double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime) +{ + static float oldVariance = -1.0f; + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleDown: missing data\n"); + return 0.0; + } + if (oldVariance != variance) + { + float h_Kernel[5]; + float kernelSum = 0.0f; + for (int j = 0; j < 5; j++) + { + h_Kernel[j] = (float)expf(-(double)(j - 2) * (j - 2) / 2.0 / variance); + kernelSum += h_Kernel[j]; + } + for (int j = 0; j < 5; j++) + h_Kernel[j] /= kernelSum; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpyToSymbol(d_ScaleDownKernel, h_Kernel, 5 * sizeof(float))); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + oldVariance = variance; + } +#if 0 + dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H)); + dim3 threads(SCALEDOWN_W + 4, SCALEDOWN_H + 4); + ScaleDownDenseShift<<>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch); +#else + dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H)); + dim3 threads(SCALEDOWN_W + 4); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + ScaleDown<<>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = 
std::chrono::steady_clock::now(); + // printf("ScaleDown time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("ScaleDown() execution failed\n"); + return 0.0; +} + +double ScaleUp(CudaImage &res, CudaImage &src, float &totTime) +{ + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleUp: missing data\n"); + return 0.0; + } + dim3 blocks(iDivUp(res.width, SCALEUP_W), iDivUp(res.height, SCALEUP_H)); + dim3 threads(SCALEUP_W / 2, SCALEUP_H / 2); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + ScaleUp<<>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ScaleUp time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ScaleUp() execution failed\n"); + return 0.0; +} + +double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime) +{ + dim3 blocks(512); + dim3 threads(256); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + ComputeOrientationsCONSTNew<<>>(src.d_data, src.width, src.pitch, src.height, siftData.d_data, octave); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ComputeOrientationsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel) + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ComputeOrientations() execution failed\n"); + return 0.0; +} + +double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime) +{ + dim3 blocks(512); + dim3 threads(16, 8); +#ifdef 
DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + ExtractSiftDescriptorsCONSTNew<<>>(texObj, pitch, siftData.d_data, subsampling, octave); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ExtractSiftDescriptorsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ExtractSiftDescriptors() execution failed\n"); + return 0.0; +} +double RescalePositions(SiftData &siftData, float scale, float &totTime) +{ + dim3 blocks(iDivUp(siftData.numPts, 64)); + dim3 threads(64); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + RescalePositions<<>>(siftData.d_data, siftData.numPts, scale); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("RescalePositions time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("RescapePositions() execution failed\n"); + return 0.0; +} + +double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime) +{ + float kernel[2 * LOWPASS_R + 1]; + static float oldScale = -1.0f; + if (scale != oldScale) + { + float kernelSum = 0.0f; + float ivar2 = 1.0f / (2.0f * scale * scale); + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) + { + kernel[j + LOWPASS_R] = (float)expf(-(double)j * j * ivar2); + kernelSum += kernel[j + LOWPASS_R]; + } + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) + kernel[j + LOWPASS_R] /= kernelSum; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpyToSymbol(d_LowPassKernel, kernel, (2 * LOWPASS_R + 1) * sizeof(float))); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy = 
std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + oldScale = scale; + } + int width = res.width; + int pitch = res.pitch; + int height = res.height; + dim3 blocks(iDivUp(width, LOWPASS_W), iDivUp(height, LOWPASS_H)); //[80,34,1] + + dim3 threads(LOWPASS_W + 2 * LOWPASS_R, 4); //[32,4,1] +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + LowPassBlockOld<<>>(src.d_data, res.d_data, width, pitch, height); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("LowPassBlock time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("LowPass() execution failed\n"); + return 0.0; +} + +//==================== Multi-scale functions ===================// + +void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel) +{ + if (numOctaves > 1) + { + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + PrepareLaplaceKernels(numOctaves - 1, totInitBlur, kernel); + } + float scale = pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); + for (int i = 0; i < NUM_SCALES + 3; i++) + { + float kernelSum = 0.0f; + float var = scale * scale - initBlur * initBlur; + for (int j = 0; j <= LAPLACE_R; j++) + { + kernel[numOctaves * 12 * 16 + 16 * i + j] = (float)expf(-(double)j * j / 2.0 / var); + kernelSum += (j == 0 ? 
1 : 2) * kernel[numOctaves * 12 * 16 + 16 * i + j]; + } + for (int j = 0; j <= LAPLACE_R; j++) + kernel[numOctaves * 12 * 16 + 16 * i + j] /= kernelSum; + scale *= diffScale; + } +} + +double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime) +{ + int width = results[0].width; + int pitch = results[0].pitch; + int height = results[0].height; +#if 1 + dim3 threads(LAPLACE_W + 2 * LAPLACE_R); //(136) + dim3 blocks(iDivUp(width, LAPLACE_W), height); //(15) +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + LaplaceMultiMem<<>>(baseImage.d_data, results[0].d_data, width, pitch, height, octave); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("LaplaceMultiMem time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("LaplaceMulti() execution failed\n"); + return 0.0; +} + +double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, + float lowestScale, float subsampling, int octave, float &totTime) +{ + if (sources->d_data == NULL) + { + printf("FindPointsMulti: missing data\n"); + return 0.0; + } + int w = sources->width; + int p = sources->pitch; + int h = sources->height; +#if 1 + dim3 blocks(iDivUp(w, MINMAX_W) * NUM_SCALES, iDivUp(h, MINMAX_H)); + dim3 threads(MINMAX_W + 2); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + FindPointsMultiNew<<>>(sources->d_data, siftData.d_data, w, p, h, subsampling, + lowestScale, thresh, factor, edgeLimit, octave); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("FindPointsMultiNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()) + totTime += std::chrono::duration(stop_kernel - 
start_kernel).count(); +#endif +#endif + checkMsg("FindPointsMulti() execution failed\n"); + return 0.0; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h new file mode 100644 index 000000000..95e8384ec --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudaSiftH.h @@ -0,0 +1,50 @@ + +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+
+// SPDX-License-Identifier: MIT
+
+#ifndef CUDASIFTH_H
+#define CUDASIFTH_H
+
+#include "cudautils.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+// Host-side entry points of the SIFT extraction pipeline. Only the
+// declarations are visible in this header.
+// NOTE(review): most routines take a `float &totTime` out-parameter, presumably
+// an accumulated timing total -- confirm against the definitions.
+int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh,
+                    float lowestScale, float subsampling, float *memoryTmp, float *memorySub, float &totTime);
+void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale,
+                       float subsampling, float *memoryTmp, float &totTime);
+double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime);
+double ScaleUp(CudaImage &res, CudaImage &src, float &totTime);
+double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime);
+double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime);
+double RescalePositions(SiftData &siftData, float scale, float &totTime);
+double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime);
+void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel);
+double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime);
+double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor,
+                       float lowestScale, float subsampling, int octave, float &totTime);
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudautils.h
new file mode 100644
index 000000000..d5b3161cc
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/cudautils.h
@@ -0,0 +1,155 @@
+#ifndef CUDAUTILS_H
+#define CUDAUTILS_H
+
+// NOTE(review): the header names of the bare #include lines below (and the
+// template parameter lists further down) appear to have been lost when this
+// text was extracted -- restore them from the upstream file.
+#include
+#include
+
+#ifdef WIN32
+#include
+#endif
+
+// Call-site wrappers: forward __FILE__/__LINE__ to the checking helpers below.
+#define safeCall(err) __safeCall(err, __FILE__, __LINE__)
+#define safeThreadSync() __safeThreadSync(__FILE__, __LINE__)
+#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__)
+
+// If err is not cudaSuccess, prints the CUDA error string together with the
+// call site to stderr and terminates the process with exit(-1).
+inline void __safeCall(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "safeCall() Runtime API error in file <%s>, line %i : %s.\n", file, line, cudaGetErrorString(err));
+        exit(-1);
+    }
+}
+
+// Calls cudaDeviceSynchronize() and terminates with exit(-1) if it reports an error.
+inline void __safeThreadSync(const char *file, const int line)
+{
+    cudaError err = cudaDeviceSynchronize();
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "threadSynchronize() Driver API error in file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err));
+        exit(-1);
+    }
+}
+
+// Polls cudaGetLastError(); on error prints errorMessage plus the CUDA error
+// string and call site, then terminates with exit(-1).
+inline void __checkMsg(const char *errorMessage, const char *file, const int line)
+{
+    cudaError_t err = cudaGetLastError();
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "checkMsg() CUDA error: %s in file <%s>, line %i : %s.\n", errorMessage, file, line, cudaGetErrorString(err));
+        exit(-1);
+    }
+}
+
+// Selects CUDA device `dev`, clamped into [0, deviceCount - 1].
+// Returns false when no device is reported or the chosen device reports a
+// compute-capability major version below 1; returns true after cudaSetDevice().
+inline bool deviceInit(int dev)
+{
+    int deviceCount;
+    safeCall(cudaGetDeviceCount(&deviceCount));
+    if (deviceCount == 0)
+    {
+        fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
+        return false;
+    }
+    if (dev < 0)
+        dev = 0;
+    if (dev > deviceCount - 1)
+        dev = deviceCount - 1;
+    cudaDeviceProp deviceProp;
+    safeCall(cudaGetDeviceProperties(&deviceProp, dev));
+    if (deviceProp.major < 1)
+    {
+        fprintf(stderr, "error: device does not support CUDA.\n");
+        return false;
+    }
+    safeCall(cudaSetDevice(dev));
+    return true;
+}
+
+// Event-based timer. The constructor creates two events and records `start`
+// on the given stream; read() records `stop`, waits for it and returns the
+// value produced by cudaEventElapsedTime(). The CUDA return codes are not checked.
+class TimerGPU
+{
+public:
+    cudaEvent_t start, stop;
+    cudaStream_t stream;
+    TimerGPU(cudaStream_t stream_ = 0) : stream(stream_)
+    {
+        cudaEventCreate(&start);
+        cudaEventCreate(&stop);
+        cudaEventRecord(start, stream);
+    }
+    ~TimerGPU()
+    {
+        cudaEventDestroy(start);
+        cudaEventDestroy(stop);
+    }
+    float read()
+    {
+        cudaEventRecord(stop, stream);
+        cudaEventSynchronize(stop);
+        float time;
+        cudaEventElapsedTime(&time, start, stop);
+        return time;
+    }
+};
+
+// Time-stamp-counter based host timer. Counter values are stored divided by
+// 2^bits (bits = 10); read() scales the difference back by 2^bits and divides
+// by freq and 1e3.
+class TimerCPU
+{
+    static const int bits = 10;
+
+public:
+    long long beg_clock;
+    float freq;
+    TimerCPU(float freq_) : freq(freq_)
+    { // freq = clock frequency in MHz
+        beg_clock = getTSC(bits);
+    }
+    // Reads the time-stamp counter (bytes 0x0f 0x31 encode RDTSC) and returns it shifted right by `bits`.
+    long long getTSC(int bits)
+    {
+#ifdef WIN32
+        return __rdtsc() / (1LL << bits);
+#else
+        unsigned int low, high;
+        __asm__(".byte 0x0f, 0x31"
+                : "=a"(low), "=d"(high));
+        return ((long long)high << (32 - bits)) | ((long long)low >> bits);
+#endif
+    }
+    float read()
+    {
+        long long end_clock = getTSC(bits);
+        long long Kcycles = end_clock - beg_clock;
+        float time = (float)(1 << bits) * Kcycles / freq / 1e3f;
+        return time;
+    }
+};
+
+// Warp-shuffle wrappers: use the *_sync intrinsics with a full 0xffffffff mask
+// when CUDART_VERSION >= 9000 and the legacy intrinsics otherwise.
+template
+__device__ __inline__ T ShiftDown(T var, unsigned int delta, int width = 32)
+{
+#if (CUDART_VERSION >= 9000)
+    return __shfl_down_sync(0xffffffff, var, delta, width);
+#else
+    return __shfl_down(var, delta, width);
+#endif
+}
+
+template
+__device__ __inline__ T ShiftUp(T var, unsigned int delta, int width = 32)
+{
+#if (CUDART_VERSION >= 9000)
+    return __shfl_up_sync(0xffffffff, var, delta, width);
+#else
+    return __shfl_up(var, delta, width);
+#endif
+}
+
+template
+__device__ __inline__ T Shuffle(T var, unsigned int lane, int width = 32)
+{
+#if (CUDART_VERSION >= 9000)
+    return __shfl_sync(0xffffffff, var, lane, width);
+#else
+    return __shfl(var, lane, width);
+#endif
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp
new file mode 100644
index 000000000..c01e6e7d2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/geomFuncs.cpp
@@ -0,0 +1,72 @@
+#include
+#include
+#include
+#include "cudaSift.h"
+
+// Refines an 8-parameter homography (normalised by homography[8]) by
+// repeatedly solving an 8x8 system with cv::solve(..., cv::DECOMP_CHOLESKY);
+// returns `numfit` and writes the refined parameters back with homography[8] = 1.
+// NOTE(review): several statements below are visibly truncated -- the text
+// between a '<' and a later '>' seems to have been dropped during extraction
+// (e.g. the loop headers and the template argument of A.at/M.at). The code is
+// reproduced as found; restore it from the upstream file before building.
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh)
+{
+#ifdef MANAGEDMEM
+    SiftPoint *mpts = data.m_data;
+#else
+    if (data.h_data==NULL)
+        return 0;
+    SiftPoint *mpts = data.h_data;
+#endif
+    float limit = thresh*thresh;
+    int numPts = data.numPts;
+    cv::Mat M(8, 8, CV_64FC1);
+    cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1);
+    double Y[8];
+    for (int i=0;i<8;i++)
+        A.at(i, 0) = homography[i] / homography[8];
+    for (int loop=0;loopmaxAmbiguity)
+                continue;
+            float den = A.at(6)*pt.xpos + A.at(7)*pt.ypos + 1.0f;
+            float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos;
+            float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos;
+            float err = dx*dx + dy*dy;
+            float wei = (err(r,c) += (Y[c] * Y[r] * wei);
+            X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei);
+            Y[0] = Y[1] = Y[2] = 0.0;
+            Y[3] = pt.xpos;
+            Y[4] = pt.ypos;
+            Y[5] = 1.0;
+            Y[6] = - pt.xpos * pt.match_ypos;
+            Y[7] = - pt.ypos * pt.match_ypos;
+            for (int c=0;c<8;c++)
+                for (int r=0;r<8;r++)
+                    M.at(r,c) += (Y[c] * Y[r] * wei);
+            X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei);
+        }
+        cv::solve(M, X, A, cv::DECOMP_CHOLESKY);
+    }
+    int numfit = 0;
+    for (int i=0;i(6)*pt.xpos + A.at(7)*pt.ypos + 1.0;
+        float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos;
+        float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos;
+        float err = dx*dx + dy*dy;
+        if (err(i);
+    homography[8] = 1.0f;
+    return numfit;
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/mainSift.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/mainSift.cpp
new file mode 100644
index 000000000..0edc92d2b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/mainSift.cpp
@@ -0,0 +1,278 @@
+//********************************************************//
+// CUDA SIFT extractor by Marten Björkman aka Celebrandil //
+// celle @ csc.kth.se //
+//********************************************************//
+
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to
whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "Utility.h"
+#include "cudaImage.h"
+#include "cudaSift.h"
+
+int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh);
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img);
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography);
+
+double ScaleUp(CudaImage &res, CudaImage &src);
+
+///////////////////////////////////////////////////////////////////////////////
+// Main program
+///////////////////////////////////////////////////////////////////////////////
+// Usage: <program> [devNum] [imgSet]
+//   devNum - CUDA device ordinal to run on (default 0)
+//   imgSet - non-zero selects left.pgm/righ.pgm, zero selects img1.png/img2.png
+// Loads the image pair, extracts SIFT features from both images 50 times,
+// matches them, fits and refines a homography, prints statistics and returns
+// the value produced by Utility::RunDataVerification().
+int main(int argc, char **argv)
+{
+    auto totalProgTimer_start = std::chrono::steady_clock::now();
+    int devNum = 0, imgSet = 0;
+    if (argc > 1)
+        devNum = std::atoi(argv[1]);
+    if (argc > 2)
+        imgSet = std::atoi(argv[2]);
+
+    float totTime = 0.0;
+    float imageInitTime = 0.0;
+    float extractSiftTime = 0.0;
+    float matchingTime = 0.0;
+    float ioReadTime = 0.0;
+    float dataVerificationTime = 0.0;
+
+    // Read images using OpenCV
+    cv::Mat limg, rimg;
+    auto ioRead_start = std::chrono::steady_clock::now();
+    if (imgSet)
+    {
+        cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1);
+        cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1);
+    }
+    else
+    {
+        cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1);
+        cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1);
+    }
+    auto ioRead_stop = std::chrono::steady_clock::now();
+    ioReadTime = std::chrono::duration(ioRead_stop - ioRead_start).count();
+
+    unsigned int w = limg.cols;
+    unsigned int h = limg.rows;
+    std::cout << "Image size = (" << w << "," << h << ")" << std::endl;
+
+    // Initial Cuda images and download images to device
+    std::cout << "Initializing data..." << std::endl;
+    // Honour the device ordinal given on the command line (it used to be parsed
+    // and then ignored in favour of device 0) and stop early when the device
+    // cannot be selected, instead of leaving a stale CUDA error for a later check.
+    if (cudaSetDevice(devNum) != cudaSuccess)
+    {
+        std::cerr << "Failed to select CUDA device " << devNum << std::endl;
+        return 1;
+    }
+    CudaImage img1, img2;
+
+    img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data);
+    img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data);
+    img1.Download(imageInitTime);
+    img2.Download(imageInitTime);
+
+    // Extract Sift features from images
+    SiftData siftData1, siftData2;
+    float initBlur = 1.0f;
+    float thresh = (imgSet ? 4.5f : 2.0f);
+
+    InitSiftData(siftData1, imageInitTime, 32768, true, true);
+    InitSiftData(siftData2, imageInitTime, 32768, true, true);
+
+    // A bit of benchmarking
+    // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) {
+    float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false);
+    for (int i = 0; i < 50; i++)
+    {
+        float time = 0.0f; // set total time to init time
+        ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+        extractSiftTime += time;
+        time = 0.0f;
+        ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp);
+        extractSiftTime += time;
+    }
+    FreeSiftTempMemory(memoryTmp);
+
+    // Match Sift features and find a homography
+    for (int i = 0; i < 1; i++)
+        MatchSiftData(siftData1, siftData2, matchingTime);
+    float homography[9];
+    int numMatches;
+    FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0);
+    int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0);
+    float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts);
+
+    std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl;
+    std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n"
+              << std::endl;
+
+#ifdef DEVICE_TIMER
+    totTime = imageInitTime + extractSiftTime + matchingTime;
+
+    std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl;
+    std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl;
+    std::cout << "Matching time = " << matchingTime / 1000 << " ms"
+              << "\n"
+              << std::endl;
+    std::cout << "Total Deivce Time = " << totTime / 1000 << " ms"
+              << "\n"
+              << std::endl;
+#endif
+
+    // data validation
+    auto dataVerficationTimer_start = std::chrono::steady_clock::now();
+    int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage);
+    auto dataVerficationTimer_stop = std::chrono::steady_clock::now();
+    dataVerificationTime = std::chrono::duration(dataVerficationTimer_stop - dataVerficationTimer_start).count();
+    // // Print out and store summary data
+    // // PrintMatchData(siftData1, siftData2, img1);
+    // cv::imwrite("data/limg_pts.pgm", limg);
+
+    // MatchAll(siftData1, siftData2, homography);
+
+    // Free Sift data from device
+    FreeSiftData(siftData1);
+    FreeSiftData(siftData2);
+
+    // Reported workload time excludes image file reading and result verification.
+    auto totalProgTimer_end = std::chrono::steady_clock::now();
+    float totalProgramTime = std::chrono::duration(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime;
+    std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms"
+              << "\n"
+              << std::endl;
+    return data_verification_flag;
+}
+
+// Debug helper (its only call in main is commented out): brute-force compares
+// every descriptor of siftData1 with every descriptor of siftData2 on the host
+// and prints the candidates whose projected position lies within 10 pixels
+// (squared error < 100) or that equal the stored match.
+// Note: the `#if 1` section overwrites entries 0-7 of the caller's homography
+// with fixed values before it is used.
+void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography)
+{
+#ifdef MANAGEDMEM
+    SiftPoint *sift1 = siftData1.m_data;
+    SiftPoint *sift2 = siftData2.m_data;
+#else
+    SiftPoint *sift1 = siftData1.h_data;
+    SiftPoint *sift2 = siftData2.h_data;
+#endif
+    int numPts1 = siftData1.numPts;
+    int numPts2 = siftData2.numPts;
+    int numFound = 0;
+#if 1
+    homography[0] = homography[4] = -1.0f;
+    homography[1] = homography[3] = homography[6] = homography[7] = 0.0f;
+    homography[2] = 1279.0f;
+    homography[5] = 959.0f;
+#endif
+    for (int i = 0; i < numPts1; i++)
+    {
+        float *data1 = sift1[i].data;
+        std::cout << i << ":" << sift1[i].scale << ":" << (int)sift1[i].orientation << " " << sift1[i].xpos << " " << sift1[i].ypos << std::endl;
+        bool found = false;
+        for (int j = 0; j < numPts2; j++)
+        {
+            float *data2 = sift2[j].data;
+            float sum = 0.0f;
+            for (int k = 0; k < 128; k++)
+                sum += data1[k] * data2[k];
+            float den = homography[6] * sift1[i].xpos + homography[7] * sift1[i].ypos + homography[8];
+            float dx = (homography[0] * sift1[i].xpos + homography[1] * sift1[i].ypos + homography[2]) / den - sift2[j].xpos;
+            float dy = (homography[3] * sift1[i].xpos + homography[4] * sift1[i].ypos + homography[5]) / den - sift2[j].ypos;
+            float err = dx * dx + dy * dy;
+            if (err < 100.0f) // 100.0
+                found = true;
+            if (err < 100.0f || j == sift1[i].match)
+            { // 100.0
+                if (j == sift1[i].match && err < 100.0f)
+                    std::cout << " *";
+                else if (j == sift1[i].match)
+                    std::cout << " -";
+                else if (err < 100.0f)
+                    std::cout << " +";
+                else
+                    std::cout << " ";
+                std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << sift2[j].scale << ":" << (int)sift2[j].orientation << " " << sift2[j].xpos << " " << sift2[j].ypos << " " << (int)dx << " " << (int)dy << std::endl;
+            }
+        }
+        std::cout << std::endl;
+        if (found)
+            numFound++;
+    }
+    std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl;
+    std::cout << homography[0] << " " << homography[1] << " " << homography[2] << std::endl; //%%%
+    std::cout << homography[3] << " " << homography[4] << " " << homography[5] << std::endl; //%%%
+    std::cout << homography[6] << " " << homography[7] << " " << homography[8] << std::endl; //%%%
+}
+
+// Debug helper (its only call in main is commented out): draws into the host
+// image buffer img.h_data. For every point with match_error < 5 a line towards
+// its match is written with value 255; every point additionally gets a cross
+// whose arm length is limited by the distance to the image border.
+void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img)
+{
+    int numPts = siftData1.numPts;
+#ifdef MANAGEDMEM
+    SiftPoint *sift1 = siftData1.m_data;
+    SiftPoint *sift2 = siftData2.m_data;
+#else
+    SiftPoint *sift1 = siftData1.h_data;
+    SiftPoint *sift2 = siftData2.h_data;
+#endif
+    float *h_img = img.h_data;
+    int w = img.width;
+    int h = img.height;
+    std::cout << std::setprecision(3);
+    for (int j = 0; j < numPts; j++)
+    {
+        int k = sift1[j].match;
+        if (sift1[j].match_error < 5)
+        {
+            float dx = sift2[k].xpos - sift1[j].xpos;
+            float dy = sift2[k].ypos - sift1[j].ypos;
+#if 0
+            if (false && sift1[j].xpos>550 && sift1[j].xpos<600) {
+                std::cout << "pos1=(" << (int)sift1[j].xpos << "," << (int)sift1[j].ypos << ") ";
+                std::cout << j << ": " << "score=" << sift1[j].score << " ambiguity=" << sift1[j].ambiguity << " match=" << k << " ";
+                std::cout << "scale=" << sift1[j].scale << " ";
+                std::cout << "error=" << (int)sift1[j].match_error << " ";
+                std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << " ";
+                std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl;
+            }
+#endif
+#if 1
+            // len == 0 skips the loop, so the divisions below never see a zero divisor.
+            int len = (int)(fabs(dx) > fabs(dy) ? fabs(dx) : fabs(dy));
+            for (int l = 0; l < len; l++)
+            {
+                int x = (int)(sift1[j].xpos + dx * l / len);
+                int y = (int)(sift1[j].ypos + dy * l / len);
+                h_img[y * w + x] = 255.0f;
+            }
+#endif
+        }
+        int x = (int)(sift1[j].xpos + 0.5);
+        int y = (int)(sift1[j].ypos + 0.5);
+        int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale)))));
+        int p = y * w + x;
+        p += (w + 1);
+        for (int k = 0; k < s; k++)
+            h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 0.0f;
+        p -= (w + 1);
+        for (int k = 0; k < s; k++)
+            h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 255.0f;
+    }
+    std::cout << std::setprecision(6);
+}
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/matching.cu b/third-party-programs/Velocity-Bench/cudaSift/CUDA/matching.cu
new file mode 100644
index 000000000..d54978960
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/matching.cu
@@ -0,0 +1,1530 @@
+// Modifications Copyright (C) 2023 Intel Corporation
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+
+// SPDX-License-Identifier: MIT
+
+#include
+#include
+#include "cudaSift.h"
+#include "cudautils.h"
+
+//================= Device matching functions =====================//
+
+// Correlates one sift1 descriptor (p1 = blockIdx.x) with 16 sift2 descriptors
+// (p2 = blockIdx.y * 16 + ty). The 128 floats of sift1[p1] are staged in shared
+// memory, each thread accumulates 8 products, and the partial sums are reduced
+// in shared memory before row ty == 0 writes the 16 results to corrData.
+__global__ void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2)
+{
+    __shared__ float siftPoint[128];
+    __shared__ float sums[16 * 16];
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int p1 = blockIdx.x;
+    const int p2 = blockIdx.y * 16 + ty;
+    const float *ptr1 = sift1[p1].data;
+    const float *ptr2 = sift2[p2].data;
+    const int i = 16 * ty + tx;
+    if (ty < 8)
+        siftPoint[i] = ptr1[i];
+    __syncthreads();
+    float sum = 0.0f;
+    if (p2 < numPts2)
+        for (int j = 0; j < 8; j++)
+            sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx];
+    sums[i] = sum;
+    __syncthreads();
+    if (tx < 8)
+        sums[i] += sums[i + 8];
+    __syncthreads();
+    if (tx < 4)
+        sums[i] += sums[i + 4];
+    __syncthreads();
+    if (ty == 0)
+    {
+        sum = sums[16 * tx + 0] + sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3];
+        corrData[p1 * gridDim.y * 16 + blockIdx.y * 16 + tx] = sum;
+    }
+    __syncthreads();
+}
+
+// Tiled variant: a block stages 16 descriptors from each set in shared memory
+// and every thread computes the full 128-term dot product for the pair
+// (p1 = blockIdx.x * 16 + ty, p2 = blockIdx.y * 16 + tx). Columns with
+// p2 >= numPts2 are written as -1.0f.
+__global__ void MatchSiftPoints2(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2)
+{
+    __shared__ float siftPoints1[16 * 128];
+    __shared__ float siftPoints2[16 * 128];
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const float *ptr1 = sift1[min(numPts1 - 1, blockIdx.x * 16 + ty)].data;
+    const float *ptr2 = sift2[min(numPts2 - 1, blockIdx.y * 16 + ty)].data;
+    for (int i = 0; i < 8; i++)
+    {
+        siftPoints1[128 * ty + 16 * i + tx] = ptr1[16 * i + tx];
+        siftPoints2[128 * ty + 16 * i + tx] = ptr2[16 * i + tx];
+    }
+    __syncthreads();
+    const int p1 = blockIdx.x * 16 + ty;
+    const int p2 = blockIdx.y * 16 + tx;
+    const float *pt1 = &siftPoints1[ty * 128];
+    const float *pt2 = &siftPoints2[tx * 128];
+    float sum = 0.0f;
+    for (int i = 0; i < 128; i++)
+    {
+        int itx = (i + tx) & 127; // avoid bank conflicts
+        sum += pt1[itx] * pt2[itx];
+    }
+    if (p1 < numPts1)
+        corrData[p1 * gridDim.y * 16 + p2] = (p2 < numPts2 ? sum : -1.0f);
+}
+
+// For each row p1 of corrData (corrWidth entries) finds the best and the
+// second-best correlation and stores score, ambiguity (second / (best + 1e-6)),
+// the index of the best column and that sift2 point's position into sift1[p1].
+// Each of the 16 lanes of a row first scans a strided part of the row; the 16
+// per-lane results are then combined by a tree reduction in shared memory.
+__global__ void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize)
+{
+    __shared__ float maxScore[16 * 16];
+    __shared__ float maxScor2[16 * 16];
+    __shared__ int maxIndex[16 * 16];
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int idx = ty * 16 + tx;
+    int p1 = blockIdx.x * 16 + threadIdx.y;
+    p1 = (p1 >= numPts1 ? numPts1 - 1 : p1);
+    maxScore[idx] = -1.0f;
+    maxScor2[idx] = -1.0f;
+    maxIndex[idx] = -1;
+    __syncthreads();
+    float *corrs = &corrData[p1 * corrWidth];
+    for (int i = tx; i < corrWidth; i += 16)
+    {
+        float val = corrs[i];
+        if (val > maxScore[idx])
+        {
+            maxScor2[idx] = maxScore[idx];
+            maxScore[idx] = val;
+            maxIndex[idx] = i;
+        }
+        else if (val > maxScor2[idx])
+            maxScor2[idx] = val;
+    }
+    __syncthreads();
+    for (int len = 8; len > 0; len /= 2)
+    {
+        // Only the lower `len` lanes may take part in a step: with a fixed
+        // `tx < 8` guard, lanes len..7 would overwrite slots that lanes
+        // 0..len-1 read in the same step (a shared-memory race that can fold
+        // the winning score into the runner-up slot as well).
+        if (tx < len)
+        {
+            float val = maxScore[idx + len];
+            int i = maxIndex[idx + len];
+            if (val > maxScore[idx])
+            {
+                maxScor2[idx] = maxScore[idx];
+                maxScore[idx] = val;
+                maxIndex[idx] = i;
+            }
+            else if (val > maxScor2[idx])
+                maxScor2[idx] = val;
+            float va2 = maxScor2[idx + len];
+            if (va2 > maxScor2[idx])
+                maxScor2[idx] = va2;
+        }
+        __syncthreads();
+    }
+    if (tx == 0)
+    {
+        sift1[p1].score = maxScore[ty * 16];
+        sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6);
+        sift1[p1].match = maxIndex[ty * 16];
+        sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos;
+        sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos;
+    }
+}
+
+// Version based on suggestion by Nicholas Lin
+// Computes the dot products directly and uses 32 floats of corrData per p1 as
+// scratch: entries 0..15 hold per-lane best scores, 16..31 the second best.
+__global__ void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    int block_dim = blockDim.x; // blockDim.x == 16
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int p1 = blockIdx.x * block_dim + ty;
+    const int idx = ty * 16 + tx;
+
+    __shared__ int maxIndex[16 * 16];
+    maxIndex[idx] = 0;
+    __syncthreads();
+
+    float *corrs = NULL;
+    if (p1 < numPts1)
+    {
+        corrs = &corrData[p1 * block_dim * 2];
+        corrs[tx] = 0.0f;
+        corrs[tx + 16] = 0.0f;
+        const float *pt1 = sift1[p1].data;
+        for (int p2 = tx; p2 < numPts2; p2 += 16)
+        {
+            float *pt2 = sift2[p2].data;
+            float sum = 0.0f;
+            for (int i = 0; i < 128; i++)
+                sum += pt1[i] * pt2[i];
+            if (sum > corrs[tx])
+            {
+                corrs[tx + 16] = corrs[tx];
+                corrs[tx] = sum;
+                maxIndex[idx] = p2;
+            }
+            else if (sum > corrs[tx + 16])
+                corrs[tx + 16] = sum;
+        }
+    }
+    __syncthreads();
+    if (p1 < numPts1)
+    {
+        for (int len = 8; len > 0; len /= 2)
+        {
+            if (tx < len)
+            {
+                float val = corrs[tx + len];
+                int i = maxIndex[idx + len];
+                if (val > corrs[tx])
+                {
+                    corrs[tx + 16] = corrs[tx];
+                    corrs[tx] = val;
+                    maxIndex[idx] = i;
+                }
+                else if (val > corrs[tx + 16])
+                    corrs[tx + 16] = val;
+                float va2 = corrs[tx + 16 + len];
+                if (va2 > corrs[tx + 16])
+                    corrs[tx + 16] = va2;
+            }
+            __syncthreads();
+        }
+        if (tx == 0)
+        {
+            sift1[p1].score = corrs[0];
+            sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6);
+            sift1[p1].match = maxIndex[ty << 4];
+            sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos;
+            sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos;
+        }
+    }
+}
+
+#define FMC2W 16
+#define FMC2H 4
+
+// One block per sift1 point (p1 = blockIdx.x). Row ty walks the sift2 points
+// p2 = ty, ty + FMC2H, ...; the FMC2W lanes of a row split the 128-term dot
+// product and combine it with ShiftDown; the FMC2H per-row results are then
+// reduced in shared memory.
+__global__ void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    __shared__ float siftPoint[128];
+    __shared__ float maxScore[FMC2H];
+    __shared__ float maxScor2[FMC2H];
+    __shared__ int maxIndex[FMC2H];
+    const int p1 = blockIdx.x;
+    if (p1 >= numPts1)
+        return;
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int idx = ty * FMC2W + tx;
+    if (idx < FMC2H)
+    {
+        maxScore[idx] = -1.0f;
+        maxScor2[idx] = -1.0f;
+        maxIndex[idx] = 0;
+    }
+    __syncthreads();
+    const float *pt1 = sift1[p1].data;
+    for (int i = idx; i < 128; i += FMC2W * FMC2H)
+        siftPoint[i] = pt1[i];
+    __syncthreads();
+    for (int p2 = ty; p2 < numPts2; p2 += FMC2H)
+    {
+        const float *pt2 = sift2[p2].data;
+        float sum = 0.0f;
+        for (int j = tx; j < 128; j += FMC2W)
+            sum += siftPoint[j] * pt2[j];
+        for (int j = FMC2W / 2; j > 0; j /= 2)
+            sum += ShiftDown(sum, j);
+        if (tx == 0)
+        {
+            if (sum > maxScore[ty])
+            {
+                maxScor2[ty] = maxScore[ty];
+                maxScore[ty] = sum;
+                maxIndex[ty] = p2;
+            }
+            else if (sum > maxScor2[ty])
+                maxScor2[ty] = sum;
+        }
+    }
+    __syncthreads();
+    for (int len = FMC2H / 2; len > 0; len /= 2)
+    {
+        if (ty == 0 && tx < len)
+        {
+            float val = maxScore[tx + len];
+            int p2 = maxIndex[tx + len];
+            if (val > maxScore[tx])
+            {
+                maxScor2[tx] = maxScore[tx];
+                maxScore[tx] = val;
+                maxIndex[tx] = p2;
+            }
+            else if (val > maxScor2[tx])
+                maxScor2[tx] = val;
+            float va2 = maxScor2[tx + len];
+            if (va2 > maxScor2[tx])
+                maxScor2[tx] = va2;
+        }
+        __syncthreads();
+    }
+    if (ty == 0 && tx == 0)
+    {
+        sift1[p1].score = maxScore[0];
+        sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6);
+        sift1[p1].match = maxIndex[0];
+        sift1[p1].match_xpos = sift2[maxIndex[0]].xpos;
+        sift1[p1].match_ypos = sift2[maxIndex[0]].ypos;
+    }
+}
+
+// Handles FMC2H sift1 points per block (p1 = blockIdx.x * FMC2H + ty); each row
+// scans all sift2 points itself, so no cross-row reduction is needed.
+// NOTE(review): p1 is not checked against numPts1 here -- presumably the launch
+// guarantees that it stays in range; confirm at the call site.
+__global__ void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    __shared__ float siftPoint[128 * FMC2H];
+    __shared__ float maxScore[FMC2H];
+    __shared__ float maxScor2[FMC2H];
+    __shared__ int maxIndex[FMC2H];
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    if (tx == 0)
+    {
+        maxScore[ty] = -1.0f;
+        maxScor2[ty] = -1.0f;
+        maxIndex[ty] = 0;
+    }
+    const int p1 = blockIdx.x * FMC2H + ty;
+    const float *pt1 = sift1[p1].data;
+    for (int j = tx; j < 128; j += FMC2W)
+        siftPoint[128 * ty + j] = pt1[j];
+    __syncthreads();
+    for (int p2 = 0; p2 < numPts2; p2++)
+    {
+        const float *pt2 = sift2[p2].data;
+        float sum = 0.0f;
+        for (int j = tx; j < 128; j += FMC2W)
+            sum += siftPoint[128 * ty + j] * pt2[j];
+        for (int j = FMC2W / 2; j > 0; j /= 2)
+            sum += ShiftDown(sum, j);
+        if (tx == 0)
+        {
+            if (sum > maxScore[ty])
+            {
+                maxScor2[ty] = maxScore[ty];
+                maxScore[ty] = sum;
+                maxIndex[ty] = p2;
+            }
+            else if (sum > maxScor2[ty])
+                maxScor2[ty] = sum;
+        }
+    }
+    __syncthreads();
+    if (tx == 0)
+    {
+        sift1[p1].score = maxScore[ty];
+        sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6);
+        sift1[p1].match = maxIndex[ty];
+        sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos;
+        sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos;
+    }
+}
+
+// Byte-wise pitched copy: numPts rows of `width` bytes from src to dst, with
+// src_pitch / dst_pitch bytes between consecutive rows. The kernel does not
+// use any thread index, so every launched thread performs the whole copy.
+__global__ void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width)
+{
+    char *d_src = (char *)src;
+    char *d_dst = (char *)dst;
+
+    for (int i = 0; i < numPts; ++i)
+    {
+        for (int j = 0; j < width; ++j)
+        {
+            d_dst[j] = d_src[j];
+        }
+        d_src = d_src + src_pitch;
+        d_dst = d_dst + dst_pitch;
+    }
+}
+
+// Resets sift1[p1].score to 0 for p1 = blockIdx.x * 64 + threadIdx.x (clamped to numPts1 - 1).
+__global__ void
+CleanMatches(SiftPoint *sift1, int numPts1)
+{
+    const int p1 = min(blockIdx.x * 64 + threadIdx.x, numPts1 - 1);
+    sift1[p1].score = 0.0f;
+}
+
+#define M7W 32
+#define M7H 32
+#define M7R 4
+#define NRX 2
+#define NDIM 128
+
+// Block-tiled matcher: M7W sift1 descriptors are held in buffer1 (stored with
+// a per-row rotation of the float4 index) while sift2 is streamed through
+// buffer2 in tiles of M7H descriptors. The first M7W * M7H / M7R / NRX threads
+// each accumulate an M7R x NRX patch of scores; the per-thread winners are
+// then merged through shared memory (buffer1 is reused as scratch).
+__global__ void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    __shared__ float4 buffer1[M7W * NDIM / 4];
+    __shared__ float4 buffer2[M7H * NDIM / 4];
+    int tx = threadIdx.x;
+    int ty = threadIdx.y;
+    int bp1 = M7W * blockIdx.x;
+    for (int j = ty; j < M7W; j += M7H / M7R)
+    {
+        int p1 = min(bp1 + j, numPts1 - 1);
+        for (int d = tx; d < NDIM / 4; d += M7W)
+            buffer1[j * NDIM / 4 + (d + j) % (NDIM / 4)] = ((float4 *)&sift1[p1].data)[d];
+    }
+
+    float max_score[NRX];
+    float sec_score[NRX];
+    int index[NRX];
+    for (int i = 0; i < NRX; i++)
+    {
+        max_score[i] = 0.0f;
+        sec_score[i] = 0.0f;
+        index[i] = -1;
+    }
+
+    int idx = ty * M7W + tx;
+    int ix = idx % (M7W / NRX);
+    int iy = idx / (M7W / NRX);
+    for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H)
+    {
+        for (int j = ty; j < M7H; j += M7H / M7R)
+        {
+            int p2 = min(bp2 + j, numPts2 - 1);
+            for (int d = tx; d < NDIM / 4; d += M7W)
+                buffer2[j * NDIM / 4 + d] = ((float4 *)&sift2[p2].data)[d];
+        }
+        __syncthreads();
+
+        if (idx < M7W * M7H / M7R / NRX)
+        {
+            float score[M7R][NRX];
+            for (int dy = 0; dy < M7R; dy++)
+                for (int i = 0; i < NRX; i++)
+                    score[dy][i] = 0.0f;
+            for (int d = 0; d < NDIM / 4; d++)
+            {
+                float4 v1[NRX];
+                for (int i = 0; i < NRX; i++)
+                    v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)];
+                for (int dy = 0; dy < M7R; dy++)
+                {
+                    float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d];
+                    for (int i = 0; i < NRX; i++)
+                    {
+                        score[dy][i] += v1[i].x * v2.x;
+                        score[dy][i] += v1[i].y * v2.y;
+                        score[dy][i] += v1[i].z * v2.z;
+                        score[dy][i] += v1[i].w * v2.w;
+                    }
+                }
+            }
+            for (int dy = 0; dy < M7R; dy++)
+            {
+                for (int i = 0; i < NRX; i++)
+                {
+                    if (score[dy][i] > max_score[i])
+                    {
+                        sec_score[i] = max_score[i];
+                        max_score[i] = score[dy][i];
+                        index[i] = min(bp2 + M7R * iy + dy, numPts2 - 1);
+                    }
+                    else if (score[dy][i] > sec_score[i])
+                        sec_score[i] = score[dy][i];
+                }
+            }
+        }
+        __syncthreads();
+    }
+    float *scores1 = (float *)buffer1;
+    float *scores2 = &scores1[M7W * M7H / M7R];
+    int *indices = (int *)&scores2[M7W * M7H / M7R];
+    if (idx < M7W * M7H / M7R / NRX)
+    {
+        for (int i = 0; i < NRX; i++)
+        {
+            scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i];
+            scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i];
+            indices[iy * M7W + (M7W / NRX) * i + ix] = index[i];
+        }
+    }
+    __syncthreads();
+
+    if (ty == 0)
+    {
+        float max_score = scores1[tx];
+        float sec_score = scores2[tx];
+        int index = indices[tx];
+        for (int y = 0; y < M7H / M7R; y++)
+            if (index != indices[y * M7W + tx])
+            {
+                if (scores1[y * M7W + tx] > max_score)
+                {
+                    sec_score = max(max_score, sec_score);
+                    max_score = scores1[y * M7W + tx];
+                    index = indices[y * M7W + tx];
+                }
+                else if (scores1[y * M7W + tx] > sec_score)
+                    sec_score = scores1[y * M7W + tx];
+            }
+        sift1[bp1 + tx].score = max_score;
+        sift1[bp1 + tx].match = index;
+        sift1[bp1 + tx].match_xpos = sift2[index].xpos;
+        sift1[bp1 + tx].match_ypos = sift2[index].ypos;
+        sift1[bp1 + tx].ambiguity = sec_score / (max_score + 1e-6f);
+    }
+}
+
+#define FMC_GH 512
+#define FMC_BW 32
+#define FMC_BH 32
+#define FMC_BD 16
+#define FMC_TW 1
+#define FMC_TH 4
+#define FMC_NW (FMC_BW / FMC_TW) // 32
+#define FMC_NH (FMC_BH / FMC_TH) // 8
+#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps
+
+// Global spin lock taken (atomicCAS) by thread 0 of a block around the final
+// read-modify-write of sift1[] in FindMaxCorr9/8/7 and released with atomicExch.
+__device__ volatile int lock = 0;
+
+// Tiled matcher over FMC_BW sift1 points and up to FMC_GH sift2 points per
+// block (blockIdx.y selects the sift2 range). Descriptor slices are staged in
+// two halves of siftParts1/siftParts2, toggled through `b`, so the next slice
+// is loaded while the previous one is consumed. The block's best / second-best
+// scores are merged into sift1[] under the global `lock`.
+__global__ void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 4*32*8 = 1024
+    __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 4*32*8 = 1024
+    //__shared__ float blksums[FMC_BW*FMC_BH]; // 32*32 = 1024
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int idx = ty * FMC_NW + tx;
+    float4 *pts1 = 0, *pts2 = 0;
+    if (idx < FMC_BW)
+    {
+        const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+        pts1 = (float4 *)sift1[p1l].data;
+    }
+    float maxScore = -1.0f;
+    float maxScor2 = -1.0f;
+    int maxIndex = 0;
+    for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH)
+    {
+        if (idx < FMC_BH)
+        {
+            const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1);
+            pts2 = (float4 *)sift2[p2l].data;
+        }
+        float sums[FMC_TW * FMC_TH];
+        for (int i = 0; i < FMC_TW * FMC_TH; i++)
+            sums[i] = 0.0f;
+
+        if (idx < FMC_BW)
+            for (int i = 0; i < FMC_BD / 2; i++)
+                siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i];
+        if (idx < FMC_BH)
+            for (int i = 0; i < FMC_BD / 2; i++)
+                siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i];
+        __syncthreads();
+
+        int b = FMC_BD / 2;
+        for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2)
+        {
+            if (idx < FMC_BW)
+                for (int i = 0; i < FMC_BD / 2; i++)
+                    siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i];
+            if (idx < FMC_BH)
+                for (int i = 0; i < FMC_BD / 2; i++)
+                    siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i];
+
+            b ^= FMC_BD / 2;
+            for (int i = 0; i < FMC_BD / 2; i++)
+            {
+                float4 v1[FMC_TW];
+                for (int ix = 0; ix < FMC_TW; ix++)
+                    v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+                for (int iy = 0; iy < FMC_TH; iy++)
+                {
+                    float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+                    for (int ix = 0; ix < FMC_TW; ix++)
+                    {
+                        sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+                        sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+                        sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+                        sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+                    }
+                }
+            }
+            __syncthreads();
+        }
+
+        b ^= FMC_BD / 2;
+        for (int i = 0; i < FMC_BD / 2; i++)
+        {
+            float4 v1[FMC_TW];
+            for (int ix = 0; ix < FMC_TW; ix++)
+                v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)];
+            for (int iy = 0; iy < FMC_TH; iy++)
+            {
+                float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)];
+                for (int ix = 0; ix < FMC_TW; ix++)
+                {
+                    sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+                    sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+                    sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+                    sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+                }
+            }
+        }
+        __syncthreads();
+
+        float *blksums = (float *)siftParts1;
+        for (int iy = 0; iy < FMC_TH; iy++)
+            for (int ix = 0; ix < FMC_TW; ix++)
+                blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+        __syncthreads();
+        if (idx < FMC_BW)
+        {
+            for (int j = 0; j < FMC_BH; j++)
+            {
+                float sum = blksums[j * FMC_BW + idx];
+                if (sum > maxScore)
+                {
+                    maxScor2 = maxScore;
+                    maxScore = sum;
+                    maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1);
+                }
+                else if (sum > maxScor2)
+                    maxScor2 = sum;
+            }
+        }
+        __syncthreads();
+    }
+    const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+    if (idx == 0)
+        while (atomicCAS((int *)&lock, 0, 1) != 0)
+            ;
+    __syncthreads();
+    if (idx < FMC_BW)
+    {
+        float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+        if (maxScore > sift1[p1].score)
+        {
+            maxScor2 = max(sift1[p1].score, maxScor2);
+            sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+            sift1[p1].score = maxScore;
+            sift1[p1].match = maxIndex;
+            sift1[p1].match_xpos = sift2[maxIndex].xpos;
+            sift1[p1].match_ypos = sift2[maxIndex].ypos;
+        }
+        else if (maxScore > maxScor2Old)
+            sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+    }
+    __syncthreads();
+    if (idx == 0)
+        atomicExch((int *)&lock, 0);
+}
+
+// Same tiling as FindMaxCorr9 but with single-buffered staging (FMC_BD float4
+// slices per step) and a dedicated shared `blksums` array for the tile scores.
+__global__ void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 4*32*8 = 1024
+    __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 4*32*8 = 1024
+    __shared__ float blksums[FMC_BW * FMC_BH]; // 32*32 = 1024
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int idx = ty * FMC_NW + tx;
+    float4 *pts1 = 0, *pts2 = 0;
+    if (idx < FMC_BW)
+    {
+        const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+        pts1 = (float4 *)sift1[p1l].data;
+    }
+    float maxScore = -1.0f;
+    float maxScor2 = -1.0f;
+    int maxIndex = 0;
+    for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH)
+    {
+        if (idx < FMC_BH)
+        {
+            const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1);
+            pts2 = (float4 *)sift2[p2l].data;
+        }
+        float sums[FMC_TW * FMC_TH];
+        for (int i = 0; i < FMC_TW * FMC_TH; i++)
+            sums[i] = 0.0f;
+        for (int d = 0; d < 32; d += FMC_BD)
+        {
+            if (idx < FMC_BW)
+                for (int i = 0; i < FMC_BD; i++)
+                    siftParts1[i * FMC_BW + idx] = pts1[d + i];
+            if (idx < FMC_BH)
+                for (int i = 0; i < FMC_BD; i++)
+                    siftParts2[i * FMC_BH + idx] = pts2[d + i];
+            __syncthreads();
+
+            for (int i = 0; i < FMC_BD; i++)
+            {
+                float4 v1[FMC_TW];
+                for (int ix = 0; ix < FMC_TW; ix++)
+                    v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)];
+                for (int iy = 0; iy < FMC_TH; iy++)
+                {
+                    float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)];
+                    for (int ix = 0; ix < FMC_TW; ix++)
+                    {
+                        sums[iy * FMC_TW + ix] += v1[ix].x * v2.x;
+                        sums[iy * FMC_TW + ix] += v1[ix].y * v2.y;
+                        sums[iy * FMC_TW + ix] += v1[ix].z * v2.z;
+                        sums[iy * FMC_TW + ix] += v1[ix].w * v2.w;
+                    }
+                }
+            }
+            __syncthreads();
+        }
+        // float *blksums = (float*)siftParts1;
+        for (int iy = 0; iy < FMC_TH; iy++)
+            for (int ix = 0; ix < FMC_TW; ix++)
+                blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix];
+        __syncthreads();
+        if (idx < FMC_BW)
+        {
+            for (int j = 0; j < FMC_BH; j++)
+            {
+                float sum = blksums[j * FMC_BW + idx];
+                if (sum > maxScore)
+                {
+                    maxScor2 = maxScore;
+                    maxScore = sum;
+                    maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1);
+                }
+                else if (sum > maxScor2)
+                    maxScor2 = sum;
+            }
+        }
+        __syncthreads();
+    }
+    const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1);
+    if (idx == 0)
+        while (atomicCAS((int *)&lock, 0, 1) != 0)
+            ;
+    __syncthreads();
+    if (idx < FMC_BW)
+    {
+        float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+        if (maxScore > sift1[p1].score)
+        {
+            maxScor2 = max(sift1[p1].score, maxScor2);
+            sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+            sift1[p1].score = maxScore;
+            sift1[p1].match = maxIndex;
+            sift1[p1].match_xpos = sift2[maxIndex].xpos;
+            sift1[p1].match_ypos = sift2[maxIndex].ypos;
+        }
+        else if (maxScore > maxScor2Old)
+            sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+    }
+    __syncthreads();
+    if (idx == 0)
+        atomicExch((int *)&lock, 0);
+}
+
+// 16 x 16 tile variant: 16 sift1 points per block against 512 sift2 points per
+// blockIdx.y, processed 16 at a time. Rows ty < 16 / NUM each accumulate NUM
+// sums; siftParts1 is reused as the `sums` scratch array once a tile is done.
+__global__ void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    __shared__ float siftParts1[17 * 64]; // features in columns
+    __shared__ float siftParts2[16 * 64]; // one extra to avoid shared conflicts
+    float4 *pts1 = (float4 *)siftParts1;
+    float4 *pts2 = (float4 *)siftParts2;
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1);
+    const float4 *p1l4 = (float4 *)sift1[p1l].data;
+    float maxScore = -1.0f;
+    float maxScor2 = -1.0f;
+    int maxIndex = 0;
+    for (int k = 0; k < 512 / 16; k++)
+    {
+        const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1);
+        const float4 *p2l4 = (float4 *)sift2[p2l].data;
+#define NUM 4
+        float sum[NUM];
+        if (ty < (16 / NUM))
+            for (int l = 0; l < NUM; l++)
+                sum[l] = 0.0f;
+        __syncthreads();
+        for (int i = 0; i < 2; i++)
+        {
+            pts1[17 * tx + ty] = p1l4[i * 16 + tx];
+            pts2[16 * ty + tx] = p2l4[i * 16 + tx];
+            __syncthreads();
+            if (ty < (16 / NUM))
+            {
+#pragma unroll
+                for (int j = 0; j < 16; j++)
+                {
+                    float4 p1v = pts1[17 * j + tx];
+#pragma unroll
+                    for (int l = 0; l < NUM; l++)
+                    {
+                        float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j];
+                        sum[l] += p1v.x * p2v.x;
+                        sum[l] += p1v.y * p2v.y;
+                        sum[l] += p1v.z * p2v.z;
+                        sum[l] += p1v.w * p2v.w;
+                    }
+                }
+            }
+            __syncthreads();
+        }
+        float *sums = siftParts1;
+        if (ty < (16 / NUM))
+            for (int l = 0; l < NUM; l++)
+                sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l];
+        __syncthreads();
+        if (ty == 0)
+        {
+            for (int j = 0; j < 16; j++)
+            {
+                float sum = sums[16 * j + tx];
+                if (sum > maxScore)
+                {
+                    maxScor2 = maxScore;
+                    maxScore = sum;
+                    maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1);
+                }
+                else if (sum > maxScor2)
+                    maxScor2 = sum;
+            }
+        }
+        __syncthreads();
+    }
+    const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1);
+    if (tx == 0 && ty == 0)
+        while (atomicCAS((int *)&lock, 0, 1) != 0)
+            ;
+    __syncthreads();
+    if (ty == 0)
+    {
+        float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f);
+        if (maxScore > sift1[p1].score)
+        {
+            maxScor2 = max(sift1[p1].score, maxScor2);
+            sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f);
+            sift1[p1].score = maxScore;
+            sift1[p1].match = maxIndex;
+            sift1[p1].match_xpos = sift2[maxIndex].xpos;
+            sift1[p1].match_ypos = sift2[maxIndex].ypos;
+        }
+        else if (maxScore > maxScor2Old)
+            sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f);
+    }
+    __syncthreads();
+    if (tx == 0 && ty == 0)
+        atomicExch((int *)&lock, 0);
+}
+
+__global__ void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2)
+{
+    //__shared__ float siftParts1[128*16]; // features in columns
+    __shared__ float siftParts2[128 * 16]; // one extra to avoid shared conflicts
+    __shared__ float sums[16 * 16];
+    const int tx = threadIdx.x;
+    const int ty = threadIdx.y;
+    const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1);
+    float *pt1l = sift1[p1l].data;
+    float4 part1 = reinterpret_cast(pt1l)[tx];
+    float maxScore = -1.0f;
+    float maxScor2 = -1.0f;
+    int maxIndex = 0;
+    for (int k = 0; k < 512; k += 16)
+    {
+        const int p2l = min(blockIdx.y * 512 + k + ty, numPts2 - 1);
+        float *pt2l = sift2[p2l].data;
+        reinterpret_cast(siftParts2)[32 * ty + tx] = reinterpret_cast(pt2l)[tx];
+        __syncthreads();
+        for (int i = 0; i < 16; i++)
+        {
+            float4 part2 = reinterpret_cast(siftParts2)[32 * i + tx];
+            float sum = part1.x * part2.x + part1.y * part2.y + part1.z * part2.z + part1.w *
part2.w; + sum += ShiftDown(sum, 16); + sum += ShiftDown(sum, 8); + sum += ShiftDown(sum, 4); + sum += ShiftDown(sum, 2); + sum += ShiftDown(sum, 1); + if (tx == 0) + sums[16 * i + ty] = sum; + } + __syncthreads(); + if (ty == 0 && tx < 16) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = min(blockIdx.y * 512 + k + j, numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + __syncthreads(); + } + if (tx == 0 && ty == 0) + while (atomicCAS((int *)&lock, 0, 1) != 0) + ; + __syncthreads(); + if (ty == 0 && tx < 16) + { + const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1); + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + __syncthreads(); + if (tx == 0 && ty == 0) + atomicExch((int *)&lock, 0); +} + +__global__ void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float siftParts1[17 * 16]; // features in columns + __shared__ float siftParts2[17 * 16]; // one extra to avoid shared conflicts + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); + const float *pt1l = sift1[p1l].data; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512 / 16; k++) + { + const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1); + const float *pt2l = sift2[p2l].data; + float sum = 0.0f; + for (int i = 0; i < 8; i++) + { + siftParts1[17 * tx + ty] = pt1l[i * 16 + tx]; // load and transpose + 
siftParts2[17 * tx + ty] = pt2l[i * 16 + tx]; + __syncthreads(); + for (int j = 0; j < 16; j++) + sum += siftParts1[17 * j + tx] * siftParts2[17 * j + ty]; + __syncthreads(); + } + float *sums = siftParts1; + sums[16 * ty + tx] = sum; + __syncthreads(); + if (ty == 0) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + __syncthreads(); + } + const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1); + if (tx == 0 && ty == 0) + while (atomicCAS((int *)&lock, 0, 1) != 0) + ; + __syncthreads(); + if (ty == 0) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + __syncthreads(); + if (tx == 0 && ty == 0) + atomicExch((int *)&lock, 0); +} + +template +__device__ void InvertMatrix(float elem[size][size], float res[size][size]) +{ + int indx[size]; + float b[size]; + float vv[size]; + for (int i = 0; i < size; i++) + indx[i] = 0; + int imax = 0; + float d = 1.0; + for (int i = 0; i < size; i++) + { // find biggest element for each row + float big = 0.0; + for (int j = 0; j < size; j++) + { + float temp = fabs(elem[i][j]); + if (temp > big) + big = temp; + } + if (big > 0.0) + vv[i] = 1.0 / big; + else + vv[i] = 1e16; + } + for (int j = 0; j < size; j++) + { + for (int i = 0; i < j; i++) + { // ik (upper right), k=j + float sum = elem[i][j]; // i>=j (upper right) + for (int k = 0; k < j; k++) // kk (upper right), k=j (upper right) + float dum = vv[i] * 
fabs(sum); + if (dum >= big) + { + big = dum; + imax = i; + } + } + if (j != imax) + { // imax>j + for (int k = 0; k < size; k++) + { + float dum = elem[imax][k]; // upper right and lower left + elem[imax][k] = elem[j][k]; + elem[j][k] = dum; + } + d = -d; + vv[imax] = vv[j]; + } + indx[j] = imax; + if (elem[j][j] == 0.0) // j==j (upper right) + elem[j][j] = 1e-16; + if (j != (size - 1)) + { + float dum = 1.0 / elem[j][j]; + for (int i = j + 1; i < size; i++) // i>j + elem[i][j] *= dum; // i>j (upper right) + } + } + for (int j = 0; j < size; j++) + { + for (int k = 0; k < size; k++) + b[k] = 0.0; + b[j] = 1.0; + int ii = -1; + for (int i = 0; i < size; i++) + { + int ip = indx[i]; + float sum = b[ip]; + b[ip] = b[i]; + if (ii != -1) + for (int j = ii; j < i; j++) + sum -= elem[i][j] * b[j]; // i>j (upper right) + else if (sum != 0.0) + ii = i; + b[i] = sum; + } + for (int i = size - 1; i >= 0; i--) + { + float sum = b[i]; + for (int j = i + 1; j < size; j++) + sum -= elem[i][j] * b[j]; // i(a, ia); + __syncthreads(); + for (int j = 0; j < 8; j++) + { + float sum = 0.0f; + for (int i = 0; i < 8; i++) + sum += ia[j][i] * b[i]; + homo[j * numLoops + idx] = sum; + } + __syncthreads(); +} + +#define TESTHOMO_TESTS 16 // number of tests per block, alt. 32, 32 +#define TESTHOMO_LOOPS 16 // number of loops per block, alt. 
8, 16 + +__global__ void TestHomographies(float *d_coord, float *d_homo, + int *d_counts, int numPts, float thresh2) +{ + __shared__ float homo[8 * TESTHOMO_LOOPS]; + __shared__ int cnts[TESTHOMO_TESTS * TESTHOMO_LOOPS]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int idx = blockIdx.y * blockDim.y + tx; + const int numLoops = blockDim.y * gridDim.y; + if (ty < 8 && tx < TESTHOMO_LOOPS) + homo[tx * 8 + ty] = d_homo[idx + ty * numLoops]; + __syncthreads(); + float a[8]; + for (int i = 0; i < 8; i++) + a[i] = homo[ty * 8 + i]; + int cnt = 0; + for (int i = tx; i < numPts; i += TESTHOMO_TESTS) + { + float x1 = d_coord[i + 0 * numPts]; + float y1 = d_coord[i + 1 * numPts]; + float x2 = d_coord[i + 2 * numPts]; + float y2 = d_coord[i + 3 * numPts]; + float nomx = __fmul_rz(a[0], x1) + __fmul_rz(a[1], y1) + a[2]; + float nomy = __fmul_rz(a[3], x1) + __fmul_rz(a[4], y1) + a[5]; + float deno = __fmul_rz(a[6], x1) + __fmul_rz(a[7], y1) + 1.0f; + float errx = __fmul_rz(x2, deno) - nomx; + float erry = __fmul_rz(y2, deno) - nomy; + float err2 = __fmul_rz(errx, errx) + __fmul_rz(erry, erry); + if (err2 < __fmul_rz(thresh2, __fmul_rz(deno, deno))) + cnt++; + } + int kty = TESTHOMO_TESTS * ty; + cnts[kty + tx] = cnt; + __syncthreads(); + int len = TESTHOMO_TESTS / 2; + while (len > 0) + { + if (tx < len) + cnts[kty + tx] += cnts[kty + tx + len]; + len /= 2; + __syncthreads(); + } + if (tx < TESTHOMO_LOOPS && ty == 0) + d_counts[idx] = cnts[TESTHOMO_TESTS * tx]; + __syncthreads(); +} + +//================= Host matching functions =====================// + +double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh) +{ + *numMatches = 0; + homography[0] = homography[4] = homography[8] = 1.0f; + homography[1] = homography[2] = homography[3] = 0.0f; + homography[5] = homography[6] = homography[7] = 0.0f; + if (data.d_data == NULL) + return 0.0f; + SiftPoint *d_sift 
= data.d_data; + numLoops = iDivUp(numLoops, 16) * 16; + int numPts = data.numPts; + if (numPts < 8) + return 0.0f; + int numPtsUp = iDivUp(numPts, 16) * 16; + float *d_coord, *d_homo; + int *d_randPts, *h_randPts; + int randSize = 4 * sizeof(int) * numLoops; + int szFl = sizeof(float); + int szPt = sizeof(SiftPoint); +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMalloc((void **)&d_coord, 4 * sizeof(float) * numPtsUp)); + safeCall(cudaMalloc((void **)&d_randPts, randSize)); + safeCall(cudaMalloc((void **)&d_homo, 8 * sizeof(float) * numLoops)); + +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + h_randPts = (int *)malloc(randSize); + float *h_scores = (float *)malloc(sizeof(float) * numPtsUp); + float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp); + + // temp variables are for host memory allocation, device data is transferred to temp + float *temp1 = (float *)malloc(szPt * numPtsUp); + float *temp2 = (float *)malloc(szPt * numPtsUp); + +#ifdef DEVICE_TIMER + auto start_memcpy_1 = std::chrono::steady_clock::now(); +#endif + + safeCall(cudaMemcpy(temp1, &d_sift[0].score, szPt * numPts, cudaMemcpyDeviceToHost)); + safeCall(cudaMemcpy(temp2, &d_sift[0].ambiguity, szPt * numPts, cudaMemcpyDeviceToHost)); + +#ifdef DEVICE_TIMER + auto stop_memcpy_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_1 - start_memcpy_1).count(); +#endif + + char *src_score = (char *)temp1; + char *src_ambiguity = (char *)temp2; + char *dst_score = (char *)h_scores; + char *dst_ambiguity = (char *)h_ambiguities; + + for (int i = 0; i < numPts; ++i) + { + memcpy(dst_score, src_score, szFl); + memcpy(dst_ambiguity, src_ambiguity, szFl); + src_score += szPt; + src_ambiguity += szPt; + dst_score += szFl; + dst_ambiguity += szFl; + } + + int *validPts = (int *)malloc(sizeof(int) * 
numPts); + int numValid = 0; + for (int i = 0; i < numPts; i++) + { + if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity) + validPts[numValid++] = i; + } + free(h_scores); + free(h_ambiguities); + if (numValid >= 8) + { + std::random_device rd; + uint32_t seed = rd(); + std::mt19937 rnd(seed); // mersenne_twister_engine + std::uniform_int_distribution dis(0, UINT32_MAX); + for (int i = 0; i < numLoops; i++) + { + int p1 = dis(rnd) % numValid; + int p2 = dis(rnd) % numValid; + int p3 = dis(rnd) % numValid; + int p4 = dis(rnd) % numValid; + while (p2 == p1) + p2 = dis(rnd) % numValid; + while (p3 == p1 || p3 == p2) + p3 = dis(rnd) % numValid; + while (p4 == p1 || p4 == p2 || p4 == p3) + p4 = dis(rnd) % numValid; + h_randPts[i + 0 * numLoops] = validPts[p1]; + h_randPts[i + 1 * numLoops] = validPts[p2]; + h_randPts[i + 2 * numLoops] = validPts[p3]; + h_randPts[i + 3 * numLoops] = validPts[p4]; + } + + float *temp3, *temp4, *temp5, *temp6; +#ifdef DEVICE_TIMER + auto start_malloc_2 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMalloc((void **)&temp3, szPt * numPtsUp)); + safeCall(cudaMalloc((void **)&temp4, szPt * numPtsUp)); + safeCall(cudaMalloc((void **)&temp5, szPt * numPtsUp)); + safeCall(cudaMalloc((void **)&temp6, szPt * numPtsUp)); +#ifdef DEVICE_TIMER + auto stop_malloc_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc_2 - start_malloc_2).count(); +#endif +#ifdef DEVICE_TIMER + auto start_memcpy_2 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy(d_randPts, h_randPts, randSize, cudaMemcpyHostToDevice)); + safeCall(cudaDeviceSynchronize()); + safeCall(cudaMemcpy(temp3, &d_sift[0].xpos, szPt * numPts, cudaMemcpyDeviceToDevice)); + safeCall(cudaMemcpy(temp4, &d_sift[0].ypos, szPt * numPts, cudaMemcpyDeviceToDevice)); + safeCall(cudaMemcpy(temp5, &d_sift[0].match_xpos, szPt * numPts, cudaMemcpyDeviceToDevice)); + safeCall(cudaMemcpy(temp6, &d_sift[0].match_ypos, szPt * numPts, 
cudaMemcpyDeviceToDevice)); + + // kernel calto transfer memory from device to device + memcopyKernel<<<1, 1>>>(temp3, &d_coord[0 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(cudaGetLastError()); + safeCall(cudaDeviceSynchronize()); + memcopyKernel<<<1, 1>>>(temp4, &d_coord[1 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(cudaGetLastError()); + safeCall(cudaDeviceSynchronize()); + memcopyKernel<<<1, 1>>>(temp5, &d_coord[2 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(cudaGetLastError()); + safeCall(cudaDeviceSynchronize()); + memcopyKernel<<<1, 1>>>(temp6, &d_coord[3 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(cudaGetLastError()); + safeCall(cudaDeviceSynchronize()); + +#ifdef DEVICE_TIMER + auto stop_memcpy_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_2 - start_memcpy_2).count(); +#endif +#ifdef DEVICE_TIMER + auto start_kernel_1 = std::chrono::steady_clock::now(); +#endif + ComputeHomographies<<>>(d_coord, d_randPts, d_homo, numPtsUp); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_1 - start_kernel_1).count(); + // printf("ComputeHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_1 - start_kernel_1).count()); +#endif + checkMsg("ComputeHomographies() execution failed\n"); + + dim3 blocks(1, numLoops / TESTHOMO_LOOPS); + dim3 threads(TESTHOMO_TESTS, TESTHOMO_LOOPS); +#ifdef DEVICE_TIMER + auto start_kernel_2 = std::chrono::steady_clock::now(); +#endif + TestHomographies<<>>(d_coord, d_homo, d_randPts, numPtsUp, thresh * thresh); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_2 - start_kernel_2).count(); + // printf("TestHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_2 - start_kernel_2).count()); +#endif + checkMsg("TestHomographies() 
execution failed\n"); +#ifdef DEVICE_TIMER + auto start_memcpy_3 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy(h_randPts, d_randPts, sizeof(int) * numLoops, cudaMemcpyDeviceToHost)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy_3 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_3 - start_memcpy_3).count(); +#endif + int maxIndex = -1, maxCount = -1; + for (int i = 0; i < numLoops; i++) + if (h_randPts[i] > maxCount) + { + maxCount = h_randPts[i]; + maxIndex = i; + } + + *numMatches = maxCount; +#ifdef DEVICE_TIMER + auto start_memcpy_4 = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy2D(homography, szFl, &d_homo[maxIndex], sizeof(float) * numLoops, szFl, 8, cudaMemcpyDeviceToHost)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy_4 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_4 - start_memcpy_4).count(); +#endif + + safeCall(cudaFree(temp3)); + safeCall(cudaFree(temp4)); + safeCall(cudaFree(temp5)); + safeCall(cudaFree(temp6)); + + } + + free(validPts); + free(h_randPts); + free(temp1); + free(temp2); + + safeCall(cudaFree(d_homo)); + safeCall(cudaFree(d_randPts)); + safeCall(cudaFree(d_coord)); + return matchTime; +} + +double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime) +{ + int numPts1 = data1.numPts; + int numPts2 = data2.numPts; + if (!numPts1 || !numPts2) + return 0.0; +#ifdef MANAGEDMEM + SiftPoint *sift1 = data1.m_data; + SiftPoint *sift2 = data2.m_data; +#else + if (data1.d_data == NULL || data2.d_data == NULL) + return 0.0f; + SiftPoint *sift1 = data1.d_data; + SiftPoint *sift2 = data2.d_data; +#endif + +// Original version with correlation and maximization in two different kernels +// Global memory reguirement: O(N^2) +#if 0 + float *d_corrData; + int corrWidth = iDivUp(numPts2, 16)*16; + int corrSize = sizeof(float)*numPts1*corrWidth; + safeCall(cudaMalloc((void 
**)&d_corrData, corrSize)); +#if 0 + dim3 blocks1(numPts1, iDivUp(numPts2, 16)); + dim3 threads1(16, 16); // each block: 1 points x 16 points + MatchSiftPoints<<>>(sift1, sift2, d_corrData, numPts1, numPts2); +#else + dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16)); + dim3 threads(16, 16); // each block: 16 points x 16 points + MatchSiftPoints2<<>>(sift1, sift2, d_corrData, numPts1, numPts2); +#endif + safeCall(cudaDeviceSynchronize()); + dim3 blocksMax(iDivUp(numPts1, 16)); + dim3 threadsMax(16, 16); + FindMaxCorr<<>>(d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint)); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr() execution failed\n"); + safeCall(cudaFree(d_corrData)); +#endif + +// Version suggested by Nicholas Lin with combined correlation and maximization +// Global memory reguirement: O(N) +#if 0 + int block_dim = 16; + float *d_corrData; + int corrSize = numPts1 * block_dim * 2; + safeCall(cudaMalloc((void **)&d_corrData, sizeof(float) * corrSize)); + dim3 blocks(iDivUp(numPts1, block_dim)); + dim3 threads(block_dim, block_dim); + FindMaxCorr3<<>>(d_corrData, sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr3() execution failed\n"); + safeCall(cudaFree(d_corrData)); +#endif + +#if 0 + dim3 blocksMax(numPts1); + dim3 threadsMax(FMC2W, FMC2H); + FindMaxCorr2<<>>(sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr2() execution failed\n"); +#endif + +// Combined version with no global memory requirement using one FMC2H points per block +#if 0 + dim3 blocksMax2(iDivUp(numPts1, FMC2H)); + dim3 threadsMax2(FMC2W, FMC2H); + FindMaxCorr4<<>>(sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr4() execution failed\n"); +#endif + +// Combined version with no global memory requirement using global locks +#if 1 + dim3 blocksMax3(iDivUp(numPts1, 16), iDivUp(numPts2, 512)); + dim3 threadsMax3(16, 16); +#ifdef 
DEVICE_TIMER + auto start_kernel1 = std::chrono::steady_clock::now(); +#endif + CleanMatches<<>>(sift1, numPts1); + safeCall(cudaGetLastError()); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel1 = std::chrono::steady_clock::now(); + // printf("CleanMatches time = %.2f us\n", std::chrono::duration(stop_kernel1 - start_kernel1).count()); + matchTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); + auto matchSiftDataTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); +#endif + int mode = 10; + // if (mode == 5) // K40c 5.0ms, 1080 Ti 1.2ms, 2080 Ti 0.83ms + // FindMaxCorr5<<>>(sift1, sift2, numPts1, numPts2); + // else if (mode == 6) + // { // 2080 Ti 0.89ms + // threadsMax3 = dim3(32, 16); + // FindMaxCorr6<<>>(sift1, sift2, numPts1, numPts2); + // } + // else if (mode == 7) // 2080 Ti 0.50ms + // FindMaxCorr7<<>>(sift1, sift2, numPts1, numPts2); + // else if (mode == 8) + // { // 2080 Ti 0.45ms + // blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH)); + // threadsMax3 = dim3(FMC_NW, FMC_NH); + // FindMaxCorr8<<>>(sift1, sift2, numPts1, numPts2); + // } + // else if (mode == 9) + // { // 2080 Ti 0.46ms + // blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH)); + // threadsMax3 = dim3(FMC_NW, FMC_NH); + // FindMaxCorr9<<>>(sift1, sift2, numPts1, numPts2); + // } + // else + if (mode == 10) + { + blocksMax3 = dim3(iDivUp(numPts1, M7W)); + threadsMax3 = dim3(M7W, M7H / M7R); +#ifdef DEVICE_TIMER + auto start_kernel2 = std::chrono::steady_clock::now(); +#endif + FindMaxCorr10<<>>(sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_kernel2 = std::chrono::steady_clock::now(); + // printf("FindMaxCorr10 time = %.2f us\n", std::chrono::duration(stop_kernel2 - start_kernel2).count()); + matchTime += std::chrono::duration(stop_kernel2 - start_kernel2).count(); + matchSiftDataTime += std::chrono::duration(stop_kernel2 - 
start_kernel2).count(); +#endif + } + checkMsg("FindMaxCorr10() execution failed\n"); +#endif + + if (data1.h_data != NULL) + { + float *h_ptr = &data1.h_data[0].score; + float *d_ptr = &data1.d_data[0].score; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(cudaMemcpy(h_ptr, d_ptr, sizeof(SiftPoint) * data1.numPts, cudaMemcpyDeviceToHost)); + safeCall(cudaDeviceSynchronize()); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); + matchSiftDataTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + } + return matchTime; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/MainSourceFiles.yaml new file mode 100644 index 000000000..8a7a15c82 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/MainSourceFiles.yaml @@ -0,0 +1,10297 @@ +--- +MainSourceFile: MainSrcFiles_placehold +Replacements: + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 1342 + Length: 0 + ReplacementText: "#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 1997 + Length: 100 + ReplacementText: 'DPCT_CHECK_ERROR(d_data = (float *)dpct::dpct_malloc(*(size_t *)&pitch, (size_t)(sizeof(float) * width), (size_t)height))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 2113 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 2952 + Length: 16 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_data, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 2968 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 3099 + Length: 34 + ReplacementText: 'DPCT_CHECK_ERROR(delete (dpct::image_matrix *)t_data' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 3133 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 3405 + Length: 12 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::dpct_memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 3491 + Length: 22 + ReplacementText: 'dpct::host_to_device' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 3514 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Offset: 3631 + Length: 23 + ReplacementText: 
'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 1342 + Length: 0 + ReplacementText: "#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 1596 + Length: 32 + ReplacementText: 'static dpct::constant_memory d_MaxNumPoints;' + ConstantFlag: DeviceConstant + ConstantOffset: 1596 + InitStr: '' + NewHostVarName: d_MaxNumPoints_host_ct1 + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 1629 + Length: 50 + ReplacementText: 'dpct::global_memory d_PointCounter(8 * 2 + 1);' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 1680 + Length: 40 + ReplacementText: 'static dpct::constant_memory d_ScaleDownKernel(5);' + ConstantFlag: DeviceConstant + ConstantOffset: 1680 + InitStr: '' + NewHostVarName: d_ScaleDownKernel_host_ct1 + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 1721 + Length: 54 + ReplacementText: 'static dpct::constant_memory d_LowPassKernel(2 * LOWPASS_R + 1);' + ConstantFlag: DeviceConstant + ConstantOffset: 1721 + InitStr: '' + NewHostVarName: d_LowPassKernel_host_ct1 + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 1776 + Length: 48 + ReplacementText: 'static dpct::constant_memory d_LaplaceKernel(8 * 12 * 16);' + ConstantFlag: DeviceConstant + ConstantOffset: 1776 + InitStr: '' + NewHostVarName: d_LaplaceKernel_host_ct1 
+ BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2024 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2138 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n float const *d_ScaleDownKernel, float *brows" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2260 + Length: 32 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2310 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2340 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2370 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2418 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2589 + Length: 30 + ReplacementText: 
'sycl::min(width - 1, sycl::max(0, xp - 2))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2638 + Length: 31 + ReplacementText: 'sycl::min(height - 1, sycl::max(0, yp - 2))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2809 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2834 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2852 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2876 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2885 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2919 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 2958 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3227 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3336 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n float const *d_ScaleDownKernel, float *irows, float *brows" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3458 + Length: 32 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3493 + Length: 32 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3543 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3573 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3603 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3651 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3699 + Length: 30 + ReplacementText: 'sycl::min(width - 1, sycl::max(0, xp - 2))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 3748 + Length: 31 + ReplacementText: 'sycl::min(height - 1, sycl::max(0, yp - 2))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4002 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4197 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4231 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4270 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4533 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4637 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float const *d_ScaleDownKernel,\n float *inrow, float *brow, int *yRead, int *yWrite" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4643 + Length: 40 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4686 + Length: 45 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4734 + Length: 38 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4775 + Length: 39 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 4862 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 5056 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 5103 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 5482 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 5628 + Length: 32 + ReplacementText: 'sycl::min(dx2, width / 2 - xStart / 2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 5771 + Length: 0 + ReplacementText: " /*\n DPCT1118:3: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 5777 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6124 + Length: 0 + ReplacementText: " /*\n DPCT1118:4: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6130 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6240 + Length: 0 + ReplacementText: " /*\n DPCT1118:5: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6246 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6592 + Length: 0 + ReplacementText: " /*\n DPCT1118:6: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6598 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6708 + Length: 0 + ReplacementText: " /*\n DPCT1118:7: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 6714 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7061 + Length: 0 + ReplacementText: " /*\n DPCT1118:8: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7067 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7177 + Length: 0 + ReplacementText: " /*\n DPCT1118:9: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7183 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7529 + Length: 0 + ReplacementText: " /*\n DPCT1118:10: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7535 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7639 + Length: 0 + ReplacementText: " /*\n DPCT1118:11: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 7645 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8001 + Length: 0 + ReplacementText: " /*\n DPCT1118:12: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8007 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8037 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8139 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8160 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8190 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8213 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8256 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8345 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8393 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8441 + Length: 22 + ReplacementText: 'sycl::min(xl + 1, width - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8478 + Length: 23 + ReplacementText: 'sycl::min(yu + 1, height - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8926 + Length: 11 + ReplacementText: "/*\nDPCT1110:13: The total declared local variable size in device function ExtractSiftDescriptors exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 8965 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9041 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *gauss,\n float *buffer, float *sums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9047 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9077 + Length: 29 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9109 + Length: 25 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9153 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9194 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9266 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9333 + Length: 40 + ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9399 + Length: 15 + ReplacementText: "/*\n DPCT1065:92: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9532 + Length: 11 + ReplacementText: 'sycl::sin(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9574 + Length: 11 + ReplacementText: 'sycl::cos(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9934 + Length: 46 + ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 9998 + Length: 46 + ReplacementText: 
'texObj.read(xpos - cosa, ypos - sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 10061 + Length: 46 + ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 10125 + Length: 46 + ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 10213 + Length: 24 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 10273 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 10986 + Length: 37 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11033 + Length: 36 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11164 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 32, iangf * grad2)' + ConstantFlag: 
'' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11216 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 32, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11416 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 8, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11467 + Length: 40 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 8, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11603 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 40, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11655 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 40, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11718 + Length: 15 + ReplacementText: "/*\n DPCT1065:93: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11889 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 11943 + Length: 15 + ReplacementText: "/*\n DPCT1065:94: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12025 + Length: 38 + ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12150 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12204 + Length: 15 + ReplacementText: "/*\n DPCT1065:95: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12332 + Length: 13 + ReplacementText: 'sycl::rsqrt(tsum2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12483 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12545 + Length: 6 + ReplacementText: 'sycl::fabs(x)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12568 + Length: 6 + ReplacementText: 'sycl::fabs(y)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12576 + Length: 0 + ReplacementText: " /*\n DPCT1013:96: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12588 + Length: 43 + ReplacementText: 'sycl::min(absx, absy) / sycl::max(absx, absy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 12980 + Length: 11 + ReplacementText: "/*\nDPCT1110:14: The total declared local variable size in device function ExtractSiftDescriptorsCONSTNew exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13101 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n int d_MaxNumPoints,\n unsigned int *d_PointCounter, float *gauss,\n float *buffer, float *sums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13107 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13137 + Length: 29 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13169 + Length: 25 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13213 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13254 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13340 + Length: 43 + 
ReplacementText: 'sycl::native::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13401 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13469 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13666 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13706 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13746 + Length: 0 + ReplacementText: " /*\n DPCT1118:15: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13750 + Length: 15 + ReplacementText: "/*\n DPCT1065:97: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13889 + Length: 13 + ReplacementText: 'sycl::sin(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 13935 + Length: 13 + ReplacementText: 'sycl::cos(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 14988 + Length: 0 + ReplacementText: " /*\n DPCT1013:102: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. 
SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 15030 + Length: 29 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 15853 + Length: 37 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 15902 + Length: 36 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16043 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 32, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16097 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 32, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16315 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 8, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + 
Offset: 16368 + Length: 40 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 8, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16514 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 40, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16568 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 40, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16635 + Length: 0 + ReplacementText: " /*\n DPCT1118:16: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16639 + Length: 15 + ReplacementText: "/*\n DPCT1065:98: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16818 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16874 + Length: 0 + ReplacementText: " /*\n DPCT1118:17: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16878 + Length: 15 + ReplacementText: "/*\n DPCT1065:99: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 16964 + Length: 38 + ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17095 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17151 + Length: 0 + ReplacementText: " /*\n DPCT1118:18: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17155 + Length: 15 + ReplacementText: "/*\n DPCT1065:100: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17289 + Length: 13 + ReplacementText: 'sycl::rsqrt(tsum2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17449 + Length: 0 + ReplacementText: " /*\n DPCT1118:19: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17453 + Length: 15 + ReplacementText: "/*\n DPCT1065:101: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17477 + Length: 11 + ReplacementText: "/*\nDPCT1110:20: The total declared local variable size in device function ExtractSiftDescriptorsCONST exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17521 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17597 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n int d_MaxNumPoints,\n unsigned int *d_PointCounter, float *gauss,\n float *buffer, float *sums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17603 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17633 + Length: 29 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17665 + Length: 25 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17709 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17750 + Length: 11 + 
ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17836 + Length: 40 + ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17894 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 17962 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18159 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18199 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18239 + Length: 0 + ReplacementText: " /*\n DPCT1118:21: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18243 + Length: 15 + ReplacementText: "/*\n DPCT1065:103: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18382 + Length: 11 + ReplacementText: 'sycl::sin(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18426 + Length: 11 + ReplacementText: 'sycl::cos(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18802 + Length: 46 + ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18868 + Length: 46 + ReplacementText: 'texObj.read(xpos - cosa, ypos - sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18933 + Length: 46 + ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 18999 + Length: 46 + ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 19089 + Length: 24 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 19151 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 19904 + Length: 37 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 19953 + Length: 36 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20094 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 32, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20148 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 32, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20366 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 8, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20419 + Length: 40 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 8, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20565 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 40, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20619 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 40, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20686 + Length: 0 + ReplacementText: " /*\n DPCT1118:22: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20690 + Length: 15 + ReplacementText: "/*\n DPCT1065:104: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20869 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20925 + Length: 0 + ReplacementText: " /*\n DPCT1118:23: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 20929 + Length: 15 + ReplacementText: "/*\n DPCT1065:105: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21015 + Length: 38 + ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21146 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21202 + Length: 0 + ReplacementText: " /*\n DPCT1118:24: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21206 + Length: 15 + ReplacementText: "/*\n DPCT1065:106: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21340 + Length: 13 + ReplacementText: 'sycl::rsqrt(tsum2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21500 + Length: 0 + ReplacementText: " /*\n DPCT1118:25: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21504 + Length: 15 + ReplacementText: "/*\n DPCT1065:107: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21528 + Length: 11 + ReplacementText: "/*\nDPCT1110:26: The total declared local variable size in device function ExtractSiftDescriptorsOld exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21570 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21646 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *gauss,\n float *buffer, float *sums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21652 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21682 + Length: 29 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21714 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21760 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21801 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: 
'' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21873 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 21940 + Length: 40 + ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22006 + Length: 15 + ReplacementText: "/*\n DPCT1065:108: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22139 + Length: 11 + ReplacementText: 'sycl::sin(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22181 + Length: 11 + ReplacementText: 'sycl::cos(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22541 + Length: 46 + ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22605 + Length: 46 + 
ReplacementText: 'texObj.read(xpos - cosa, ypos - sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22668 + Length: 46 + ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22732 + Length: 46 + ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22820 + Length: 24 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 22880 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 23593 + Length: 37 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 23640 + Length: 36 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 23771 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 32, iangf * grad2)' 
+ ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 23823 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 32, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24023 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 8, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24074 + Length: 40 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 8, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24210 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 40, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24262 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 40, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24325 + Length: 15 + ReplacementText: "/*\n DPCT1065:109: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24493 + Length: 15 + ReplacementText: "/*\n DPCT1065:110: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24572 + Length: 15 + ReplacementText: "/*\n DPCT1065:111: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24651 + Length: 15 + ReplacementText: "/*\n DPCT1065:112: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24728 + Length: 15 + ReplacementText: "/*\n DPCT1065:113: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24805 + Length: 15 + ReplacementText: "/*\n 
DPCT1065:114: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24907 + Length: 13 + ReplacementText: 'sycl::rsqrt(tsum1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 24975 + Length: 15 + ReplacementText: "/*\n DPCT1065:115: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25091 + Length: 15 + ReplacementText: "/*\n DPCT1065:116: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25170 + Length: 15 + ReplacementText: "/*\n DPCT1065:117: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25249 + Length: 15 + ReplacementText: "/*\n DPCT1065:118: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25326 + Length: 15 + ReplacementText: "/*\n DPCT1065:119: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25403 + Length: 15 + ReplacementText: "/*\n DPCT1065:120: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25537 + Length: 13 + ReplacementText: 'sycl::rsqrt(tsum2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25688 + Length: 11 + ReplacementText: "/*\nDPCT1110:27: The total declared local variable size in device function ExtractSiftDescriptor exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25726 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25810 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *gauss,\n float *buffer, float *sums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25816 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25846 + Length: 29 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25878 + Length: 25 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 25923 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26042 + Length: 40 + ReplacementText: 'sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 
128.0f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26108 + Length: 15 + ReplacementText: "/*\n DPCT1065:121: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26241 + Length: 11 + ReplacementText: 'sycl::sin(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26283 + Length: 11 + ReplacementText: 'sycl::cos(theta)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26643 + Length: 46 + ReplacementText: 'texObj.read(xpos + cosa, ypos + sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26707 + Length: 46 + ReplacementText: 'texObj.read(xpos - cosa, ypos - sina)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26770 + Length: 46 + ReplacementText: 'texObj.read(xpos - sina, ypos + cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 
26834 + Length: 46 + ReplacementText: 'texObj.read(xpos + sina, ypos - cosa)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26922 + Length: 24 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 26982 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 27695 + Length: 37 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 27742 + Length: 36 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 27873 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 32, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 27925 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 32, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28125 + Length: 41 + ReplacementText: 
'dpct::atomic_fetch_add(buffer + p1 + 8, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28176 + Length: 40 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 8, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28312 + Length: 42 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p1 + 40, iangf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28364 + Length: 41 + ReplacementText: 'dpct::atomic_fetch_add(buffer + p2 + 40, angf * grad2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28427 + Length: 15 + ReplacementText: "/*\n DPCT1065:122: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28598 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28652 + Length: 15 + ReplacementText: "/*\n DPCT1065:123: Consider replacing sycl::nd_item::barrier() with 
sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28734 + Length: 38 + ReplacementText: 'sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28859 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 28913 + Length: 15 + ReplacementText: "/*\n DPCT1065:124: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29041 + Length: 13 + ReplacementText: 'sycl::rsqrt(tsum2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29191 + Length: 15 + ReplacementText: "/*\n DPCT1065:125: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29211 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29286 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29302 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29315 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29328 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29466 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29502 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29559 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,\n unsigned int 
*d_PointCounter, float *hist, float *gauss" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29565 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29594 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29639 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29669 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29794 + Length: 35 + ReplacementText: 'sycl::exp(i2sigma2 * (tx - 5) * (tx - 5))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 29869 + Length: 15 + ReplacementText: "/*\n DPCT1065:126: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30083 + Length: 34 + 
ReplacementText: 'texObj.read(xf + 1.0, yf)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30120 + Length: 34 + ReplacementText: 'texObj.read(xf - 1.0, yf)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30171 + Length: 34 + ReplacementText: 'texObj.read(xf, yf + 1.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30208 + Length: 34 + ReplacementText: 'texObj.read(xf, yf - 1.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30266 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30350 + Length: 24 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30380 + Length: 51 + ReplacementText: 'dpct::atomic_fetch_add(&hist[bin], grad * gauss[xd] * gauss[yd])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30439 + Length: 15 + ReplacementText: "/*\n DPCT1065:127: Consider replacing sycl::nd_item::barrier() with 
sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30751 + Length: 15 + ReplacementText: "/*\n DPCT1065:128: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 30924 + Length: 15 + ReplacementText: "/*\n DPCT1065:129: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 31797 + Length: 37 + ReplacementText: 'dpct::atomic_fetch_compare_inc(d_PointCounter, 0x7fffffff)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32319 + Length: 11 + ReplacementText: "/*\nDPCT1110:28: The total declared local variable size in device function ComputeOrientationsCONSTNew exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32427 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n int d_MaxNumPoints,\n unsigned int *d_PointCounter,\n sycl::local_accessor img,\n sycl::local_accessor tmp, float *hist,\n float *gaussx, float *gaussy" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32526 + Length: 46 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32575 + Length: 31 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32609 + Length: 42 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32669 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32698 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32766 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32835 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32875 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 32963 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33194 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33277 + Length: 32 + ReplacementText: 'sycl::max(sycl::min(x - RAD + xi, w - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33326 + Length: 32 + ReplacementText: 'sycl::max(sycl::min(y - RAD + yi, h - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33457 + 
Length: 42 + ReplacementText: 'sycl::native::exp(-1.0f / (2.0f * (sc * sc - 0.25f)))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33544 + Length: 42 + ReplacementText: 'sycl::native::exp(-4.0f / (2.0f * (sc * sc - 0.25f)))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33759 + Length: 52 + ReplacementText: 'sycl::native::exp(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33832 + Length: 52 + ReplacementText: 'sycl::native::exp(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33892 + Length: 0 + ReplacementText: " /*\n DPCT1118:29: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33896 + Length: 15 + ReplacementText: "/*\n DPCT1065:130: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 33960 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34181 + Length: 0 + ReplacementText: " /*\n DPCT1118:30: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34185 + Length: 15 + ReplacementText: "/*\n DPCT1065:131: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34255 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34502 + Length: 0 + ReplacementText: " /*\n DPCT1118:31: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34506 + Length: 15 + ReplacementText: "/*\n DPCT1065:132: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34576 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34814 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34866 + Length: 0 + ReplacementText: " /*\n DPCT1013:135: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. 
SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34885 + Length: 29 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34922 + Length: 57 + ReplacementText: 'dpct::atomic_fetch_add(&hist[LEN + bin], grad * gaussx[x] * gaussy[y])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34987 + Length: 0 + ReplacementText: " /*\n DPCT1118:32: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 34991 + Length: 15 + ReplacementText: "/*\n DPCT1065:133: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 35639 + Length: 0 + ReplacementText: " /*\n DPCT1118:33: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 35643 + Length: 15 + ReplacementText: "/*\n DPCT1065:134: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 36349 + Length: 74 + ReplacementText: 'dpct::atomic_fetch_max(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 36693 + Length: 54 + ReplacementText: 'dpct::atomic_fetch_compare_inc(&d_PointCounter[2 * octave + 1], 0x7fffffff)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37273 + Length: 11 + ReplacementText: "/*\nDPCT1110:34: The total declared local variable size in device function ComputeOrientationsCONST exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37314 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37371 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n int d_MaxNumPoints, unsigned int *d_PointCounter,\n float *hist, float *gauss" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37377 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37406 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37451 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37480 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37548 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37617 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37657 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37797 + Length: 35 + ReplacementText: 'sycl::exp(i2sigma2 * (tx - 5) * (tx - 5))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37874 + Length: 0 + ReplacementText: " /*\n DPCT1118:35: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 37878 + Length: 15 + ReplacementText: "/*\n DPCT1065:136: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38110 + Length: 34 + ReplacementText: 'texObj.read(xf + 1.0, yf)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38147 + Length: 34 + ReplacementText: 'texObj.read(xf - 1.0, yf)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38200 + Length: 34 + ReplacementText: 'texObj.read(xf, yf + 1.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38237 + Length: 34 + ReplacementText: 'texObj.read(xf, yf - 1.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38297 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + 
Offset: 38387 + Length: 24 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38419 + Length: 51 + ReplacementText: 'dpct::atomic_fetch_add(&hist[bin], grad * gauss[xd] * gauss[yd])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38478 + Length: 0 + ReplacementText: " /*\n DPCT1118:36: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38482 + Length: 15 + ReplacementText: "/*\n DPCT1065:137: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38808 + Length: 0 + ReplacementText: " /*\n DPCT1118:37: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38812 + Length: 15 + ReplacementText: "/*\n DPCT1065:138: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38983 + Length: 0 + ReplacementText: " /*\n DPCT1118:38: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 38987 + Length: 15 + ReplacementText: "/*\n DPCT1065:139: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 39679 + Length: 74 + ReplacementText: 'dpct::atomic_fetch_max(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40014 + Length: 54 + ReplacementText: 'dpct::atomic_fetch_compare_inc(&d_PointCounter[2 * octave + 1], 0x7fffffff)' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40542 + Length: 0 + ReplacementText: " /*\n DPCT1118:39: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40546 + Length: 15 + ReplacementText: "/*\n DPCT1065:140: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40604 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40642 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40718 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,\n unsigned int *d_PointCounter, float *gauss,\n float *buffer, float *sums, float *hist,\n unsigned int &idx" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40724 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + 
InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40753 + Length: 27 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40783 + Length: 28 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40836 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40865 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 40933 + Length: 51 + ReplacementText: 'dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41002 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41042 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + 
Offset: 41168 + Length: 35 + ReplacementText: 'sycl::exp(i2sigma2 * (tx - 5) * (tx - 5))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41245 + Length: 0 + ReplacementText: " /*\n DPCT1118:40: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41249 + Length: 15 + ReplacementText: "/*\n DPCT1065:141: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41481 + Length: 34 + ReplacementText: 'texObj.read(xf + 1.0, yf)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41518 + Length: 34 + ReplacementText: 'texObj.read(xf - 1.0, yf)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41571 + Length: 34 + ReplacementText: 'texObj.read(xf, yf + 1.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41608 + Length: 34 + ReplacementText: 'texObj.read(xf, yf - 1.0)' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41668 + Length: 14 + ReplacementText: 'sycl::atan2(dy, dx)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41758 + Length: 24 + ReplacementText: 'sycl::sqrt(dx * dx + dy * dy)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41790 + Length: 51 + ReplacementText: 'dpct::atomic_fetch_add(&hist[bin], grad * gauss[xd] * gauss[yd])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41849 + Length: 0 + ReplacementText: " /*\n DPCT1118:41: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 41853 + Length: 15 + ReplacementText: "/*\n DPCT1065:142: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 42179 + Length: 0 + ReplacementText: " /*\n DPCT1118:42: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 42183 + Length: 15 + ReplacementText: "/*\n DPCT1065:143: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 42354 + Length: 0 + ReplacementText: " /*\n DPCT1118:43: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 42358 + Length: 15 + ReplacementText: "/*\n DPCT1065:144: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 43081 + Length: 74 + ReplacementText: 'dpct::atomic_fetch_max(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 43395 + Length: 54 + ReplacementText: 'dpct::atomic_fetch_compare_inc(&d_PointCounter[2 * octave + 1], 0x7fffffff)' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 43930 + Length: 0 + ReplacementText: " /*\n DPCT1118:44: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 43934 + Length: 15 + ReplacementText: "/*\n DPCT1065:145: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 44016 + Length: 0 + ReplacementText: ', item_ct1, gauss, buffer, sums' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 44175 + Length: 0 + ReplacementText: ', item_ct1, gauss, buffer, sums' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50387 + Length: 11 + ReplacementText: "/*\nDPCT1110:45: The total declared local variable size in device function FindPointsMultiNew exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50584 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,\n unsigned int *d_PointCounter, unsigned short *points" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50620 + Length: 45 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50673 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50692 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50711 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50737 + Length: 74 + ReplacementText: 'dpct::atomic_fetch_max(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50817 + Length: 74 + ReplacementText: 'dpct::atomic_fetch_max(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50908 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50935 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 50974 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51051 + Length: 27 + ReplacementText: 'sycl::min(minx + MINMAX_W, width)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51160 + Length: 32 + ReplacementText: 'sycl::max(sycl::min(xpos - 1, width - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51210 + Length: 45 + ReplacementText: 'dpct::min((unsigned int)(height - MINMAX_H * item_ct1.get_group(1)), MINMAX_H)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51343 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51449 + Length: 22 + ReplacementText: 'sycl::fmax(maxv, sycl::fabs(val))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51518 + Length: 37 + ReplacementText: 'sycl::any_of_group(item_ct1.get_sub_group(), (0xffffffff & (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && maxv > thresh)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51688 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51791 + Length: 42 + ReplacementText: 'sycl::any_of_group(item_ct1.get_sub_group(), (0xffffffff & (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && sycl::fabs(d11) > thresh)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51866 + Length: 16 + ReplacementText: 'sycl::max(0, ypos - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 51916 + Length: 25 + ReplacementText: 'sycl::min(height - 1, ypos + 1)' + ConstantFlag: '' + ConstantOffset: 0 + 
InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52209 + Length: 27 + ReplacementText: 'sycl::fmin(sycl::fmin(d00, d01), d02)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52258 + Length: 27 + ReplacementText: 'sycl::fmax(sycl::fmax(d00, d01), d02)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52397 + Length: 27 + ReplacementText: 'sycl::fmin(sycl::fmin(d20, d21), d22)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52446 + Length: 27 + ReplacementText: 'sycl::fmax(sycl::fmax(d20, d21), d22)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52495 + Length: 55 + ReplacementText: 'sycl::fmin(sycl::fmin(ymin1, sycl::fmin(sycl::fmin(d10, d12), d11)), ymin3)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52572 + Length: 55 + ReplacementText: 'sycl::fmax(sycl::fmax(ymax1, sycl::fmax(sycl::fmax(d10, d12), d11)), ymax3)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52650 + Length: 45 + ReplacementText: 'sycl::fmin(ShiftUp(ymin2, 1, item_ct1), ShiftDown(ymin2, 1, item_ct1))' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52717 + Length: 45 + ReplacementText: 'sycl::fmax(ShiftUp(ymax2, 1, item_ct1), ShiftDown(ymax2, 1, item_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52783 + Length: 33 + ReplacementText: 'sycl::fmin(sycl::fmin(nmin2, ymin1), ymin3)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52831 + Length: 28 + ReplacementText: 'sycl::fmin(sycl::fmin(minv, d10), d12)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52880 + Length: 33 + ReplacementText: 'sycl::fmax(sycl::fmax(nmax2, ymax1), ymax3)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 52928 + Length: 28 + ReplacementText: 'sycl::fmax(sycl::fmax(maxv, d10), d12)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 53040 + Length: 20 + ReplacementText: 'sycl::fmin(-thresh, minv)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 53071 + Length: 19 + ReplacementText: 'sycl::fmax(thresh, maxv)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + 
NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 53135 + Length: 14 + ReplacementText: 'sycl::popcount(ptbits)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 53265 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 53407 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 53599 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 54163 + Length: 26 + ReplacementText: '(tra * tra) / det' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 54807 + Length: 54 + ReplacementText: '1.0f / (idxx * dxx + idxy * dxy + idxs * dxs)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 55292 + Length: 19 + ReplacementText: 'dx / dxx' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 55327 + Length: 19 + ReplacementText: 'dy / dyy' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 55362 + Length: 19 + ReplacementText: 'ds / dss' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 55503 + Length: 37 + ReplacementText: 'dpct::pow(2.0f, (float)scale / NUM_SCALES)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 55543 + Length: 19 + ReplacementText: 'sycl::exp2(pds * factor)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 55609 + Length: 74 + ReplacementText: 'dpct::atomic_fetch_max(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 55712 + Length: 54 + ReplacementText: 'dpct::atomic_fetch_compare_inc(&d_PointCounter[2 * octave + 0], 0x7fffffff)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68605 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68637 + Length: 19 + ReplacementText: 'dpct::image_accessor_ext' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68726 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n float const *d_LaplaceKernel, float *data1, float *data2" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68732 + Length: 64 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68799 + Length: 46 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68863 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68893 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68939 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 68971 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69002 + Length: 47 + 
ReplacementText: 'const_cast(d_LaplaceKernel + octave * 12 * 16 + scale * 16)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69185 + Length: 26 + ReplacementText: 'texObj.read(x, y)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69242 + Length: 32 + ReplacementText: 'texObj.read(x, y - 1.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69277 + Length: 32 + ReplacementText: 'texObj.read(x, y + 1.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69341 + Length: 32 + ReplacementText: 'texObj.read(x, y - 2.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69376 + Length: 32 + ReplacementText: 'texObj.read(x, y + 2.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69440 + Length: 32 + ReplacementText: 'texObj.read(x, y - 3.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69475 + Length: 32 + ReplacementText: 'texObj.read(x, y + 3.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69539 + Length: 32 + ReplacementText: 'texObj.read(x, y - 4.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69574 + Length: 32 + ReplacementText: 'texObj.read(x, y + 4.0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 69611 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70010 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70185 + Length: 11 + ReplacementText: "/*\nDPCT1110:46: The total declared local variable size in device function LaplaceMultiMem exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70294 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n float const *d_LaplaceKernel, float *buff" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70300 + Length: 63 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70381 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70411 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70457 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70495 + Length: 38 + ReplacementText: 'sycl::max(sycl::min(xp - LAPLACE_R, width - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70747 + 
Length: 43 + ReplacementText: 'sycl::max(0, sycl::min(yp + i - LAPLACE_R, height - 1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 70944 + Length: 47 + ReplacementText: 'const_cast(d_LaplaceKernel + octave * 12 * 16 + scale * 16)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 71308 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79297 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79386 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel,\n float *buffer" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79392 + Length: 65 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79475 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79505 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' 
+ ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79535 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79581 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79628 + Length: 15 + ReplacementText: 'const_cast(d_LowPassKernel)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79671 + Length: 30 + ReplacementText: 'sycl::max(sycl::min(xp - 4, width - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79835 + Length: 10 + ReplacementText: 'sycl::min(yp, h)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79890 + Length: 22 + ReplacementText: 'sycl::max(0, sycl::min(yp - 1, h))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79929 + Length: 14 + ReplacementText: 'sycl::min(yp + 1, h)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 79989 + Length: 22 + ReplacementText: 'sycl::max(0, sycl::min(yp - 2, h))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80028 + Length: 14 + ReplacementText: 'sycl::min(yp + 2, h)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80088 + Length: 22 + ReplacementText: 'sycl::max(0, sycl::min(yp - 3, h))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80127 + Length: 14 + ReplacementText: 'sycl::min(yp + 3, h)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80187 + Length: 22 + ReplacementText: 'sycl::max(0, sycl::min(yp - 4, h))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80226 + Length: 14 + ReplacementText: 'sycl::min(yp + 4, h)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80254 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80623 + Length: 11 + 
ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80720 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1,\n float const *d_LowPassKernel,\n sycl::local_accessor xrows" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80726 + Length: 31 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80775 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80805 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80835 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80881 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80943 + Length: 15 + ReplacementText: 'const_cast(d_LowPassKernel)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: 
false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 80971 + Length: 30 + ReplacementText: 'sycl::max(sycl::min(xp - 4, width - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81094 + Length: 35 + ReplacementText: 'sycl::max(sycl::min(yp + l + 4, height - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81234 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81298 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81318 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81383 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81403 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81468 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: 
'' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81488 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 81553 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82187 + Length: 0 + ReplacementText: " /*\n DPCT1118:47: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82193 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82217 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82311 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel,\n sycl::local_accessor xrows" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82317 + Length: 31 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' 
+ BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82366 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82396 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82426 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82472 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82534 + Length: 15 + ReplacementText: 'const_cast(d_LowPassKernel)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82562 + Length: 30 + ReplacementText: 'sycl::max(sycl::min(xp - 4, width - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82681 + Length: 35 + ReplacementText: 'sycl::max(sycl::min(yp + l + 4, height - 1), 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 
82821 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82859 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82879 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82918 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82938 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82977 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 82997 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83036 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83081 + Length: 15 + 
ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83192 + Length: 27 + ReplacementText: 'sycl::min(yp + l + 4, height - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83296 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83334 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83354 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83393 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83413 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83452 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83472 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 83511 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 84080 + Length: 0 + ReplacementText: " /*\n DPCT1118:48: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Offset: 84084 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1344 + Length: 0 + ReplacementText: "#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1573 + Length: 23 + ReplacementText: '#include "cudaSiftD.dp.cpp"' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1653 + Length: 29 + ReplacementText: 'DPCT_CHECK_ERROR(nDevices = dpct::dev_mgr::instance().device_count())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1848 + Length: 14 + ReplacementText: 'dpct::device_info' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1880 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_device_info' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1904 + Length: 5 + ReplacementText: prop + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1911 + Length: 6 + ReplacementText: 'dpct::dev_mgr::instance().get_device(devNum)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1918 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 1999 + Length: 4 + ReplacementText: 'get_name()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 2055 + Length: 15 + ReplacementText: 'get_memory_clock_rate()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 2122 + Length: 9 + ReplacementText: 'get_max_clock_frequency()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + 
NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 2190 + Length: 14 + ReplacementText: 'get_memory_bus_width()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 2280 + Length: 15 + ReplacementText: 'get_memory_clock_rate()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 2304 + Length: 14 + ReplacementText: 'get_memory_bus_width()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 2967 + Length: 96 + ReplacementText: 'DPCT_CHECK_ERROR(memoryTmp = (float *)dpct::dpct_malloc(pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3077 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3370 + Length: 19 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3389 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3712 + Length: 66 + ReplacementText: 'DPCT_CHECK_ERROR(*((void **)&d_PointCounterAddr) = d_PointCounter.get_ptr()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3778 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3792 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memset' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3852 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3866 + Length: 18 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3885 + Length: 14 + ReplacementText: 'd_MaxNumPoints.get_ptr()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3931 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 3945 + 
Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 4750 + Length: 96 + ReplacementText: 'DPCT_CHECK_ERROR(memoryTmp = (float *)dpct::dpct_malloc(pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 4862 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 5418 + Length: 18 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 5437 + Length: 15 + ReplacementText: 'd_LaplaceKernel.get_ptr()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 5490 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 5506 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6037 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6114 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6139 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6155 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6637 + Length: 21 + ReplacementText: 'dpct::max(initBlur, 0.001f)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6853 + Length: 18 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6872 + Length: 15 + ReplacementText: 'd_LaplaceKernel.get_ptr()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6925 + Length: 0 + 
ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 6941 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 7420 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 7497 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 7522 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 7538 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 7923 + Length: 19 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 7942 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' 
+ NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 8071 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 8151 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 8176 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 8192 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 10586 + Length: 37 + ReplacementText: 'DPCT_CHECK_ERROR(data.d_data = (SiftPoint *)sycl::malloc_device(sz, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 10639 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 10922 + Length: 21 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(data.d_data, dpct::get_in_order_queue())' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 10943 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 11242 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 11305 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 11330 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 11346 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13143 + Length: 18 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13162 + Length: 17 + ReplacementText: 'd_ScaleDownKernel.get_ptr()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13209 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13225 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13717 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13729 + Length: 63 + ReplacementText: '1, iDivUp(src.height, SCALEDOWN_H), iDivUp(src.width, SCALEDOWN_W)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13797 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13810 + Length: 15 + ReplacementText: '1, 1, SCALEDOWN_W + 4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 13913 + Length: 99 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n d_ScaleDownKernel.init();\n\n auto d_ScaleDownKernel_ptr_ct1 = d_ScaleDownKernel.get_ptr();\n\n /*\n DPCT1101:214: 'SCALEDOWN_W + 4' expression was replaced with a value. 
Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor inrow_acc_ct1(sycl::range<1>(68/*SCALEDOWN_W + 4*/), cgh);\n /*\n DPCT1101:215: '5 * (SCALEDOWN_W / 2)' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor brow_acc_ct1(sycl::range<1>(160/*5 * (SCALEDOWN_W / 2)*/), cgh);\n /*\n DPCT1101:216: 'SCALEDOWN_H + 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor yRead_acc_ct1(sycl::range<1>(20/*SCALEDOWN_H + 4*/), cgh);\n /*\n DPCT1101:217: 'SCALEDOWN_H + 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor yWrite_acc_ct1(sycl::range<1>(20/*SCALEDOWN_H + 4*/), cgh);\n\n float * res_d_data_ct0 = res.d_data;\n float * src_d_data_ct1 = src.d_data;\n int src_width_ct2 = src.width;\n int src_pitch_ct3 = src.pitch;\n int src_height_ct4 = src.height;\n int res_pitch_ct5 = res.pitch;\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) {\n ScaleDown(res_d_data_ct0, src_d_data_ct1, src_width_ct2, src_pitch_ct3, src_height_ct4, res_pitch_ct5, item_ct1, d_ScaleDownKernel_ptr_ct1, inrow_acc_ct1.get_pointer(), brow_acc_ct1.get_pointer(), yRead_acc_ct1.get_pointer(), yWrite_acc_ct1.get_pointer());\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14012 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14025 + Length: 23 + 
ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14602 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14614 + Length: 59 + ReplacementText: '1, iDivUp(res.height, SCALEUP_H), iDivUp(res.width, SCALEUP_W)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14678 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14691 + Length: 28 + ReplacementText: '1, SCALEUP_H / 2, SCALEUP_W / 2' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14807 + Length: 97 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n float * res_d_data_ct0 = res.d_data;\n float * src_d_data_ct1 = src.d_data;\n int src_width_ct2 = src.width;\n int src_pitch_ct3 = src.pitch;\n int src_height_ct4 = src.height;\n int res_pitch_ct5 = res.pitch;\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) {\n ScaleUp(res_d_data_ct0, src_d_data_ct1, src_width_ct2, src_pitch_ct3, src_height_ct4, res_pitch_ct5, item_ct1);\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14904 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 14917 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 15400 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 15412 + Length: 3 + ReplacementText: 1, 1, 512 + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 15420 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 15433 + Length: 3 + ReplacementText: 1, 1, 256 + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 15524 + Length: 119 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n d_MaxNumPoints.init();\n d_PointCounter.init();\n\n auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();\n auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();\n\n /*\n DPCT1101:218: 'WID' expression was replaced with a value. 
Modify the code to use the original expression, provided in comments, if it is correct.\n */\n /*\n DPCT1101:219: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor img_acc_ct1(sycl::range<2>(19/*WID*/, 19/*WID*/), cgh);\n /*\n DPCT1101:220: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n /*\n DPCT1101:221: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor tmp_acc_ct1(sycl::range<2>(19/*WID*/, 19/*WID*/), cgh);\n /*\n DPCT1101:222: '2 * LEN' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor hist_acc_ct1(sycl::range<1>(64/*2 * LEN*/), cgh);\n /*\n DPCT1101:223: 'WID' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor gaussx_acc_ct1(sycl::range<1>(19/*WID*/), cgh);\n /*\n DPCT1101:224: 'WID' expression was replaced with a value. 
Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor gaussy_acc_ct1(sycl::range<1>(19/*WID*/), cgh);\n\n float * src_d_data_ct0 = src.d_data;\n int src_width_ct1 = src.width;\n int src_pitch_ct2 = src.pitch;\n int src_height_ct3 = src.height;\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) {\n ComputeOrientationsCONSTNew(src_d_data_ct0, src_width_ct1, src_pitch_ct2, src_height_ct3, siftData.d_data, octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, img_acc_ct1, tmp_acc_ct1, hist_acc_ct1.get_pointer(), gaussx_acc_ct1.get_pointer(), gaussy_acc_ct1.get_pointer());\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 15643 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 15656 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16193 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16205 + Length: 3 + ReplacementText: 1, 1, 512 + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16213 + Length: 4 + ReplacementText: 
'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16226 + Length: 5 + ReplacementText: 1, 8, 16 + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16319 + Length: 104 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n d_MaxNumPoints.init();\n d_PointCounter.init();\n\n auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();\n auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();\n\n sycl::local_accessor gauss_acc_ct1(sycl::range<1>(16), cgh);\n sycl::local_accessor buffer_acc_ct1(sycl::range<1>(128), cgh);\n sycl::local_accessor sums_acc_ct1(sycl::range<1>(4), cgh);\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] {\n ExtractSiftDescriptorsCONSTNew(texObj, pitch, siftData.d_data, subsampling, octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, gauss_acc_ct1.get_pointer(), buffer_acc_ct1.get_pointer(), sums_acc_ct1.get_pointer());\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16423 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16436 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16938 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16950 + Length: 27 + ReplacementText: '1, 1, iDivUp(siftData.numPts, 64)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16982 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 16995 + Length: 2 + ReplacementText: 1, 1, 64 + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 17085 + Length: 78 + ReplacementText: "dpct::get_in_order_queue().parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) {\n RescalePositions(siftData.d_data, siftData.numPts, scale, item_ct1);\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 17163 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 17176 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18195 + Length: 18 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18214 + Length: 15 + ReplacementText: 'd_LowPassKernel.get_ptr()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18275 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18291 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18599 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18611 + Length: 51 + ReplacementText: '1, iDivUp(height, LOWPASS_H), iDivUp(width, LOWPASS_W)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18680 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18693 + Length: 28 + 
ReplacementText: '1, 4, LOWPASS_W + 2 * LOWPASS_R' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18820 + Length: 82 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n d_LowPassKernel.init();\n\n auto d_LowPassKernel_ptr_ct1 = d_LowPassKernel.get_ptr();\n\n sycl::local_accessor xrows_acc_ct1(sycl::range<2>(16, 32), cgh);\n\n float * src_d_data_ct0 = src.d_data;\n float * res_d_data_ct1 = res.d_data;\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] {\n LowPassBlockOld(src_d_data_ct0, res_d_data_ct1, width, pitch, height, item_ct1, d_LowPassKernel_ptr_ct1, xrows_acc_ct1);\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18902 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 18915 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 20407 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 20420 + Length: 25 + ReplacementText: '1, 1, LAPLACE_W + 2 * LAPLACE_R' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' 
+ BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 20464 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 20476 + Length: 32 + ReplacementText: '1, height, iDivUp(width, LAPLACE_W)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 20603 + Length: 103 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n d_LaplaceKernel.init();\n\n auto d_LaplaceKernel_ptr_ct1 = d_LaplaceKernel.get_ptr();\n\n /*\n DPCT1101:226: '(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor buff_acc_ct1(sycl::range<1>(1088/*(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S*/), cgh);\n\n float * baseImage_d_data_ct0 = baseImage.d_data;\n float * results_d_data_ct1 = results[0].d_data;\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) {\n LaplaceMultiMem(baseImage_d_data_ct0, results_d_data_ct1, width, pitch, height, octave, item_ct1, d_LaplaceKernel_ptr_ct1, buff_acc_ct1.get_pointer());\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 20706 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 20719 + Length: 23 + ReplacementText: 
'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 21515 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 21527 + Length: 53 + ReplacementText: '1, iDivUp(h, MINMAX_H), iDivUp(w, MINMAX_W) * NUM_SCALES' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 21585 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 21598 + Length: 12 + ReplacementText: '1, 1, MINMAX_W + 2' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 21698 + Length: 185 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n d_MaxNumPoints.init();\n d_PointCounter.init();\n\n auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr();\n auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr();\n\n /*\n DPCT1101:227: '2 * MEMWID' expression was replaced with a value. 
Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor points_acc_ct1(sycl::range<1>(64/*2 * MEMWID*/), cgh);\n\n float * sources_d_data_ct0 = sources->d_data;\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] {\n FindPointsMultiNew(sources_d_data_ct0, siftData.d_data, w, p, h, subsampling, lowestScale, thresh, factor, edgeLimit, octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, points_acc_ct1.get_pointer());\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 21883 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Offset: 21896 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp' + Offset: 1403 + Length: 0 + ReplacementText: "#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp' + Offset: 1459 + Length: 18 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp' + Offset: 1477 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp' + Offset: 3410 + Length: 0 + ReplacementText: " /*\n DPCT1093:83: The \"0\" device may be not the one intended for use. Adjust the selected device if needed.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp' + Offset: 3412 + Length: 13 + ReplacementText: 'dpct::select_device' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1158 + Length: 0 + ReplacementText: "#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1311 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1420 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *siftPoint,\n float *sums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1426 + Length: 32 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1461 + Length: 31 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1510 + Length: 11 + ReplacementText: 
'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1540 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1570 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1599 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1771 + Length: 15 + ReplacementText: "/*\n DPCT1065:146: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1936 + Length: 15 + ReplacementText: "/*\n DPCT1065:147: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 1997 + Length: 15 + ReplacementText: "/*\n DPCT1065:148: Consider replacing 
sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2058 + Length: 15 + ReplacementText: "/*\n DPCT1065:149: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2201 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2218 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2253 + Length: 15 + ReplacementText: "/*\n DPCT1065:150: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2273 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2383 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *siftPoints1,\n float *siftPoints2" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2389 + Length: 39 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2431 + Length: 39 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2488 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2518 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2559 + Length: 38 + ReplacementText: 'dpct::min(numPts1 - 1, (unsigned int)(item_ct1.get_group(2) * 16 + ty))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2633 + Length: 38 + ReplacementText: 'dpct::min(numPts2 - 1, (unsigned int)(item_ct1.get_group(1) * 16 + ty))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 
2841 + Length: 15 + ReplacementText: "/*\n DPCT1065:151: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2875 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 2914 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3210 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3266 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3387 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *maxScore,\n float *maxScor2, int *maxIndex" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3393 + Length: 35 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3431 + Length: 35 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3469 + Length: 33 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3520 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3550 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3606 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3624 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 3754 + Length: 15 + ReplacementText: "/*\n DPCT1065:152: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 4085 + Length: 15 + ReplacementText: "/*\n DPCT1065:153: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 4553 + Length: 0 + ReplacementText: " /*\n DPCT1118:49: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 4557 + Length: 15 + ReplacementText: "/*\n DPCT1065:154: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 4923 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5029 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, int *maxIndex" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5051 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + 
InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5100 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5130 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5160 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5224 + Length: 33 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5281 + Length: 15 + ReplacementText: "/*\n DPCT1065:155: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 5867 + Length: 15 + ReplacementText: "/*\n DPCT1065:156: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6387 + Length: 0 + ReplacementText: " /*\n DPCT1118:50: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6393 + Length: 15 + ReplacementText: "/*\n DPCT1065:157: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6742 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6831 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *siftPoint,\n float *maxScore, float *maxScor2, int *maxIndex" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6837 + Length: 32 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6872 + Length: 33 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6908 + Length: 33 + ReplacementText: '' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6944 + Length: 31 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 6993 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 7055 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 7085 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 7239 + Length: 15 + ReplacementText: "/*\n DPCT1065:158: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 7371 + Length: 15 + ReplacementText: "/*\n DPCT1065:159: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 7650 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 7889 + Length: 15 + ReplacementText: "/*\n DPCT1065:160: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8368 + Length: 0 + ReplacementText: " /*\n DPCT1118:51: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8372 + Length: 15 + ReplacementText: "/*\n DPCT1065:161: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8666 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8755 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *siftPoint,\n float *maxScore, 
float *maxScor2, int *maxIndex" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8761 + Length: 40 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8804 + Length: 33 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8840 + Length: 33 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8876 + Length: 31 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8925 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 8955 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 9082 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 9224 + Length: 15 + ReplacementText: "/*\n 
DPCT1065:162: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 9506 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 9745 + Length: 15 + ReplacementText: "/*\n DPCT1065:163: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10030 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10390 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10448 + Length: 0 + ReplacementText: ', const sycl::nd_item<3> &item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10469 + Length: 47 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 64 + item_ct1.get_local_id(2)), numPts1 - 1)' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10623 + Length: 11 + ReplacementText: "/*\nDPCT1110:52: The total declared local variable size in device function FindMaxCorr10 exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10713 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, sycl::float4 *buffer1,\n sycl::float4 *buffer2" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10719 + Length: 42 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10764 + Length: 42 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10818 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10842 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10873 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 10946 + Length: 25 + ReplacementText: 'sycl::min(bp1 + j, numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 11073 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 11499 + Length: 25 + ReplacementText: 'sycl::min(bp2 + j, numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 11611 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 11647 + Length: 0 + ReplacementText: " /*\n DPCT1118:53: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 11651 + Length: 15 + ReplacementText: "/*\n DPCT1065:165: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 11906 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12125 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12260 + Length: 7 + ReplacementText: 'v1[i].x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12270 + Length: 4 + ReplacementText: 'v2.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12304 + Length: 7 + ReplacementText: 'v1[i].y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12314 + Length: 4 + ReplacementText: 'v2.y()' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12348 + Length: 7 + ReplacementText: 'v1[i].z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12358 + Length: 4 + ReplacementText: 'v2.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12392 + Length: 7 + ReplacementText: 'v1[i].w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12402 + Length: 4 + ReplacementText: 'v2.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12693 + Length: 37 + ReplacementText: 'sycl::min(bp2 + M7R * iy + dy, numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12857 + Length: 0 + ReplacementText: " /*\n DPCT1118:54: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 12861 + Length: 15 + ReplacementText: "/*\n DPCT1065:166: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 13292 + Length: 15 + ReplacementText: "/*\n DPCT1065:164: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 13597 + Length: 25 + ReplacementText: 'sycl::max(max_score, sec_score)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14313 + Length: 33 + ReplacementText: 'dpct::global_memory lock(0);' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14348 + Length: 11 + ReplacementText: "/*\nDPCT1110:55: The total declared local variable size in device function FindMaxCorr9 exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14437 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, volatile int &lock,\n sycl::float4 *siftParts1, sycl::float4 *siftParts2" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14443 + Length: 46 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14509 + Length: 46 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14656 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14686 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14737 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14810 + Length: 43 + ReplacementText: 
'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14867 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 14991 + Length: 33 + ReplacementText: 'sycl::min(FMC_GH, numPts2 - FMC_BH + 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 15093 + Length: 47 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 15156 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 15536 + Length: 0 + ReplacementText: " /*\n DPCT1118:56: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 15540 + Length: 15 + ReplacementText: "/*\n DPCT1065:169: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 15983 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16180 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16339 + Length: 8 + ReplacementText: 'v1[ix].x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16350 + Length: 4 + ReplacementText: 'v2.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16394 + Length: 8 + ReplacementText: 'v1[ix].y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16405 + Length: 4 + ReplacementText: 'v2.y()' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16449 + Length: 8 + ReplacementText: 'v1[ix].z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16460 + Length: 4 + ReplacementText: 'v2.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16504 + Length: 8 + ReplacementText: 'v1[ix].w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16515 + Length: 4 + ReplacementText: 'v2.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16551 + Length: 0 + ReplacementText: " /*\n DPCT1118:60: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16557 + Length: 15 + ReplacementText: "/*\n DPCT1065:173: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16655 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16842 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 16995 + Length: 8 + ReplacementText: 'v1[ix].x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17006 + Length: 4 + ReplacementText: 'v2.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17048 + Length: 8 + ReplacementText: 'v1[ix].y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17059 + Length: 4 + ReplacementText: 'v2.y()' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17101 + Length: 8 + ReplacementText: 'v1[ix].z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17112 + Length: 4 + ReplacementText: 'v2.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17154 + Length: 8 + ReplacementText: 'v1[ix].w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17165 + Length: 4 + ReplacementText: 'v2.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17195 + Length: 0 + ReplacementText: " /*\n DPCT1118:57: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17199 + Length: 15 + ReplacementText: "/*\n DPCT1065:170: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17433 + Length: 0 + ReplacementText: " /*\n DPCT1118:58: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17437 + Length: 15 + ReplacementText: "/*\n DPCT1065:171: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17692 + Length: 45 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17822 + Length: 0 + ReplacementText: " /*\n DPCT1118:59: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17826 + Length: 15 + ReplacementText: "/*\n DPCT1065:172: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17864 + Length: 43 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17936 + Length: 29 + ReplacementText: 'dpct::atomic_compare_exchange_strong((int *)&lock, 0, 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 17982 + Length: 15 + ReplacementText: "/*\n DPCT1065:167: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18155 + Length: 30 + ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18531 + Length: 15 + ReplacementText: "/*\n DPCT1065:168: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18568 + Length: 27 + ReplacementText: 'dpct::atomic_exchange((int *)&lock, 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18600 + Length: 11 + ReplacementText: "/*\nDPCT1110:61: The total declared local variable size in device function FindMaxCorr8 exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18689 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, volatile int &lock,\n sycl::float4 *siftParts1, sycl::float4 *siftParts2,\n float *blksums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18695 + Length: 46 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18761 + Length: 46 + ReplacementText: '' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18827 + Length: 42 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18908 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18938 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 18989 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19062 + Length: 43 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19119 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19243 + Length: 33 + ReplacementText: 'sycl::min(FMC_GH, numPts2 - FMC_BH + 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19345 + Length: 47 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19408 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19826 + Length: 0 + ReplacementText: " /*\n DPCT1118:64: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19832 + Length: 15 + ReplacementText: "/*\n DPCT1065:178: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 19905 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20096 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20249 + Length: 8 + 
ReplacementText: 'v1[ix].x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20260 + Length: 4 + ReplacementText: 'v2.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20304 + Length: 8 + ReplacementText: 'v1[ix].y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20315 + Length: 4 + ReplacementText: 'v2.y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20359 + Length: 8 + ReplacementText: 'v1[ix].z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20370 + Length: 4 + ReplacementText: 'v2.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20414 + Length: 8 + ReplacementText: 'v1[ix].w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20425 + Length: 4 + ReplacementText: 'v2.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20461 + Length: 0 + ReplacementText: " /*\n DPCT1118:65: SYCL group 
functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20467 + Length: 15 + ReplacementText: "/*\n DPCT1065:179: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20708 + Length: 0 + ReplacementText: " /*\n DPCT1118:62: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20712 + Length: 15 + ReplacementText: "/*\n DPCT1065:176: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 20967 + Length: 45 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21097 + Length: 0 + ReplacementText: " /*\n DPCT1118:63: SYCL group functions and 
algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21101 + Length: 15 + ReplacementText: "/*\n DPCT1065:177: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21139 + Length: 43 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21211 + Length: 29 + ReplacementText: 'dpct::atomic_compare_exchange_strong((int *)&lock, 0, 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21257 + Length: 15 + ReplacementText: "/*\n DPCT1065:174: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21430 + Length: 30 + ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - 
FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21806 + Length: 15 + ReplacementText: "/*\n DPCT1065:175: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21843 + Length: 27 + ReplacementText: 'dpct::atomic_exchange((int *)&lock, 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21875 + Length: 11 + ReplacementText: "/*\nDPCT1110:66: The total declared local variable size in device function FindMaxCorr7 exceeds 128 bytes and may cause high register pressure. 
Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21964 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, volatile int &lock,\n float *siftParts1, float *siftParts2" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 21970 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22033 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22112 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22128 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22151 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22167 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22205 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22235 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22266 + Length: 38 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22314 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22330 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22489 + Length: 48 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22549 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22565 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22709 + Length: 0 + ReplacementText: " /*\n DPCT1118:67: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22713 + Length: 15 + ReplacementText: "/*\n DPCT1065:182: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22860 + Length: 0 + ReplacementText: " /*\n DPCT1118:70: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22866 + Length: 15 + ReplacementText: "/*\n DPCT1065:185: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 22990 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23101 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23174 + Length: 5 + ReplacementText: 'p1v.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23182 + Length: 5 + ReplacementText: 'p2v.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23211 + Length: 5 + ReplacementText: 'p1v.y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23219 + Length: 5 + ReplacementText: 'p2v.y()' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23248 + Length: 5 + ReplacementText: 'p1v.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23256 + Length: 5 + ReplacementText: 'p2v.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23285 + Length: 5 + ReplacementText: 'p1v.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23293 + Length: 5 + ReplacementText: 'p2v.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23330 + Length: 0 + ReplacementText: " /*\n DPCT1118:71: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23336 + Length: 15 + ReplacementText: "/*\n DPCT1065:186: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23506 + Length: 0 + ReplacementText: " /*\n DPCT1118:68: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23510 + Length: 15 + ReplacementText: "/*\n DPCT1065:183: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23748 + Length: 47 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23880 + Length: 0 + ReplacementText: " /*\n DPCT1118:69: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23884 + Length: 15 + ReplacementText: "/*\n DPCT1065:184: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23922 + Length: 38 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 23999 + Length: 29 + ReplacementText: 'dpct::atomic_compare_exchange_strong((int *)&lock, 0, 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24045 + Length: 15 + ReplacementText: "/*\n DPCT1065:180: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24213 + Length: 30 + ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24589 + Length: 15 + ReplacementText: "/*\n DPCT1065:181: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24636 + Length: 27 + ReplacementText: 'dpct::atomic_exchange((int *)&lock, 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24668 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24757 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, volatile int &lock,\n float *siftParts2, float *sums" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24827 + Length: 38 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24907 + Length: 31 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24956 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 24986 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25017 + Length: 38 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25092 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25124 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25277 + Length: 43 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k + ty), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25378 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25433 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25454 
+ Length: 0 + ReplacementText: " /*\n DPCT1118:72: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25458 + Length: 15 + ReplacementText: "/*\n DPCT1065:189: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25520 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25552 + Length: 6 + ReplacementText: 'sycl::float4' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25606 + Length: 7 + ReplacementText: 'part1.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25616 + Length: 7 + ReplacementText: 'part2.x()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25626 + Length: 7 + ReplacementText: 'part1.y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25636 + Length: 7 + ReplacementText: 'part2.y()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25646 + Length: 7 + ReplacementText: 'part1.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25656 + Length: 7 + ReplacementText: 'part2.z()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25666 + Length: 7 + ReplacementText: 'part1.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25676 + Length: 7 + ReplacementText: 'part2.w()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25715 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25747 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25779 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25811 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25843 + Length: 0 + ReplacementText: ', item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25904 + Length: 0 + ReplacementText: " /*\n DPCT1118:73: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 25908 + Length: 15 + ReplacementText: "/*\n DPCT1065:190: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 26157 + Length: 42 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k + j), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 26284 + Length: 0 + ReplacementText: " /*\n DPCT1118:74: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 26288 + Length: 15 + ReplacementText: "/*\n DPCT1065:191: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 26346 + Length: 29 + ReplacementText: 'dpct::atomic_compare_exchange_strong((int *)&lock, 0, 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 26392 + Length: 15 + ReplacementText: "/*\n DPCT1065:187: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 26458 + Length: 38 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 26630 + Length: 30 + ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27006 + Length: 15 + ReplacementText: "/*\n DPCT1065:188: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27053 + Length: 27 + ReplacementText: 'dpct::atomic_exchange((int *)&lock, 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27085 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27174 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, volatile int &lock,\n float *siftParts1, float *siftParts2" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27180 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27243 + Length: 37 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27337 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27367 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27398 + Length: 38 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27610 + Length: 48 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27887 + Length: 0 + ReplacementText: " /*\n DPCT1118:77: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 27893 + Length: 15 + ReplacementText: "/*\n DPCT1065:196: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28011 + Length: 0 + ReplacementText: " /*\n DPCT1118:78: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28017 + Length: 15 + ReplacementText: "/*\n DPCT1065:197: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28100 + Length: 0 + ReplacementText: " /*\n DPCT1118:75: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28104 + Length: 15 + ReplacementText: "/*\n DPCT1065:194: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28342 + Length: 47 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), numPts2 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28474 + Length: 0 + ReplacementText: " /*\n DPCT1118:76: SYCL group functions and algorithms must be encountered in converged control flow. 
You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28478 + Length: 15 + ReplacementText: "/*\n DPCT1065:195: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28516 + Length: 38 + ReplacementText: 'dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28593 + Length: 29 + ReplacementText: 'dpct::atomic_compare_exchange_strong((int *)&lock, 0, 1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28639 + Length: 15 + ReplacementText: "/*\n DPCT1065:192: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 28807 + Length: 30 + ReplacementText: 'sycl::max(sift1[p1].score, maxScor2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 29183 + Length: 15 + ReplacementText: "/*\n DPCT1065:193: Consider replacing sycl::nd_item::barrier() with sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better performance if there is no access to global memory.\n */\n item_ct1.barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 29230 + Length: 27 + ReplacementText: 'dpct::atomic_exchange((int *)&lock, 0)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 29282 + Length: 11 + ReplacementText: "/*\nDPCT1110:79: The total declared local variable size in device function InvertMatrix exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 29651 + Length: 16 + ReplacementText: 'sycl::fabs(elem[i][j])' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 30547 + Length: 9 + ReplacementText: 'sycl::fabs(sum)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 31887 + Length: 11 + ReplacementText: "/*\nDPCT1110:80: The total declared local variable size in device function ComputeHomographies 
exceeds 128 bytes and may cause high register pressure. Consult with your hardware vendor to find the total register size available and adjust the code, or use smaller sub-group size to avoid high register pressure.\n*/\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 32010 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 32072 + Length: 10 + ReplacementText: 'item_ct1.get_group(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 32101 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 32132 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 32177 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 32190 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + 
Offset: 32859 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33036 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33195 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33332 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1, float *homo,\n int *cnts" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33338 + Length: 42 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33383 + Length: 53 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33454 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(2)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33484 + Length: 11 + ReplacementText: 'item_ct1.get_local_id(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: 
'' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33515 + Length: 10 + ReplacementText: 'item_ct1.get_group(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33528 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33568 + Length: 10 + ReplacementText: 'item_ct1.get_local_range(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33581 + Length: 9 + ReplacementText: 'item_ct1.get_group_range(1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 33684 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34005 + Length: 0 + ReplacementText: " /*\n DPCT1013:198: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. 
SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34022 + Length: 19 + ReplacementText: 'a[0] * x1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34044 + Length: 19 + ReplacementText: 'a[1] * y1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34072 + Length: 0 + ReplacementText: " /*\n DPCT1013:199: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34089 + Length: 19 + ReplacementText: 'a[3] * x1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34111 + Length: 19 + ReplacementText: 'a[4] * y1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34139 + Length: 0 + ReplacementText: " /*\n DPCT1013:200: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. 
SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34156 + Length: 19 + ReplacementText: 'a[6] * x1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34178 + Length: 19 + ReplacementText: 'a[7] * y1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34206 + Length: 0 + ReplacementText: " /*\n DPCT1013:201: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34223 + Length: 19 + ReplacementText: 'x2 * deno' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34251 + Length: 0 + ReplacementText: " /*\n DPCT1013:202: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. 
SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34268 + Length: 19 + ReplacementText: 'y2 * deno' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34296 + Length: 0 + ReplacementText: " /*\n DPCT1013:203: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34313 + Length: 21 + ReplacementText: 'errx * errx' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34337 + Length: 21 + ReplacementText: 'erry * erry' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34360 + Length: 0 + ReplacementText: " /*\n DPCT1013:204: The rounding mode could not be specified and the generated code may have different accuracy than the original code. Verify the correctness. 
SYCL math built-in function rounding mode is aligned with OpenCL C 1.2 standard.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34375 + Length: 41 + ReplacementText: 'thresh2 * deno * deno' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34494 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34643 + Length: 0 + ReplacementText: " /*\n DPCT1118:81: SYCL group functions and algorithms must be encountered in converged control flow. You may need to adjust the code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34647 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 34755 + Length: 15 + ReplacementText: 'item_ct1.barrier(sycl::access::fence_space::local_space)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 35669 + Length: 59 + ReplacementText: 'DPCT_CHECK_ERROR(d_coord = (float *)sycl::malloc_device(4 * sizeof(float) * numPtsUp, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + 
InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 35742 + Length: 41 + ReplacementText: 'DPCT_CHECK_ERROR(d_randPts = (int *)sycl::malloc_device(randSize, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 35797 + Length: 58 + ReplacementText: 'DPCT_CHECK_ERROR(d_homo = (float *)sycl::malloc_device(8 * sizeof(float) * numLoops, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 36492 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 36541 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 36566 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 36580 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 36633 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + 
NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 36658 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 38437 + Length: 44 + ReplacementText: 'DPCT_CHECK_ERROR(temp3 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 38497 + Length: 44 + ReplacementText: 'DPCT_CHECK_ERROR(temp4 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 38557 + Length: 44 + ReplacementText: 'DPCT_CHECK_ERROR(temp5 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 38617 + Length: 44 + ReplacementText: 'DPCT_CHECK_ERROR(temp6 = (float *)sycl::malloc_device(szPt * numPtsUp, dpct::get_in_order_queue()))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 38949 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 38990 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39015 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39031 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39070 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39118 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39145 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39161 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39209 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + 
InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39236 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39252 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39306 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39333 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39349 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39403 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39430 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39496 + Length: 80 + ReplacementText: "dpct::get_in_order_queue().submit(\n 
[&](sycl::handler &cgh) {\n float * d_coord_numPtsUp_ct1 = &d_coord[0 * numPtsUp];\n\n cgh.parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n [=](sycl::nd_item<3> item_ct1) {\n memcopyKernel(temp3, d_coord_numPtsUp_ct1, szPt, szFl, numPts, szFl);\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39576 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39578 + Length: 0 + ReplacementText: " /*\n DPCT1010:205: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39591 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39625 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39655 + Length: 80 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n float * d_coord_numPtsUp_ct1 = &d_coord[1 * numPtsUp];\n\n cgh.parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n [=](sycl::nd_item<3> item_ct1) {\n memcopyKernel(temp4, d_coord_numPtsUp_ct1, 
szPt, szFl, numPts, szFl);\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39735 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39737 + Length: 0 + ReplacementText: " /*\n DPCT1010:206: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39750 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39784 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39814 + Length: 80 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n float * d_coord_numPtsUp_ct1 = &d_coord[2 * numPtsUp];\n\n cgh.parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n [=](sycl::nd_item<3> item_ct1) {\n memcopyKernel(temp5, d_coord_numPtsUp_ct1, szPt, szFl, numPts, szFl);\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39894 + Length: 1 + 
ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39896 + Length: 0 + ReplacementText: " /*\n DPCT1010:207: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39909 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39943 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 39973 + Length: 80 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n float * d_coord_numPtsUp_ct1 = &d_coord[3 * numPtsUp];\n\n cgh.parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), \n [=](sycl::nd_item<3> item_ct1) {\n memcopyKernel(temp6, d_coord_numPtsUp_ct1, szPt, szFl, numPts, szFl);\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40053 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40055 + Length: 0 + ReplacementText: " 
/*\n DPCT1010:208: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40068 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40102 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40405 + Length: 80 + ReplacementText: "dpct::get_in_order_queue().parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, numLoops / 16) * sycl::range<3>(1, 1, 16), sycl::range<3>(1, 1, 16)), \n [=](sycl::nd_item<3> item_ct1) {\n ComputeHomographies(d_coord, d_randPts, d_homo, numPtsUp, item_ct1);\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40485 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40500 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40920 + Length: 4 + ReplacementText: 'sycl::range<3>' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40932 + Length: 28 + ReplacementText: '1, numLoops / TESTHOMO_LOOPS, 1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40967 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 40980 + Length: 30 + ReplacementText: 1, TESTHOMO_LOOPS, TESTHOMO_TESTS + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 41104 + Length: 92 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n /*\n DPCT1101:210: '8 * TESTHOMO_LOOPS' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor homo_acc_ct1(sycl::range<1>(128/*8 * TESTHOMO_LOOPS*/), cgh);\n /*\n DPCT1101:211: 'TESTHOMO_TESTS * TESTHOMO_LOOPS' expression was replaced with a value. 
Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor cnts_acc_ct1(sycl::range<1>(256/*TESTHOMO_TESTS * TESTHOMO_LOOPS*/), cgh);\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocks * threads, threads), \n [=](sycl::nd_item<3> item_ct1) {\n TestHomographies(d_coord, d_homo, d_randPts, numPtsUp, thresh * thresh, item_ct1, homo_acc_ct1.get_pointer(), cnts_acc_ct1.get_pointer());\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 41196 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 41211 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 41720 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 41775 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 41800 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 41816 + Length: 23 + ReplacementText: 
'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42339 + Length: 12 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::dpct_memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42424 + Length: 22 + ReplacementText: 'dpct::device_to_host' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42447 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42463 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42688 + Length: 15 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp3, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42703 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42719 + Length: 15 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp4, dpct::get_in_order_queue())' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42734 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42750 + Length: 15 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp5, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42765 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42781 + Length: 15 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(temp6, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42796 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42884 + Length: 16 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_homo, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42900 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42914 + Length: 19 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_randPts, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42933 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42947 + Length: 17 + ReplacementText: 'DPCT_CHECK_ERROR(sycl::free(d_coord, dpct::get_in_order_queue())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 42964 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45682 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45698 + Length: 41 + ReplacementText: '1, iDivUp(numPts2, 512), iDivUp(numPts1, 16)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45744 + Length: 4 + ReplacementText: 'sycl::range<3>' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45761 + Length: 6 + ReplacementText: 1, 16, 16 + ConstantFlag: 
'' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45856 + Length: 57 + ReplacementText: "dpct::get_in_order_queue().parallel_for(\n sycl::nd_range<3>(sycl::range<3>(1, 1, iDivUp(numPts1, 64)) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), \n [=](sycl::nd_item<3> item_ct1) {\n CleanMatches(sift1, numPts1, item_ct1);\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45913 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45915 + Length: 0 + ReplacementText: " /*\n DPCT1010:209: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. 
You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45926 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 45958 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 47433 + Length: 26 + ReplacementText: 'sycl::range<3>(1, 1, iDivUp(numPts1, M7W))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 47479 + Length: 20 + ReplacementText: 'sycl::range<3>(1, M7H / M7R, M7W)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 47587 + Length: 0 + ReplacementText: " /*\n DPCT1049:82: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 47591 + Length: 74 + ReplacementText: "dpct::get_in_order_queue().submit(\n [&](sycl::handler &cgh) {\n /*\n DPCT1101:212: 'M7W * NDIM / 4' expression was replaced with a value. 
Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor buffer1_acc_ct1(sycl::range<1>(1024/*M7W * NDIM / 4*/), cgh);\n /*\n DPCT1101:213: 'M7H * NDIM / 4' expression was replaced with a value. Modify the code to use the original expression, provided in comments, if it is correct.\n */\n sycl::local_accessor buffer2_acc_ct1(sycl::range<1>(1024/*M7H * NDIM / 4*/), cgh);\n\n cgh.parallel_for(\n sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), \n [=](sycl::nd_item<3> item_ct1) {\n FindMaxCorr10(sift1, sift2, numPts1, numPts2, item_ct1, buffer1_acc_ct1.get_pointer(), buffer2_acc_ct1.get_pointer());\n });\n });" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: true + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 47665 + Length: 1 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 47680 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 48409 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 48466 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 48491 + Length: 0 + ReplacementText: '.wait())' + 
ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Offset: 48507 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false +MainSourceFilesDigest: + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaImage.cu' + Digest: 59bd519d549bdc5cf38176784f41e490 + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftD.cu' + Digest: 504c8e0303973e4f03d7dc8e37014b37 + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSiftH.cu' + Digest: bcafa76012a74a51ca2537c0be6c3ff9 + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/mainSift.cpp' + Digest: 7ac60955dd21b882e666af92155acd17 + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/matching.cu' + Digest: 521f5b6ac7d703c12bf5041028124e6a +DpctVersion: 18.0.0 +MainHelperFileName: '' +USMLevel: '' +FeatureMap: {} +CompileTargets: + cudasift: + - MigratedFileName: '/home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp' + CompileOptions: '-isystem /usr/include/opencv4 -O3 -std=gnu++17 ' + Compiler: 'c++' + - MigratedFileName: './geomFuncs.cpp' + CompileOptions: '-isystem /usr/include/opencv4 -O3 -std=gnu++17 ' + Compiler: 'c++' + - MigratedFileName: './mainSift.cpp.dp.cpp' + CompileOptions: '-isystem /usr/include/opencv4 -O3 -std=c++17 -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ' + Compiler: 'c++' + - MigratedFileName: './cudaImage.dp.cpp' + CompileOptions: '-O3 -DNVCC ' + Compiler: nvcc + - MigratedFileName: './cudaSiftH.dp.cpp' + CompileOptions: '-O3 -DNVCC ' + Compiler: nvcc + - MigratedFileName: './matching.dp.cpp' + CompileOptions: '-O3 -DNVCC ' + Compiler: nvcc +OptionMap: + AnalysisScopePath: + Value: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA' + Specified: false + AsyncHandler: + Value: 'false' + Specified: false + CommentsEnabled: + Value: 'false' + Specified: false + CompilationsDir: + Value: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build' + Specified: true + CtadEnabled: + Value: 'false' + Specified: false + EnablepProfiling: + Value: 'false' + Specified: false + ExperimentalFlag: + Value: '0' + Specified: false + ExplicitClNamespace: + Value: 'false' + Specified: false + ExplicitNamespace: + Value: '20' + Specified: false + ExtensionDDFlag: + Value: '0' + Specified: false + ExtensionDEFlag: + Value: '4294967295' + Specified: false + HelperFuncPreferenceFlag: + Value: '0' + Specified: false + NDRangeDim: + Value: '3' + Specified: false + NoDRYPattern: + Value: 'false' + Specified: false + NoUseGenericSpace: + Value: '' + Specified: true + OptimizeMigration: + Value: 'false' + Specified: false + ProcessAll: + Value: 'false' + Specified: false + RuleFile: + Value: '' + Specified: false + SyclNamedLambda: + Value: 'false' + Specified: false + UsmLevel: + Value: '1' + Specified: false +... diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Makefile.dpct b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Makefile.dpct new file mode 100644 index 000000000..f0724e067 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Makefile.dpct @@ -0,0 +1,71 @@ +CC := icpx + +LD := $(CC) + +#DPCT2001:228: You can link with more library by add them here. 
+LIB := -lopencv_core -lopencv_imgcodecs + +FLAGS := + +ifeq ($(shell which $(CC)),) + $(error ERROR - $(CC) compiler not found) +endif + +ROOT_DIR := $(shell dirname $(shell which $(CC))) +INCLUDE_SYCL := $(ROOT_DIR)/../include +INCLUDE_CL := $(ROOT_DIR)/../include/sycl + +TARGET_0_SRC_0 = /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.cpp +TARGET_0_OBJ_0 = /home/local_user/sandbox/Velocity-Bench/cudaSift/common/Utility.o +TARGET_0_FLAG_0 = -isystem /usr/include/opencv4 -O3 -std=gnu++17 ${FLAGS} + +TARGET_0_SRC_1 = ./geomFuncs.cpp +TARGET_0_OBJ_1 = ./geomFuncs.o +TARGET_0_FLAG_1 = -isystem /usr/include/opencv4 -O3 -std=gnu++17 ${FLAGS} + +TARGET_0_SRC_2 = ./mainSift.cpp.dp.cpp +TARGET_0_OBJ_2 = ./mainSift.cpp.dp.o +TARGET_0_FLAG_2 = -isystem /usr/include/opencv4 -O3 -std=c++17 -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS} + +TARGET_0_SRC_3 = ./cudaImage.dp.cpp +TARGET_0_OBJ_3 = ./cudaImage.dp.o +TARGET_0_FLAG_3 = -O3 -DNVCC ${FLAGS} + +TARGET_0_SRC_4 = ./cudaSiftH.dp.cpp +TARGET_0_OBJ_4 = ./cudaSiftH.dp.o +TARGET_0_FLAG_4 = -O3 -DNVCC ${FLAGS} + +TARGET_0_SRC_5 = ./matching.dp.cpp +TARGET_0_OBJ_5 = ./matching.dp.o +TARGET_0_FLAG_5 = -O3 -DNVCC ${FLAGS} + +TARGET_0 := cudasift + +TARGET := ${TARGET_0} + +.PHONY:all clean +OBJS_0 := ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} +all: $(TARGET) +$(TARGET_0): $(OBJS_0) + $(CC) -fsycl -o $@ $^ $(LIB) + +$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0) + c++ -c ${TARGET_0_SRC_0} -o ${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0) + +$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1) + c++ -c ${TARGET_0_SRC_1} -o ${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1) + +$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2) + c++ -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2) + +$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3) + $(CC) -fsycl -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3) + +$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4) + $(CC) -fsycl -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4) + 
+$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5) + $(CC) -fsycl -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5) + +clean: + rm -f ${OBJS_0} $(TARGET) diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.cpp new file mode 100644 index 000000000..6c230dd44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include + +#include "Utility.h" + +using namespace Utility; + +int Utility::RunDataVerification(const int threshold, const float matchPercentage) +{ + printf("Performing data verification \n"); + switch (threshold) + { + case 1: + if (matchPercentage > 20.0f && matchPercentage < 30.0f) + { + printf("Data verification is SUCCESSFUL. 
\n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + case 2: + if (matchPercentage > 26.0f && matchPercentage < 38.0f) + { + printf("Data verification is SUCCESSFUL. \n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + case 3: + if (matchPercentage > 35.0f && matchPercentage < 45.0f) + { + printf("Data verification is SUCCESSFUL. \n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + case 4: + if (matchPercentage > 40.0f && matchPercentage < 50.0f) + { + printf("Data verification is SUCCESSFUL. \n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + default: + printf("Threshold values should be in the range [1, 4]. \n\n"); + return -1; + } + return 0; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.h new file mode 100644 index 000000000..da09d2d78 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.h @@ -0,0 +1,31 @@ +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#ifndef UTILITY_H +#define UTILITY_H + +namespace Utility +{ + int RunDataVerification(const int thresh, const float matchPercentage); + +} +#endif // UTILITY_H diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.o new file mode 100644 index 000000000..220855fcd Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/Utility.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.cpp new file mode 100644 index 000000000..e799acdb9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.cpp @@ -0,0 +1,116 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. 
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include + +#include "cudautils.h" +#include "cudaImage.h" + +int iDivUp(int a, int b) { return (a % b != 0) ? (a / b + 1) : (a / b); } +int iDivDown(int a, int b) { return a / b; } +int iAlignUp(int a, int b) { return (a % b != 0) ? (a - a % b + b) : a; } +int iAlignDown(int a, int b) { return a - a % b; } + +void CudaImage::Allocate(int w, int h, int p, bool host, float &totTime, float *devmem, float *hostmem) +{ + width = w; + height = h; + pitch = p; + d_data = devmem; + h_data = hostmem; + t_data = NULL; + if (devmem == NULL) + { +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR( + d_data = (float *)dpct::dpct_malloc(*(size_t *)&pitch, + (size_t)(sizeof(float) * width), + (size_t)height))); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + std::cout << "Allocate Time is " << std::chrono::duration(stop_malloc - start_malloc).count() << " us" << std::endl; + totTime += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + pitch /= sizeof(float); + if (d_data == NULL) + printf("Failed to allocate device data\n"); + d_internalAlloc = true; + } + if (host && hostmem == NULL) + { + h_data = (float *)malloc(sizeof(float) * pitch * height); + h_internalAlloc = true; + } +} + +CudaImage::CudaImage() : width(0), height(0), pitch(0), 
d_data(NULL), h_data(NULL), t_data(NULL), d_internalAlloc(false), h_internalAlloc(false) +{ +} + +CudaImage::~CudaImage() +{ + if (d_internalAlloc && d_data != NULL) + safeCall(DPCT_CHECK_ERROR(sycl::free(d_data, dpct::get_in_order_queue()))); + d_data = NULL; + if (h_internalAlloc && h_data != NULL) + free(h_data); + h_data = NULL; + if (t_data != NULL) + safeCall(DPCT_CHECK_ERROR(delete (dpct::image_matrix *)t_data)); + t_data = NULL; +} + +double CudaImage::Download(float &totTime) +{ + double downloadTime = 0.0; + int p = sizeof(float) * pitch; + if (d_data != NULL && h_data != NULL) + { +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::dpct_memcpy( + d_data, p, h_data, sizeof(float) * width, sizeof(float) * width, height, + dpct::host_to_device))); + // safeCall(cudaMemcpy(d_data, h_data, sizeof(float) * width * height, cudaMemcpyHostToDevice)); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); + downloadTime = std::chrono::duration(stop_memcpy - start_memcpy).count(); + std::cout << "Download Time is " << downloadTime << " us" << std::endl; +#endif + } + return downloadTime; +} \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.o new file mode 100644 index 000000000..c9edccaa2 Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.dp.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.h new file mode 100644 index 000000000..737446686 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaImage.h @@ -0,0 +1,38 @@ 
+//********************************************************//
+// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
+//********************************************************//
+
+#ifndef CUDAIMAGE_H
+#define CUDAIMAGE_H
+
+class CudaImage // Image with optional host and device buffers; copying is disabled.
+{
+public:
+    int width, height; // Image size as passed to Allocate.
+    int pitch; // Row stride in floats (Download multiplies it by sizeof(float) to get bytes).
+    float *h_data; // Host buffer; malloc'ed by Allocate when withHost is set and no hostMem is given.
+    float *d_data; // Device buffer; allocated by Allocate when devMem is NULL.
+    float *t_data; // Deleted by the destructor as a dpct::image_matrix when non-NULL.
+    bool d_internalAlloc; // True when Allocate created d_data, so the destructor frees it.
+    bool h_internalAlloc; // True when Allocate created h_data, so the destructor frees it.
+
+public:
+    CudaImage(); // Zero geometry, NULL buffers, nothing owned.
+    CudaImage(const CudaImage&) = delete;
+    CudaImage& operator=(const CudaImage&) = delete;
+    ~CudaImage(); // Frees only buffers flagged as internally allocated, plus t_data.
+    void Allocate(int width, int height, int pitch, bool withHost, float &totTime, float *devMem = NULL, float *hostMem = NULL); // Sets geometry; allocates any buffer not supplied by the caller.
+    double Download(float &totTime); // Copies h_data to d_data (host to device); returns elapsed time, 0.0 unless DEVICE_TIMER is defined.
+    double Readback();
+    double InitTexture();
+    double CopyToTexture(CudaImage &dst, bool host);
+};
+
+int iDivUp(int a, int b); // a / b, plus one when there is a remainder.
+int iDivDown(int a, int b); // Truncating a / b.
+int iAlignUp(int a, int b); // a raised to the next multiple of b.
+int iAlignDown(int a, int b); // a lowered to the previous multiple of b.
+void StartTimer(unsigned int *hTimer);
+double StopTimer(unsigned int hTimer);
+
+#endif // CUDAIMAGE_H
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h
new file mode 100644
index 000000000..00903f8f6
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h
@@ -0,0 +1,48 @@
+#ifndef CUDASIFT_H
+#define CUDASIFT_H
+
+#include "cudaImage.h"
+
+typedef struct dpct_type_558722
+{
+    float xpos; // Multiplied by the subsampling factor once the descriptor has been written.
+    float ypos; // Multiplied by the subsampling factor once the descriptor has been written.
+    float scale; // Multiplied by the subsampling factor once the descriptor has been written.
+    float sharpness;
+    float edgeness;
+    float orientation; // Degrees: the descriptor kernels scale it by 2*pi/360 before sin/cos.
+    float score;
+    float ambiguity;
+    int match;
+    float match_xpos;
+    float match_ypos;
+    float match_error;
+    float subsampling;
+    float empty[3];
+    float data[128]; // Descriptor values written by the ExtractSiftDescriptors* kernels.
+} SiftPoint;
+
+typedef struct dpct_type_948814
+{
+    int numPts; // Number of available Sift points
+    int maxPts; // Number of allocated Sift points
+#ifdef MANAGEDMEM
+    SiftPoint *m_data; // Managed data
+#else
+    SiftPoint *h_data; // Host (CPU) data
+    SiftPoint *d_data; // Device (GPU) data
+#endif
+} SiftData;
+
+void InitCuda(int devNum = 0); +float *AllocSiftTempMemory(int width, int height, int numOctaves, float &totTime, bool scaleUp = false); +void FreeSiftTempMemory(float *memoryTmp); +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, + float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0); +void InitSiftData(SiftData &data, float &totTime, int num = 1024, bool host = false, bool dev = true); +void FreeSiftData(SiftData &data); +void PrintSiftData(SiftData &data); +double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime); +double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f); + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h.yaml b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h.yaml new file mode 100644 index 000000000..961853067 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h.yaml @@ -0,0 +1,91 @@ +--- +MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/out/cudaSift.h' +Replacements: + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h' + Offset: 77 + Length: 0 + ReplacementText: ' dpct_type_558722' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h' + Offset: 375 + Length: 0 + ReplacementText: ' dpct_type_948814' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false +MainSourceFilesDigest: + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudaSift.h' + Digest: 3cca4f7dd3623244964a8145ffe4cdbe +DpctVersion: 18.0.0 +MainHelperFileName: '' +USMLevel: '' +FeatureMap: {} +CompileTargets: {} 
+OptionMap: + AnalysisScopePath: + Value: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA' + Specified: false + AsyncHandler: + Value: 'false' + Specified: false + CommentsEnabled: + Value: 'false' + Specified: false + CompilationsDir: + Value: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build' + Specified: true + CtadEnabled: + Value: 'false' + Specified: false + EnablepProfiling: + Value: 'false' + Specified: false + ExperimentalFlag: + Value: '0' + Specified: false + ExplicitClNamespace: + Value: 'false' + Specified: false + ExplicitNamespace: + Value: '20' + Specified: false + ExtensionDDFlag: + Value: '0' + Specified: false + ExtensionDEFlag: + Value: '4294967295' + Specified: false + HelperFuncPreferenceFlag: + Value: '0' + Specified: false + NDRangeDim: + Value: '3' + Specified: false + NoDRYPattern: + Value: 'false' + Specified: false + NoUseGenericSpace: + Value: '' + Specified: true + OptimizeMigration: + Value: 'false' + Specified: false + ProcessAll: + Value: 'false' + Specified: false + RuleFile: + Value: '' + Specified: false + SyclNamedLambda: + Value: 'false' + Specified: false + UsmLevel: + Value: '1' + Specified: false +... 
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.dp.cpp new file mode 100644 index 000000000..9ff5263e9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.dp.cpp @@ -0,0 +1,2888 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include +#include +#include "cudautils.h" +#include "cudaSiftD.h" +#include "cudaSift.h" + +/////////////////////////////////////////////////////////////////////////////// +// Kernel configuration +/////////////////////////////////////////////////////////////////////////////// + +static dpct::constant_memory d_MaxNumPoints; +dpct::global_memory d_PointCounter(8 * 2 + 1); +static dpct::constant_memory d_ScaleDownKernel(5); +static dpct::constant_memory d_LowPassKernel(2 * LOWPASS_R + 1); +static dpct::constant_memory d_LaplaceKernel(8 * 12 * 16); + +/////////////////////////////////////////////////////////////////////////////// +// Lowpass filter and subsample image +/////////////////////////////////////////////////////////////////////////////// +void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + const sycl::nd_item<3> &item_ct1, + float const *d_ScaleDownKernel, float *brows) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx; + const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty; + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + const int xl = sycl::min(width - 1, sycl::max(0, xp - 2)); + const int yl = sycl::min(height - 1, sycl::max(0, yp - 2)); + if (xp < (width + 4) && yp < (height + 4)) + { + float v = d_Data[yl * pitch + xl]; + brows[BW * ty + tx] = + k0 * (v + ShiftDown(v, 4, item_ct1)) + + k1 * (ShiftDown(v, 1, item_ct1) + ShiftDown(v, 3, item_ct1)) + + k2 * ShiftDown(v, 2, item_ct1); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + const int xs = item_ct1.get_group(2) * W2 + tx; + const int ys = item_ct1.get_group(1) * H2 + ty; + if (tx < W2 && ty < H2 
&& xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW]; + } +} + +void ScaleDownDense(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + const sycl::nd_item<3> &item_ct1, + float const *d_ScaleDownKernel, float *irows, float *brows) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx; + const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty; + const int xl = sycl::min(width - 1, sycl::max(0, xp - 2)); + const int yl = sycl::min(height - 1, sycl::max(0, yp - 2)); + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + if (xp < (width + 4) && yp < (height + 4)) + irows[BW * ty + tx] = d_Data[yl * pitch + xl]; + item_ct1.barrier(sycl::access::fence_space::local_space); + if (yp < (height + 4) && tx < W2) + { + float *ptr = &irows[BW * ty + 2 * tx]; + brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2]; + } + item_ct1.barrier(sycl::access::fence_space::local_space); + const int xs = item_ct1.get_group(2) * W2 + tx; + const int ys = item_ct1.get_group(1) * H2 + ty; + if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[W2 * (ty * 2) + tx]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2]; + } +} + +void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + const sycl::nd_item<3> &item_ct1, float const *d_ScaleDownKernel, + float *inrow, float *brow, int *yRead, int *yWrite) +{ + +#define dx2 (SCALEDOWN_W / 2) + const int tx = 
item_ct1.get_local_id(2); + const int tx0 = tx + 0 * dx2; + const int tx1 = tx + 1 * dx2; + const int tx2 = tx + 2 * dx2; + const int tx3 = tx + 3 * dx2; + const int tx4 = tx + 4 * dx2; + const int xStart = item_ct1.get_group(2) * SCALEDOWN_W; + const int yStart = item_ct1.get_group(1) * SCALEDOWN_H; + const int xWrite = xStart / 2 + tx; + float k0 = d_ScaleDownKernel[0]; + float k1 = d_ScaleDownKernel[1]; + float k2 = d_ScaleDownKernel[2]; + if (tx < SCALEDOWN_H + 4) + { + int y = yStart + tx - 2; + y = (y < 0 ? 0 : y); + y = (y >= height ? height - 1 : y); + yRead[tx] = y * pitch; + yWrite[tx] = (yStart + tx - 4) / 2 * newpitch; + } + item_ct1.barrier(sycl::access::fence_space::local_space); + int xRead = xStart + tx - 2; + xRead = (xRead < 0 ? 0 : xRead); + xRead = (xRead >= width ? width - 1 : xRead); + + int maxtx = sycl::min(dx2, width / 2 - xStart / 2); + for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5) + { + { + inrow[tx] = d_Data[yRead[dy + 0] + xRead]; + /* + DPCT1118:3: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 4 && !(dy & 1)) + d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]); + } + /* + DPCT1118:4: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < (SCALEDOWN_H + 3)) + { + inrow[tx] = d_Data[yRead[dy + 1] + xRead]; + /* + DPCT1118:5: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 3 && (dy & 1)) + d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]); + } + /* + DPCT1118:6: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < (SCALEDOWN_H + 2)) + { + inrow[tx] = d_Data[yRead[dy + 2] + xRead]; + /* + DPCT1118:7: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 2 && !(dy & 1)) + d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]); + } + /* + DPCT1118:8: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < (SCALEDOWN_H + 1)) + { + inrow[tx] = d_Data[yRead[dy + 3] + xRead]; + /* + DPCT1118:9: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 1 && (dy & 1)) + d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + brow[tx2]) + k1 * (brow[tx4] + brow[tx1]); + } + /* + DPCT1118:10: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < SCALEDOWN_H) + { + inrow[tx] = d_Data[yRead[dy + 4] + xRead]; + /* + DPCT1118:11: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < dx2 && xWrite < width / 2) + { + brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (!(dy & 1)) + d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]); + } + /* + DPCT1118:12: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } + } +} + +void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + const sycl::nd_item<3> &item_ct1) +{ + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + int x = item_ct1.get_group(2) * SCALEUP_W + 2 * tx; + int y = item_ct1.get_group(1) * SCALEUP_H + 2 * ty; + if (x < 2 * width && y < 2 * height) + { + int xl = item_ct1.get_group(2) * (SCALEUP_W / 2) + tx; + int yu = item_ct1.get_group(1) * (SCALEUP_H / 2) + ty; + int xr = sycl::min(xl + 1, width - 1); + int yd = sycl::min(yu + 1, height - 1); + float vul = d_Data[yu * pitch + xl]; + float vur = d_Data[yu * pitch + xr]; + float vdl = d_Data[yd * pitch + xl]; + float vdr = d_Data[yd * pitch + xr]; + d_Result[(y + 0) * newpitch + x + 0] = vul; + d_Result[(y + 0) * newpitch + x + 1] = 0.50f * (vul + vur); + d_Result[(y + 1) * newpitch + x + 0] = 0.50f * (vul + vdl); + d_Result[(y + 1) * newpitch + x + 1] = 0.25f * (vul + vur + vdl + vdr); + } +} + +/* +DPCT1110:13: The total declared local variable size in device function +ExtractSiftDescriptors exceeds 128 bytes and may cause high register pressure. +Consult with your hardware vendor to find the total register size available and +adjust the code, or use smaller sub-group size to avoid high register pressure. 
+*/
+// Computes the 128-value descriptor of one SIFT point per work-group.
+//   texObj       image sampled around the point.
+//   d_sift       point array; entry (group id + fstPts) is read and updated.
+//   fstPts       index of the first point handled by this launch.
+//   subsampling  factor applied to xpos, ypos and scale after the descriptor
+//                has been stored.
+//   gauss        scratch for 16 weights exp(-(i - 7.5)^2 / 128).
+//   buffer       scratch for the 128 histogram bins.
+//   sums         scratch for the partial sums of squares (4 entries read).
+// Steps: zero the bins, sample gradients on a 16x16 grid rotated by the point
+// orientation and scaled by 12/16 of its scale, spread each weighted gradient
+// over neighbouring position/angle bins with atomic adds, then normalise,
+// clamp at 0.2 and normalise again before writing d_sift[bx].data.
+// NOTE(review): the reduction reads sums[0..3] and stores one partial per 32
+// work-items, so it presumably relies on ShiftDown operating over groups of
+// 32 work-items — confirm against ShiftDown and the launch configuration.
+void ExtractSiftDescriptors(dpct::image_accessor_ext texObj,
+                            SiftPoint *d_sift, int fstPts, float subsampling,
+                            const sycl::nd_item<3> &item_ct1, float *gauss,
+                            float *buffer, float *sums)
+{
+
+  const int tx = item_ct1.get_local_id(2); // 0 -> 16
+  const int ty = item_ct1.get_local_id(1); // 0 -> 8
+  const int idx = ty * 16 + tx;
+  const int bx = item_ct1.get_group(2) + fstPts; // 0 -> numPts
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  /*
+  DPCT1065:92: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Compute angles and gradients
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sycl::sin(theta); // cosa -sina
+  float cosa = sycl::cos(theta); // sina cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  // Each work-item covers column tx of rows ty and ty + 8 of the 16x16 grid.
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    float dx = texObj.read(xpos + cosa, ypos + sina) -
+               texObj.read(xpos - cosa, ypos - sina);
+    float dy = texObj.read(xpos - sina, ypos + cosa) -
+               texObj.read(xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+    float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    int angp = (angi < 7 ? angi + 1 : 0);
+    angf -= angi;
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1 + 32, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1 + 8, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1 + 40, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  /*
+  DPCT1065:93: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Normalize twice and suppress peaks first time
+  float sum = buffer[idx] * buffer[idx];
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  /*
+  DPCT1065:94: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f);
+
+  sum = tsum1 * tsum1;
+  for (int i = 16; i > 0; i /= 2)
+    sum += ShiftDown(sum, i, item_ct1);
+  if ((idx & 31) == 0)
+    sums[idx / 32] = sum;
+  /*
+  DPCT1065:95: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+  float *desc = d_sift[bx].data;
+  desc[idx] = tsum1 * sycl::rsqrt(tsum2);
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+
+// Polynomial approximation of atan2(y, x).
+// The ratio min(|x|,|y|) / max(|x|,|y|) is fed to an odd polynomial, and the
+// result is then unfolded by octant, half-plane and sign of y.
+// Returns 0 for x == 0 and y == 0. Without the guard that case evaluates 0/0,
+// and the resulting NaN would propagate into callers that convert the angle to
+// an integer bin index.
+float FastAtan2(float y, float x)
+{
+  float absx = sycl::fabs(x);
+  float absy = sycl::fabs(y);
+  float largest = sycl::max(absx, absy);
+  /*
+  DPCT1013:96: The rounding mode could not be specified and the generated code
+  may have different accuracy than the original code. Verify the correctness.
+  SYCL math built-in function rounding mode is aligned with OpenCL C 1.2
+  standard.
+  */
+  // Both magnitudes are zero exactly when the larger one is; use ratio 0 then.
+  float a = (largest > 0.0f) ? sycl::min(absx, absy) / largest : 0.0f;
+  float s = a * a;
+  float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a;
+  r = (absy > absx ? 1.57079637f - r : r);
+  r = (x < 0 ? 3.14159274f - r : r);
+  r = (y < 0 ? -r : r);
+  return r;
+}
+
+// __global__ void ExtractSiftDescriptorsCONSTNew(cudaTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave)
+/*
+DPCT1110:14: The total declared local variable size in device function
+ExtractSiftDescriptorsCONSTNew exceeds 128 bytes and may cause high register
+pressure. Consult with your hardware vendor to find the total register size
+available and adjust the code, or use smaller sub-group size to avoid high
+register pressure.
+*/ +void ExtractSiftDescriptorsCONSTNew(float *texObj, int pitch, SiftPoint *d_sift, + float subsampling, int octave, + const sycl::nd_item<3> &item_ct1, + int d_MaxNumPoints, + unsigned int *d_PointCounter, float *gauss, + float *buffer, float *sums) +{ + + const int tx = item_ct1.get_local_id(2); // 0 -> 16 + const int ty = item_ct1.get_local_id(1); // 0 -> 8 + const int idx = ty * 16 + tx; + if (ty == 0) + gauss[tx] = sycl::native::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + + int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints); + // if (tx==0 && ty==0) + // printf("%d %d %d %d\n", octave, fstPts, min(d_PointCounter[2*octave], d_MaxNumPoints), totPts); + for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; + bx += item_ct1.get_group_range(2)) + { + + buffer[idx] = 0.0f; + /* + DPCT1118:15: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:97: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sycl::sin(theta); // cosa -sina + float cosa = sycl::cos(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + + // float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + // tex2D(texObj, xpos - cosa, ypos - sina); + // float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + // tex2D(texObj, xpos + sina, ypos - cosa); + + int xi1 = xpos + cosa; + int yi1 = ypos + sina; + + int xi2 = xpos - cosa; + int yi2 = ypos - sina; + + float dx = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + xi2); + + xi1 = xpos - sina; + yi1 = ypos + cosa; + + xi2 = xpos + sina; + yi2 = ypos - cosa; + + float dy = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + xi2); + + /* + DPCT1013:102: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + dpct::atomic_fetch_add( + buffer + p1, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 32, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 8, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 40, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 40, angf * grad2); + } + } + } + /* + DPCT1118:16: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:98: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + /* + DPCT1118:17: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + /* + DPCT1065:99: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + /* + DPCT1118:18: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:100: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * sycl::rsqrt(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + /* + DPCT1118:19: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:101: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } +} + +/* +DPCT1110:20: The total declared local variable size in device function +ExtractSiftDescriptorsCONST exceeds 128 bytes and may cause high register +pressure. Consult with your hardware vendor to find the total register size +available and adjust the code, or use smaller sub-group size to avoid high +register pressure. 
+*/ +void ExtractSiftDescriptorsCONST(dpct::image_accessor_ext texObj, + SiftPoint *d_sift, float subsampling, + int octave, const sycl::nd_item<3> &item_ct1, + int d_MaxNumPoints, + unsigned int *d_PointCounter, float *gauss, + float *buffer, float *sums) +{ + + const int tx = item_ct1.get_local_id(2); // 0 -> 16 + const int ty = item_ct1.get_local_id(1); // 0 -> 8 + const int idx = ty * 16 + tx; + if (ty == 0) + gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + + int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = dpct::min(d_PointCounter[2 * octave + 1], d_MaxNumPoints); + // if (tx==0 && ty==0) + // printf("%d %d %d %d\n", octave, fstPts, min(d_PointCounter[2*octave], d_MaxNumPoints), totPts); + for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; + bx += item_ct1.get_group_range(2)) + { + + buffer[idx] = 0.0f; + /* + DPCT1118:21: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:103: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sycl::sin(theta); // cosa -sina + float cosa = sycl::cos(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = texObj.read(xpos + cosa, ypos + sina) - + texObj.read(xpos - cosa, ypos - sina); + float dy = texObj.read(xpos - sina, ypos + cosa) - + texObj.read(xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + dpct::atomic_fetch_add( + buffer + p1, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 32, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 8, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 40, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 40, angf * grad2); + } + } + } + /* + DPCT1118:22: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:104: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + /* + DPCT1118:23: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + /* + DPCT1065:105: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + /* + DPCT1118:24: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:106: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * sycl::rsqrt(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + /* + DPCT1118:25: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:107: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } +} + +/* +DPCT1110:26: The total declared local variable size in device function +ExtractSiftDescriptorsOld exceeds 128 bytes and may cause high register +pressure. Consult with your hardware vendor to find the total register size +available and adjust the code, or use smaller sub-group size to avoid high +register pressure. 
+*/
+// ExtractSiftDescriptorsOld
+//
+// Builds the descriptor of one keypoint per work-group: the keypoint handled
+// is d_sift[get_group(2) + fstPts]. Each work-item samples image gradients on
+// a 16x16 grid that is rotated by the keypoint orientation and scaled by
+// 12/16 of the keypoint scale, and accumulates the weighted magnitudes into
+// buffer[] at index 8 * (4 * veri + hori) + angle bin. The accumulated vector
+// is normalized by the root of its sum of squares, clamped at 0.2, normalized
+// again and stored in d_sift[bx].data[idx]. Finally work-item 0 multiplies the
+// keypoint's xpos, ypos and scale by `subsampling`.
+//
+// Parameters:
+//   texObj      image accessor sampled with float coordinates.
+//   d_sift      keypoint array; orientation/scale/xpos/ypos are read, data[]
+//               and (by work-item 0) xpos/ypos/scale are written.
+//   fstPts      offset added to the group id to select the keypoint.
+//   subsampling factor applied to xpos/ypos/scale at the end.
+//   gauss       scratch, indexed 0..15 (written by the ty == 0 row).
+//   buffer      scratch, indexed by idx and idx + 64.
+//   sums        scratch for the tree reduction, indexed 0..63.
+//
+// NOTE(review): the indexing assumes a 16 x 8 local range (128 work-items),
+// as the "0 -> 16" / "0 -> 8" comments suggest, and that gauss/buffer/sums
+// are shared by the whole work-group -- confirm at the launch site.
+void ExtractSiftDescriptorsOld(dpct::image_accessor_ext texObj,
+                               SiftPoint *d_sift, int fstPts, float subsampling,
+                               const sycl::nd_item<3> &item_ct1, float *gauss,
+                               float *buffer, float *sums)
+{
+
+  const int tx = item_ct1.get_local_id(2); // 0 -> 16
+  const int ty = item_ct1.get_local_id(1); // 0 -> 8
+  const int idx = ty * 16 + tx;
+  const int bx = item_ct1.get_group(2) + fstPts; // 0 -> numPts
+  // Row ty == 0 fills the 16-entry weight table; every work-item clears its
+  // own accumulator slot.
+  if (ty == 0)
+    gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f);
+  buffer[idx] = 0.0f;
+  /*
+  DPCT1065:108: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Compute angles and gradients
+  // orientation is converted from degrees to radians (3.1415f approximates pi)
+  float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation;
+  float sina = sycl::sin(theta); // cosa -sina
+  float cosa = sycl::cos(theta); // sina cosa
+  float scale = 12.0f / 16.0f * d_sift[bx].scale;
+  float ssina = scale * sina;
+  float scosa = scale * cosa;
+
+  // Each work-item handles rows y = ty and y = ty + 8 of the 16x16 grid.
+  for (int y = ty; y < 16; y += 8)
+  {
+    float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f;
+    float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f;
+    // Central differences taken along the rotated axes.
+    float dx = texObj.read(xpos + cosa, ypos + sina) -
+               texObj.read(xpos - cosa, ypos - sina);
+    float dy = texObj.read(xpos - sina, ypos + cosa) -
+               texObj.read(xpos + sina, ypos - cosa);
+    float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy);
+    // Maps atan2's result onto a fractional angle-bin coordinate around 0..8.
+    float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f;
+
+    int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins
+    float horf = (tx - 1.5f) / 4.0f - hori;
+    float ihorf = 1.0f - horf;
+    int veri = (y + 2) / 4 - 1;
+    float verf = (y - 1.5f) / 4.0f - veri;
+    float iverf = 1.0f - verf;
+    int angi = angf;
+    int angp = (angi < 7 ? angi + 1 : 0); // next angle bin, wrapping to 0
+    angf -= angi;
+    float iangf = 1.0f - angf;
+
+    int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated
+    int p1 = angi + hist;             // in angles, xpos and ypos -> 8 stores
+    int p2 = angp + hist;
+    // The tx/y guards skip the neighbouring cell when it would fall outside
+    // the grid (hori or veri == -1 on the low side, one past the end on the
+    // high side). Offsets +8 / +32 step one cell right / down.
+    if (tx >= 2)
+    {
+      float grad1 = ihorf * grad;
+      if (y >= 2)
+      { // Upper left
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower left
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1 + 32, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2 + 32, angf * grad2);
+      }
+    }
+    if (tx <= 13)
+    {
+      float grad1 = horf * grad;
+      if (y >= 2)
+      { // Upper right
+        float grad2 = iverf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1 + 8, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2 + 8, angf * grad2);
+      }
+      if (y <= 13)
+      { // Lower right
+        float grad2 = verf * grad1;
+        dpct::atomic_fetch_add(
+            buffer + p1 + 40, iangf * grad2);
+        dpct::atomic_fetch_add(
+            buffer + p2 + 40, angf * grad2);
+      }
+    }
+  }
+  /*
+  DPCT1065:109: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+
+  // Normalize twice and suppress peaks first time
+  // First pass: tree reduction of the squared entries, 128 -> 64 -> ... -> 4
+  // partial sums, with a barrier between every halving step.
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  /*
+  DPCT1065:110: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  /*
+  DPCT1065:111: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  /*
+  DPCT1065:112: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  /*
+  DPCT1065:113: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  /*
+  DPCT1065:114: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // Every work-item adds the last four partial sums itself.
+  float tsum1 = sums[0] + sums[1] + sums[2] + sums[3];
+  buffer[idx] = buffer[idx] * sycl::rsqrt(tsum1);
+
+  // Clamp the normalized entry at 0.2 before the second normalization.
+  if (buffer[idx] > 0.2f)
+    buffer[idx] = 0.2f;
+  /*
+  DPCT1065:115: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  // Second pass: the same tree reduction over the clamped values.
+  if (idx < 64)
+    sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64];
+  /*
+  DPCT1065:116: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 32)
+    sums[idx] = sums[idx] + sums[idx + 32];
+  /*
+  DPCT1065:117: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 16)
+    sums[idx] = sums[idx] + sums[idx + 16];
+  /*
+  DPCT1065:118: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 8)
+    sums[idx] = sums[idx] + sums[idx + 8];
+  /*
+  DPCT1065:119: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  if (idx < 4)
+    sums[idx] = sums[idx] + sums[idx + 4];
+  /*
+  DPCT1065:120: Consider replacing sycl::nd_item::barrier() with
+  sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+  performance if there is no access to global memory.
+  */
+  item_ct1.barrier();
+  float tsum2 = sums[0] + sums[1] + sums[2] + sums[3];
+
+  // Store the twice-normalized entry into the keypoint's descriptor.
+  float *desc = d_sift[bx].data;
+  desc[idx] = buffer[idx] * sycl::rsqrt(tsum2);
+  // A single work-item rescales the keypoint geometry by `subsampling`.
+  if (idx == 0)
+  {
+    d_sift[bx].xpos *= subsampling;
+    d_sift[bx].ypos *= subsampling;
+    d_sift[bx].scale *= subsampling;
+  }
+}
+
+/*
+DPCT1110:27: The total declared local variable size in device function
+ExtractSiftDescriptor exceeds 128 bytes and may cause high register pressure.
+Consult with your hardware vendor to find the total register size available and
+adjust the code, or use smaller sub-group size to avoid high register pressure.
+*/ +void ExtractSiftDescriptor(dpct::image_accessor_ext texObj, + SiftPoint *d_sift, float subsampling, int octave, + int bx, const sycl::nd_item<3> &item_ct1, + float *gauss, float *buffer, float *sums) +{ + + const int idx = item_ct1.get_local_id(2); + const int tx = idx & 15; // 0 -> 16 + const int ty = idx / 16; // 0 -> 8 + if (ty == 0) + gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + /* + DPCT1065:121: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sycl::sin(theta); // cosa -sina + float cosa = sycl::cos(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = texObj.read(xpos + cosa, ypos + sina) - + texObj.read(xpos - cosa, ypos - sina); + float dy = texObj.read(xpos - sina, ypos + cosa) - + texObj.read(xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + dpct::atomic_fetch_add( + buffer + p1, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 32, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 8, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + dpct::atomic_fetch_add( + buffer + p1 + 40, iangf * grad2); + dpct::atomic_fetch_add( + buffer + p2 + 40, angf * grad2); + } + } + } + /* + DPCT1065:122: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + /* + DPCT1065:123: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = sycl::min(buffer[idx] * sycl::rsqrt(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + /* + DPCT1065:124: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * sycl::rsqrt(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + /* + DPCT1065:125: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); +} + +void RescalePositions(SiftPoint *d_sift, int numPts, float scale, + const sycl::nd_item<3> &item_ct1) +{ + int num = item_ct1.get_group(2) * item_ct1.get_local_range(2) + + item_ct1.get_local_id(2); + if (num < numPts) + { + d_sift[num].xpos *= scale; + d_sift[num].ypos *= scale; + d_sift[num].scale *= scale; + } +} + +void ComputeOrientations(dpct::image_accessor_ext texObj, + SiftPoint *d_Sift, int fstPts, + const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints, + unsigned int *d_PointCounter, float *hist, + float *gauss) +{ + + const int tx = item_ct1.get_local_id(2); + const int bx = item_ct1.get_group(2) + fstPts; + float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + /* + DPCT1065:126: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to 
global memory. + */ + item_ct1.barrier(); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = texObj.read(xf + 1.0, yf) - texObj.read(xf - 1.0, yf); + float dy = texObj.read(xf, yf + 1.0) - texObj.read(xf, yf - 1.0); + int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sycl::sqrt(dx * dx + dy * dy); + dpct::atomic_fetch_add( + &hist[bin], grad * gauss[xd] * gauss[yd]); + } + /* + DPCT1065:127: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + /* + DPCT1065:128: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (tx < 32) + { + float v = hist[32 + tx]; + if(x1p < 32 && x1m < 32) + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); + } + /* + DPCT1065:129: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + if (maxval2 > 0.8f * maxval1) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = dpct::atomic_fetch_compare_inc< + sycl::access::address_space::generic_space>(d_PointCounter, + 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } +} + +// With constant number of blocks +/* +DPCT1110:28: The total declared local variable size in device function +ComputeOrientationsCONSTNew exceeds 128 bytes and may cause high register +pressure. Consult with your hardware vendor to find the total register size +available and adjust the code, or use smaller sub-group size to avoid high +register pressure. 
+*/ +void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, + SiftPoint *d_Sift, int octave, + const sycl::nd_item<3> &item_ct1, + int d_MaxNumPoints, + unsigned int *d_PointCounter, + sycl::local_accessor img, + sycl::local_accessor tmp, + float *hist, float *gaussx, float *gaussy) +{ +#define RAD 9 +#define WID (2 * RAD + 1) +#define LEN 32 //%%%% Note: Lowe suggests 36, not 32 + + const int tx = item_ct1.get_local_id(2); + + int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); + for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; + bx += item_ct1.get_group_range(2)) + { + + float sc = d_Sift[bx].scale; + for (int i = tx; i < 2 * LEN; i += item_ct1.get_local_range(2)) + hist[i] = 0.0f; + float xp = d_Sift[bx].xpos; + float yp = d_Sift[bx].ypos; + int xi = (int)xp; + int yi = (int)yp; + float xf = xp - xi; + float yf = yp - yi; + for (int i = tx; i < WID * WID; i += item_ct1.get_local_range(2)) + { + int y = i / WID; + int x = i - y * WID; + int xp = sycl::max(sycl::min(x - RAD + xi, w - 1), 0); + int yp = sycl::max(sycl::min(y - RAD + yi, h - 1), 0); + img[y][x] = image[yp * p + xp]; + } + float fac[5]; + fac[1] = fac[3] = + (sc > 0.5f ? sycl::native::exp(-1.0f / (2.0f * (sc * sc - 0.25f))) + : 0.0f); + fac[0] = fac[4] = + (sc > 0.5f ? sycl::native::exp(-4.0f / (2.0f * (sc * sc - 0.25f))) + : 0.0f); + fac[2] = 1.0f; + float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0 + if (tx < WID) + { + gaussx[tx] = + sycl::native::exp(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf)); + gaussy[tx] = + sycl::native::exp(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf)); + } + /* + DPCT1118:29: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + /* + DPCT1065:130: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int i = tx; i < (WID - 4) * WID; i += item_ct1.get_local_range(2)) + { + int y = i / WID; + int x = i - y * WID; + y += 2; + tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) + + fac[0] * (img[y - 2][x] + img[y + 2][x]); + } + /* + DPCT1118:30: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:131: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int i = tx; i < (WID - 4) * (WID - 4); + i += item_ct1.get_local_range(2)) + { + int y = i / (WID - 4); + int x = i - y * (WID - 4); + x += 2; + y += 2; + img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) + + fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]); + } + /* + DPCT1118:31: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:132: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int i = tx; i < (WID - 6) * (WID - 6); + i += item_ct1.get_local_range(2)) + { + int y = i / (WID - 6); + int x = i - y * (WID - 6); + x += 3; + y += 3; + float dx = img[y][x + 1] - img[y][x - 1]; + float dy = img[y + 1][x] - img[y - 1][x]; + int bin = + (int)((LEN / 2) * sycl::atan2(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) % + LEN; + /* + DPCT1013:135: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. 
SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float grad = sycl::sqrt(dx * dx + dy * dy); + dpct::atomic_fetch_add( + &hist[LEN + bin], grad * gaussx[x] * gaussy[y]); + } + /* + DPCT1118:32: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:133: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1); + int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1); + int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2); + int x2p = (tx < (LEN - 2) ? tx + 2 : tx - LEN + 2); + if (tx < LEN) + { + hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) + + 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]); + hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + + 0.0f * (hist[x2m] + hist[x2p]); + float val = hist[tx + LEN]; + hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? val : 0.0f); + } + /* + DPCT1118:33: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:134: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < LEN; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[LEN + ((i1 + 1) % LEN)]; + float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN; + dpct::atomic_fetch_max( + &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1 && true) + { + float val1 = hist[LEN + ((i2 + 1) % LEN)]; + float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = dpct::atomic_fetch_compare_inc< + sycl::access::address_space::generic_space>( + &d_PointCounter[2 * octave + 1], 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = sc; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + } +#undef RAD +#undef WID +#undef LEN +} + +// With constant number of blocks +/* +DPCT1110:34: The total declared local variable size in device function +ComputeOrientationsCONST exceeds 128 bytes and may cause high register pressure. +Consult with your hardware vendor to find the total register size available and +adjust the code, or use smaller sub-group size to avoid high register pressure. 
+*/
+// ComputeOrientationsCONST
+//
+// Orientation assignment with a fixed number of work-groups: each group
+// walks the keypoints of `octave`, from d_PointCounter[2 * octave - 1] up to
+// d_PointCounter[2 * octave] (both capped at d_MaxNumPoints), in steps of the
+// group count. Per keypoint it builds a 32-bin gradient-direction histogram
+// over an 11x11 window read through texObj, smooths it, keeps local maxima
+// and lets work-item 0 store the strongest peak as the orientation
+// (11.25 degrees per bin). If the second peak exceeds 0.8 of the first, a
+// slot is taken from d_PointCounter[2 * octave + 1] and, when it is below
+// d_MaxNumPoints, a copy of the keypoint with the second orientation is
+// stored there.
+//
+// Parameters:
+//   hist   scratch, 64 floats: [0..31] raw / peak histogram, [32..63] smoothed.
+//   gauss  scratch, 11 floats of window weights.
+//
+// NOTE(review): the tx / 11 decomposition with the yd < 11 guard expects at
+// least 121 work-items along dimension 2 -- confirm at the launch site.
+void ComputeOrientationsCONST(dpct::image_accessor_ext texObj,
+                              SiftPoint *d_Sift, int octave,
+                              const sycl::nd_item<3> &item_ct1,
+                              int d_MaxNumPoints, unsigned int *d_PointCounter,
+                              float *hist, float *gauss)
+{
+
+  const int tx = item_ct1.get_local_id(2);
+
+  int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  // bx depends only on the group id, so every work-item of a group runs the
+  // same number of iterations and reaches the same barriers.
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts;
+       bx += item_ct1.get_group_range(2))
+  {
+
+    float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+    if (tx < 11)
+      gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5));
+    if (tx < 64)
+      hist[tx] = 0.0f;
+    /*
+    DPCT1118:35: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:136: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11;
+    int xd = tx - yd * 11;
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11)
+    {
+      // Float literals keep the coordinate arithmetic in single precision;
+      // the previous 1.0 literals promoted it to double inside device code.
+      float dx = texObj.read(xf + 1.0f, yf) - texObj.read(xf - 1.0f, yf);
+      float dy = texObj.read(xf, yf + 1.0f) - texObj.read(xf, yf - 1.0f);
+      int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f;
+      if (bin > 31)
+        bin = 0;
+      float grad = sycl::sqrt(dx * dx + dy * dy);
+      dpct::atomic_fetch_add(
+          &hist[bin], grad * gauss[xd] * gauss[yd]);
+    }
+    /*
+    DPCT1118:36: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:137: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // Circular neighbour indices within the 32-bin histogram.
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31);
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      // 1-4-6-4-1 smoothing, written to the upper half of hist.
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]);
+    }
+    /*
+    DPCT1118:37: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:138: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    if (tx < 32)
+    {
+      // Keep only local maxima of the smoothed histogram in the lower half.
+      float v = hist[32 + tx];
+      if (x1m < 32)
+        hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f);
+    }
+    /*
+    DPCT1118:38: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:139: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    if (tx == 0)
+    {
+      // Sequential scan for the two largest peaks (i1 strongest, i2 second).
+      float maxval1 = 0.0f;
+      float maxval2 = 0.0f;
+      int i1 = -1;
+      int i2 = -1;
+      for (int i = 0; i < 32; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      // Refine the peak position from the two smoothed neighbours.
+      float val1 = hist[32 + ((i1 + 1) & 31)];
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+      // Raise the output counter to at least the end of this octave's input
+      // range before slots are taken from it.
+      dpct::atomic_fetch_max(
+          &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1 && true)
+      {
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        unsigned int idx = dpct::atomic_fetch_compare_inc<
+            sycl::access::address_space::generic_space>(
+            &d_PointCounter[2 * octave + 1], 0x7fffffff);
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    /*
+    DPCT1118:39: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:140: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    // Keeps the group together until work-item 0 has finished with hist.
+    item_ct1.barrier();
+  }
+}
+
+// With constant number of blocks
+// OrientAndExtractCONST
+//
+// Fused orientation assignment and descriptor extraction. For every keypoint
+// bx of `octave` handled by this work-group, the orientation is computed as
+// in ComputeOrientationsCONST (here with a Gaussian factor of
+// -1 / (4.5 * scale^2)), then ExtractSiftDescriptor is run for bx and, when a
+// second orientation produced a new keypoint slot, for that slot as well.
+//
+// Parameters:
+//   gauss, buffer, sums  scratch passed through to ExtractSiftDescriptor;
+//                        gauss is also used for the 11 window weights.
+//   hist                 scratch, 64 floats.
+//   idx                  written by work-item 0 and read by all work-items
+//                        after the barrier: 0xffffffff when no second
+//                        keypoint was created, otherwise the slot index.
+//                        NOTE(review): this only works if the referenced
+//                        variable is shared by the whole work-group --
+//                        confirm at the launch site.
+void OrientAndExtractCONST(dpct::image_accessor_ext texObj,
+                           SiftPoint *d_Sift, float subsampling, int octave,
+                           const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints,
+                           unsigned int *d_PointCounter, float *gauss,
+                           float *buffer, float *sums, float *hist,
+                           unsigned int &idx)
+{
+
+  //%%%%
+  const int tx = item_ct1.get_local_id(2);
+
+  int fstPts = dpct::min(d_PointCounter[2 * octave - 1], d_MaxNumPoints);
+  int totPts = dpct::min(d_PointCounter[2 * octave + 0], d_MaxNumPoints);
+  for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts;
+       bx += item_ct1.get_group_range(2))
+  {
+
+    float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale);
+    if (tx < 11)
+      gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5));
+    if (tx < 64)
+      hist[tx] = 0.0f;
+    /*
+    DPCT1118:40: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:141: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    float xp = d_Sift[bx].xpos - 4.5f;
+    float yp = d_Sift[bx].ypos - 4.5f;
+    int yd = tx / 11;
+    int xd = tx - yd * 11;
+    float xf = xp + xd;
+    float yf = yp + yd;
+    if (yd < 11)
+    {
+      // Float literals keep the coordinate arithmetic in single precision;
+      // the previous 1.0 literals promoted it to double inside device code.
+      float dx = texObj.read(xf + 1.0f, yf) - texObj.read(xf - 1.0f, yf);
+      float dy = texObj.read(xf, yf + 1.0f) - texObj.read(xf, yf - 1.0f);
+      int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f;
+      if (bin > 31)
+        bin = 0;
+      float grad = sycl::sqrt(dx * dx + dy * dy);
+      dpct::atomic_fetch_add(
+          &hist[bin], grad * gauss[xd] * gauss[yd]);
+    }
+    /*
+    DPCT1118:41: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:142: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    // Circular neighbour indices within the 32-bin histogram.
+    int x1m = (tx >= 1 ? tx - 1 : tx + 31);
+    int x1p = (tx <= 30 ? tx + 1 : tx - 31);
+    if (tx < 32)
+    {
+      int x2m = (tx >= 2 ? tx - 2 : tx + 30);
+      int x2p = (tx <= 29 ? tx + 2 : tx - 30);
+      hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]);
+    }
+    /*
+    DPCT1118:42: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:143: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    if (tx < 32)
+    {
+      float v = hist[32 + tx];
+      if (x1m < 32)
+        hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f);
+    }
+    /*
+    DPCT1118:43: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:144: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    item_ct1.barrier();
+    if (tx == 0)
+    {
+      float maxval1 = 0.0f;
+      float maxval2 = 0.0f;
+      int i1 = -1;
+      int i2 = -1;
+      for (int i = 0; i < 32; i++)
+      {
+        float v = hist[i];
+        if (v > maxval1)
+        {
+          maxval2 = maxval1;
+          maxval1 = v;
+          i2 = i1;
+          i1 = i;
+        }
+        else if (v > maxval2)
+        {
+          maxval2 = v;
+          i2 = i;
+        }
+      }
+      float val1 = hist[32 + ((i1 + 1) & 31)];
+      float val2 = hist[32 + ((i1 + 31) & 31)];
+      float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2);
+      d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+      // Sentinel: fails the idx < d_MaxNumPoints test below unless a second
+      // keypoint is created.
+      idx = 0xffffffff; //%%%%
+      dpct::atomic_fetch_max(
+          &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]);
+      if (maxval2 > 0.8f * maxval1)
+      {
+        float val1 = hist[32 + ((i2 + 1) & 31)];
+        float val2 = hist[32 + ((i2 + 31) & 31)];
+        float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2);
+        idx = dpct::atomic_fetch_compare_inc<
+            sycl::access::address_space::generic_space>(
+            &d_PointCounter[2 * octave + 1], 0x7fffffff); //%%%%
+        if (idx < d_MaxNumPoints)
+        {
+          d_Sift[idx].xpos = d_Sift[bx].xpos;
+          d_Sift[idx].ypos = d_Sift[bx].ypos;
+          d_Sift[idx].scale = d_Sift[bx].scale;
+          d_Sift[idx].sharpness = d_Sift[bx].sharpness;
+          d_Sift[idx].edgeness = d_Sift[bx].edgeness;
+          d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak);
+          d_Sift[idx].subsampling = d_Sift[bx].subsampling;
+        }
+      }
+    }
+    /*
+    DPCT1118:44: SYCL group functions and algorithms must be encountered in
+    converged control flow. You may need to adjust the code.
+    */
+    /*
+    DPCT1065:145: Consider replacing sycl::nd_item::barrier() with
+    sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better
+    performance if there is no access to global memory.
+    */
+    // After this barrier every work-item sees the orientation and idx that
+    // work-item 0 wrote.
+    item_ct1.barrier();
+    ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, bx, item_ct1,
+                          gauss, buffer, sums); //%%%%
+    if (idx < d_MaxNumPoints)                   //%%%%
+      ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, idx, item_ct1,
+                            gauss, buffer, sums); //%%%%
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Subtract two images (multi-scale version)
+///////////////////////////////////////////////////////////////////////////////
+
+// __global__ void FindPointsMultiTest(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave)
+// {
+// #define MEMWID (MINMAX_W + 2)
+// __shared__ unsigned int cnt;
+// __shared__ unsigned short points[3 * MEMWID];
+
+// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && threadIdx.y == 0)
+// {
+// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]);
+// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]);
+// }
+// int tx = threadIdx.x;
+// int ty = threadIdx.y;
+// if (tx == 0 && ty == 0)
+// cnt = 0;
+// __syncthreads();
+
+// int ypos = MINMAX_H * blockIdx.y + ty;
+// if (ypos >= height)
+// return;
+// int block = blockIdx.x / NUM_SCALES;
+// int scale = blockIdx.x - NUM_SCALES * block;
+// int minx = block * MINMAX_W;
+// int maxx = min(minx + MINMAX_W, width);
+// int xpos = minx + tx;
+// int size = pitch * height;
+// int ptr = size * scale + max(min(xpos - 1, width - 1), 0);
+
+// float maxv = fabs(d_Data0[ptr + ypos * pitch + 1 * size]);
+// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W));
+// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W));
+// maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W));
+// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W));
+// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W));
+
+// if (Shuffle(maxv, 0) > thresh)
+// {
+// int yptr1 = ptr + ypos * pitch;
+// int yptr0 = ptr + 
max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// float ymin1 = fminf(fminf(d10, d11), d12); +// float ymax1 = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// float ymin3 = fminf(fminf(d30, d31), d32); +// float ymax3 = fmaxf(fmaxf(d30, d31), d32); +// float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3); +// float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3); + +// float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); +// float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(nmin2, ymin1), ymin3); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// } +// __syncthreads(); +// if (ty == 0 && tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; +// float dxy = 
0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +/* +DPCT1110:45: The total declared local variable size in device function +FindPointsMultiNew exceeds 128 bytes and may cause high register pressure. +Consult with your hardware vendor to find the total register size available and +adjust the code, or use smaller sub-group size to avoid high register pressure. +*/ +void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, + int height, float subsampling, float lowestScale, + float thresh, float factor, float edgeLimit, int octave, + const sycl::nd_item<3> &item_ct1, int d_MaxNumPoints, + unsigned int *d_PointCounter, unsigned short *points) +{ /* Scans up to MINMAX_H rows of three consecutive planes of d_Data0 for samples that are strict extrema beyond thresh, collects their coordinates in points, then refines each candidate and appends accepted ones to d_Sift through the counter d_PointCounter[2 * octave]. */ +#define MEMWID (MINMAX_W + 2) + + if (item_ct1.get_group(2) == 0 && item_ct1.get_group(1) == 0 && + item_ct1.get_local_id(2) == 0) + { /* one work-item raises both counters of this octave to at least d_PointCounter[2 * octave - 1] */ + dpct::atomic_fetch_max( + &d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); + dpct::atomic_fetch_max( + &d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); + } + int tx = item_ct1.get_local_id(2); + int block = item_ct1.get_group(2) / NUM_SCALES; + int scale = item_ct1.get_group(2) - NUM_SCALES * block; /* the group index along dimension 2 encodes both a horizontal tile (block) and a scale */ + int minx = block * MINMAX_W; + int maxx = sycl::min(minx + MINMAX_W, width); + int xpos = minx + tx; + int size = pitch * height; + int ptr = size * scale + sycl::max(sycl::min(xpos - 1, width - 1), 0); + + int yloops = dpct::min( + (unsigned int)(height - MINMAX_H * item_ct1.get_group(1)), MINMAX_H); + float maxv = 0.0f; + for (int y = 0; y < yloops; y++) + { + int ypos = MINMAX_H * item_ct1.get_group(1) + y; + int yptr1 = ptr + ypos * pitch; + float val = d_Data0[yptr1 + 1 * size]; + maxv = sycl::fmax(maxv, sycl::fabs(val)); /* largest magnitude over the rows of this column, read one plane above the plane selected by scale */ + } + // if (tx==0) printf("XXX1\n"); + if (!sycl::any_of_group( + item_ct1.get_sub_group(), + (0xffffffff 
& + (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && + maxv > thresh)) + return; /* leave when no work-item of the sub-group exceeds thresh; the lane-mask term is non-zero for lanes 0..31 */ + // if (tx==0) printf("XXX2\n"); + + int ptbits = 0; + for (int y = 0; y < yloops; y++) + { + + int ypos = MINMAX_H * item_ct1.get_group(1) + y; + int yptr1 = ptr + ypos * pitch; + float d11 = d_Data0[yptr1 + 1 * size]; /* centre sample: middle plane, current row */ + if (sycl::any_of_group( + item_ct1.get_sub_group(), + (0xffffffff & + (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && + sycl::fabs(d11) > thresh)) + { + + int yptr0 = ptr + sycl::max(0, ypos - 1) * pitch; + int yptr2 = ptr + sycl::min(height - 1, ypos + 1) * pitch; + float d01 = d_Data0[yptr1]; + float d10 = d_Data0[yptr0 + 1 * size]; + float d12 = d_Data0[yptr2 + 1 * size]; + float d21 = d_Data0[yptr1 + 2 * size]; + + float d00 = d_Data0[yptr0]; + float d02 = d_Data0[yptr2]; + float ymin1 = sycl::fmin(sycl::fmin(d00, d01), d02); + float ymax1 = sycl::fmax(sycl::fmax(d00, d01), d02); + float d20 = d_Data0[yptr0 + 2 * size]; + float d22 = d_Data0[yptr2 + 2 * size]; + float ymin3 = sycl::fmin(sycl::fmin(d20, d21), d22); + float ymax3 = sycl::fmax(sycl::fmax(d20, d21), d22); + float ymin2 = sycl::fmin( + sycl::fmin(ymin1, sycl::fmin(sycl::fmin(d10, d12), d11)), ymin3); + float ymax2 = sycl::fmax( + sycl::fmax(ymax1, sycl::fmax(sycl::fmax(d10, d12), d11)), ymax3); + + float nmin2 = sycl::fmin(ShiftUp(ymin2, 1, item_ct1), + ShiftDown(ymin2, 1, item_ct1)); + float nmax2 = sycl::fmax(ShiftUp(ymax2, 1, item_ct1), + ShiftDown(ymax2, 1, item_ct1)); + float minv = sycl::fmin(sycl::fmin(nmin2, ymin1), ymin3); + minv = sycl::fmin(sycl::fmin(minv, d10), d12); + float maxv = sycl::fmax(sycl::fmax(nmax2, ymax1), ymax3); + maxv = sycl::fmax(sycl::fmax(maxv, d10), d12); + + if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) + ptbits |= ((d11 < sycl::fmin(-thresh, minv)) | + (d11 > sycl::fmax(thresh, maxv))) + << y; /* bit y is set when d11 lies below -thresh and every gathered minimum, or above thresh and every gathered maximum */ + } + } + + unsigned int totbits = sycl::popcount(ptbits); + unsigned int numbits = totbits; /* the loop below looks like an inclusive prefix sum over lanes; assumes ShiftUp(v, d) yields the value from lane tx - d (confirm) */ + for (int d = 1; d < 32; d <<= 1) + { + unsigned int num 
= ShiftUp(totbits, d, item_ct1); + if (tx >= d) + totbits += num; + } + int pos = totbits - numbits; /* first slot in points for this work-item */ + for (int y = 0; y < yloops; y++) + { + int ypos = MINMAX_H * item_ct1.get_group(1) + y; + if (ptbits & (1 << y) && pos < MEMWID) + { + points[2 * pos + 0] = xpos - 1; + points[2 * pos + 1] = ypos; /* points holds x, y pairs */ + pos++; + } + } + + totbits = Shuffle(totbits, 31, item_ct1); /* presumably broadcasts the total from lane 31 (confirm Shuffle semantics) */ + if (tx < totbits) + { /* NOTE(review): no explicit barrier is visible between the writes to points above and these reads by other work-items, only the Shuffle call; confirm this is safe for the sub-group size in use */ + int xpos = points[2 * tx + 0]; + int ypos = points[2 * tx + 1]; + int ptr = xpos + (ypos + (scale + 1) * height) * pitch; + float val = d_Data0[ptr]; + float *data1 = &d_Data0[ptr]; + float dxx = 2.0f * val - data1[-1] - data1[1]; + float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; + float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); + float tra = dxx + dyy; + float det = dxx * dyy - dxy * dxy; + if (tra * tra < edgeLimit * det) + { /* continue only when trace squared is below edgeLimit times the determinant */ + float edge = (tra * tra) / det; + float dx = 0.5f * (data1[1] - data1[-1]); + float dy = 0.5f * (data1[pitch] - data1[-pitch]); + float *data0 = d_Data0 + ptr - height * pitch; + float *data2 = d_Data0 + ptr + height * pitch; + float ds = 0.5f * (data0[0] - data2[0]); + float dss = 2.0f * val - data2[0] - data0[0]; + float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); + float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); + float idxx = dyy * dss - dys * dys; + float idxy = dys * dxs - dxy * dss; + float idxs = dxy * dys - dyy * dxs; + float idet = 1.0f / (idxx * dxx + idxy * dxy + idxs * dxs); + float idyy = dxx * dss - dxs * dxs; + float idys = dxy * dxs - dxx * dys; + float idss = dxx * dyy - dxy * dxy; + float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); + float pdy = idet * (idxy * dx + idyy * dy + idys * ds); + float pds = idet * (idxs * dx + idys * dy + idss * ds); /* offsets obtained by applying the cofactor-based inverse of the symmetric 3x3 matrix of second differences to dx, dy, ds */ + if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) + { + pdx = dx / dxx; + pdy = dy / dyy; + pds = ds / dss; + } /* any offset outside -0.5..0.5 is replaced by independent per-axis estimates */ + float dval = 0.5f * (dx * 
pdx + dy * pdy + ds * pds); + int maxPts = d_MaxNumPoints; + float sc = + dpct::pow(2.0f, (float)scale / NUM_SCALES) * sycl::exp2(pds * factor); + if (sc >= lowestScale) + { + dpct::atomic_fetch_max( + &d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); + unsigned int idx = dpct::atomic_fetch_compare_inc< + sycl::access::address_space::generic_space>( + &d_PointCounter[2 * octave + 0], 0x7fffffff); + idx = (idx >= maxPts ? maxPts - 1 : idx); /* once the counter reaches maxPts every further point is written to the last slot */ + d_Sift[idx].xpos = xpos + pdx; + d_Sift[idx].ypos = ypos + pdy; + d_Sift[idx].scale = sc; + d_Sift[idx].sharpness = val + dval; + d_Sift[idx].edgeness = edge; + d_Sift[idx].subsampling = subsampling; + } + } + } +} + +// __global__ void FindPointsMulti(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +// { +// #define MEMWID (MINMAX_W + 2) +// __shared__ unsigned int cnt; +// __shared__ unsigned short points[3 * MEMWID]; + + +// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); +// } +// int tx = threadIdx.x; +// int block = blockIdx.x / NUM_SCALES; +// int scale = blockIdx.x - NUM_SCALES * block; +// int minx = block * MINMAX_W; +// int maxx = min(minx + MINMAX_W, width); +// int xpos = minx + tx; +// int size = pitch * height; +// int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + +// int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); +// float maxv = 0.0f; +// for (int y = 0; y < yloops; y++) +// { +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// float val = d_Data0[yptr1 + 1 * size]; +// maxv = fmaxf(maxv, fabs(val)); +// } +// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 4, 
MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W)); +// if (Shuffle(maxv, 0) <= thresh) +// return; + +// if (tx == 0) +// cnt = 0; +// __syncthreads(); + +// for (int y = 0; y < yloops; y++) +// { + +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// int yptr0 = ptr + max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// float ymin1 = fminf(fminf(d10, d11), d12); +// float ymax1 = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// float ymin3 = fminf(fminf(d30, d31), d32); +// float ymax3 = fmaxf(fmaxf(d30, d31), d32); +// float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3); +// float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3); + +// float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); +// float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(nmin2, ymin1), ymin3); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// } +// if (tx < cnt) +// { +// int xpos = 
points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; +// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * 
octave - 1]); +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +// __global__ void FindPointsMultiOld(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +// { +// #define MEMWID (MINMAX_W + 2) +// __shared__ float ymin1[MEMWID], ymin2[MEMWID], ymin3[MEMWID]; +// __shared__ float ymax1[MEMWID], ymax2[MEMWID], ymax3[MEMWID]; +// __shared__ unsigned int cnt; +// __shared__ unsigned short points[3 * MEMWID]; + +// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); +// } +// int tx = threadIdx.x; +// int block = blockIdx.x / NUM_SCALES; +// int scale = blockIdx.x - NUM_SCALES * block; +// int minx = block * MINMAX_W; +// int maxx = min(minx + MINMAX_W, width); +// int xpos = minx + tx; +// int size = pitch * height; +// int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + +// int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); +// float maxv = 0.0f; +// for (int y = 0; y < yloops; y++) +// { +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// float val = d_Data0[yptr1 + 1 * size]; +// maxv = fmaxf(maxv, fabs(val)); +// } +// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W)); +// if (Shuffle(maxv, 0) <= thresh) +// 
return; + +// if (tx == 0) +// cnt = 0; +// __syncthreads(); + +// for (int y = 0; y < yloops; y++) +// { + +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// int yptr0 = ptr + max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// ymin1[tx] = fminf(fminf(d10, d11), d12); +// ymax1[tx] = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// ymin3[tx] = fminf(fminf(d30, d31), d32); +// ymax3[tx] = fmaxf(fmaxf(d30, d31), d32); +// ymin2[tx] = fminf(fminf(ymin1[tx], fminf(fminf(d20, d22), d21)), ymin3[tx]); +// ymax2[tx] = fmaxf(fmaxf(ymax1[tx], fmaxf(fmaxf(d20, d22), d21)), ymax3[tx]); + +// __syncthreads(); + +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(fminf(ymin2[tx - 1], ymin2[tx + 1]), ymin1[tx]), ymin3[tx]); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(fmaxf(ymax2[tx - 1], ymax2[tx + 1]), ymax1[tx]), ymax3[tx]); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// __syncthreads(); +// } +// if (tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = 
d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; +// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +void LaplaceMultiTex(dpct::image_accessor_ext texObj, float *d_Result, + int width, int pitch, int height, int octave, + const sycl::nd_item<3> &item_ct1, + float const *d_LaplaceKernel, float *data1, float *data2) +{ /* Filters texObj with a symmetric 9-tap kernel, first vertically into data1 and then horizontally into data2, with one scale per value of local id 1, and writes the difference between consecutive scales to d_Result. */ + + const int tx = item_ct1.get_local_id(2); + const int xp = item_ct1.get_group(2) * LAPLACE_W + tx; + const int yp = item_ct1.get_group(1); + const int scale = item_ct1.get_local_id(1); + float *kernel = + const_cast(d_LaplaceKernel + octave * 12 * 16 + scale * 16); /* NOTE(review): const_cast has no target type in this text, so its template argument list looks lost; kernel is only read below */ + float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float x = xp - 3.5; + float y = yp + 0.5; + sdata1[tx] = kernel[0] * texObj.read(x, y) + + kernel[1] * (texObj.read(x, y - 1.0) + texObj.read(x, y + 1.0)) + + kernel[2] * (texObj.read(x, y - 2.0) + texObj.read(x, y + 2.0)) + + kernel[3] * (texObj.read(x, y - 3.0) + texObj.read(x, y + 3.0)) + + kernel[4] * (texObj.read(x, y - 4.0) + texObj.read(x, y + 4.0)); /* vertical pass: kernel[0] is the centre tap, kernel[1..4] weight the rows at distance 1..4 on both sides */ + item_ct1.barrier(sycl::access::fence_space::local_space); + float *sdata2 = data2 + LAPLACE_W * scale; + if (tx < LAPLACE_W) + { + sdata2[tx] = kernel[0] * sdata1[tx + 4] + + kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) + + kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + + kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) + + kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]); /* horizontal pass over the nine sdata1 entries starting at tx */ + } + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) + d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; /* the second operand is the same column in the data2 row of the next scale */ +} + +/* +DPCT1110:46: The total declared local variable size in device function +LaplaceMultiMem exceeds 128 bytes and may cause high register pressure. 
Consult +with your hardware vendor to find the total register size available and adjust +the code, or use smaller sub-group size to avoid high register pressure. +*/ +void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch, + int height, int octave, const sycl::nd_item<3> &item_ct1, + float const *d_LaplaceKernel, float *buff) +{ /* For every one of LAPLACE_S scales, filters one column of d_Image vertically into buff, then after a barrier filters buff horizontally and writes the difference between consecutive scales to d_Result. Row and column indices are clamped to the image bounds when reading d_Image. */ + + const int tx = item_ct1.get_local_id(2); + const int xp = item_ct1.get_group(2) * LAPLACE_W + tx; + const int yp = item_ct1.get_group(1); + float *data = d_Image + sycl::max(sycl::min(xp - LAPLACE_R, width - 1), + 0); // multiply with 4 for max func + float temp[2 * LAPLACE_R + 1]; /* column samples for rows yp - LAPLACE_R up to yp plus LAPLACE_R */ + + float kern[LAPLACE_S][LAPLACE_R + 1]; + if (xp < (width + 2 * LAPLACE_R)) + { + for (int i = 0; i <= 2 * LAPLACE_R; i++) + temp[i] = + data[sycl::max(0, sycl::min(yp + i - LAPLACE_R, height - 1)) * pitch]; + for (int scale = 0; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float *kernel = + const_cast(d_LaplaceKernel + octave * 12 * 16 + scale * 16); /* NOTE(review): const_cast has no target type in this text, so its template argument list looks lost; kernel is only read below */ + for (int i = 0; i <= LAPLACE_R; i++) + { + kern[scale][i] = kernel[i]; + } + float sum = kern[scale][0] * temp[LAPLACE_R]; +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + sum += kern[scale][j] * (temp[LAPLACE_R - j] + temp[LAPLACE_R + j]); + buf[tx] = sum; /* vertical filter result for this scale */ + } + } + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < LAPLACE_W && xp < (width + 2 * LAPLACE_R)) + { + int scale = 0; + float oldRes = kern[scale][0] * buff[tx + LAPLACE_R]; + +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + oldRes += kern[scale][j] * (buff[tx + LAPLACE_R - j] + buff[tx + LAPLACE_R + j]); + + for (int scale = 1; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + + float res = kern[scale][0] * buf[tx + LAPLACE_R]; + +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + res += kern[scale][j] * (buf[tx + LAPLACE_R - j] + buf[tx + LAPLACE_R + j]); + + d_Result[(scale - 1) * height * pitch + 
yp * pitch + xp] = res - oldRes; /* difference between this scale and the previous one, stored in plane scale - 1 */ + oldRes = res; + } + } +} + +// __global__ void LaplaceMultiMemWide(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int xp4 = blockIdx.x * LAPLACE_W + 4 * tx; +// const int yp = blockIdx.y; +// float kern[LAPLACE_S][LAPLACE_R + 1]; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// float temp[9]; +// if (xp < (width + 2 * LAPLACE_R)) +// { +// for (int i = 0; i < 4; i++) +// temp[i] = data[max(0, min(yp + i - 4, height - 1)) * pitch]; +// for (int i = 4; i < 8 + 1; i++) +// temp[i] = data[min(yp + i - 4, height - 1) * pitch]; +// for (int scale = 0; scale < LAPLACE_S; scale++) +// { +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// for (int i = 0; i <= LAPLACE_R; i++) +// kern[scale][i] = kernel[LAPLACE_R - i]; +// float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// buf[tx] = kern[scale][4] * temp[4] + +// kern[scale][3] * (temp[3] + temp[5]) + kern[scale][2] * (temp[2] + temp[6]) + +// kern[scale][1] * (temp[1] + temp[7]) + kern[scale][0] * (temp[0] + temp[8]); +// } +// } +// __syncthreads(); +// if (tx < LAPLACE_W / 4 && xp4 < width) +// { +// float4 b0 = reinterpret_cast(buff)[tx + 0]; +// float4 b1 = reinterpret_cast(buff)[tx + 1]; +// float4 b2 = reinterpret_cast(buff)[tx + 2]; +// float4 old4, new4, dif4; +// old4.x = kern[0][4] * b1.x + kern[0][3] * (b0.w + b1.y) + kern[0][2] * (b0.z + b1.z) + +// kern[0][1] * (b0.y + b1.w) + kern[0][0] * (b0.x + b2.x); +// old4.y = kern[0][4] * b1.y + kern[0][3] * (b1.x + b1.z) + kern[0][2] * (b0.w + b1.w) + +// kern[0][1] * (b0.z + b2.x) + kern[0][0] * (b0.y + b2.y); +// old4.z = kern[0][4] * b1.z + kern[0][3] * (b1.y + b1.w) + kern[0][2] * (b1.x + b2.x) + +// kern[0][1] * (b0.w + b2.y) + kern[0][0] * (b0.z + b2.z); +// old4.w = kern[0][4] * b1.w + 
kern[0][3] * (b1.z + b2.x) + kern[0][2] * (b1.y + b2.y) + +// kern[0][1] * (b1.x + b2.z) + kern[0][0] * (b0.w + b2.w); +// for (int scale = 1; scale < LAPLACE_S; scale++) +// { +// float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float4 b0 = reinterpret_cast(buf)[tx + 0]; +// float4 b1 = reinterpret_cast(buf)[tx + 1]; +// float4 b2 = reinterpret_cast(buf)[tx + 2]; +// new4.x = kern[scale][4] * b1.x + kern[scale][3] * (b0.w + b1.y) + +// kern[scale][2] * (b0.z + b1.z) + kern[scale][1] * (b0.y + b1.w) + +// kern[scale][0] * (b0.x + b2.x); +// new4.y = kern[scale][4] * b1.y + kern[scale][3] * (b1.x + b1.z) + +// kern[scale][2] * (b0.w + b1.w) + kern[scale][1] * (b0.z + b2.x) + +// kern[scale][0] * (b0.y + b2.y); +// new4.z = kern[scale][4] * b1.z + kern[scale][3] * (b1.y + b1.w) + +// kern[scale][2] * (b1.x + b2.x) + kern[scale][1] * (b0.w + b2.y) + +// kern[scale][0] * (b0.z + b2.z); +// new4.w = kern[scale][4] * b1.w + kern[scale][3] * (b1.z + b2.x) + +// kern[scale][2] * (b1.y + b2.y) + kern[scale][1] * (b1.x + b2.z) + +// kern[scale][0] * (b0.w + b2.w); +// dif4.x = new4.x - old4.x; +// dif4.y = new4.y - old4.y; +// dif4.z = new4.z - old4.z; +// dif4.w = new4.w - old4.w; +// reinterpret_cast(&d_Result[(scale - 1) * height * pitch + yp * pitch + xp4])[0] = dif4; +// old4 = new4; +// } +// } +// } + +// __global__ void LaplaceMultiMemTest(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// __shared__ float data2[LAPLACE_W * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int yp = LAPLACE_H * blockIdx.y; +// const int scale = threadIdx.y; +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// int h = height - 1; +// float temp[8 + LAPLACE_H], kern[LAPLACE_R 
+ 1]; +// for (int i = 0; i < 4; i++) +// temp[i] = data[max(0, min(yp + i - 4, h)) * pitch]; +// for (int i = 4; i < 8 + LAPLACE_H; i++) +// temp[i] = data[min(yp + i - 4, h) * pitch]; +// for (int i = 0; i <= LAPLACE_R; i++) +// kern[i] = kernel[LAPLACE_R - i]; +// for (int j = 0; j < LAPLACE_H; j++) +// { +// sdata1[tx] = kern[4] * temp[4 + j] + +// kern[3] * (temp[3 + j] + temp[5 + j]) + kern[2] * (temp[2 + j] + temp[6 + j]) + +// kern[1] * (temp[1 + j] + temp[7 + j]) + kern[0] * (temp[0 + j] + temp[8 + j]); +// __syncthreads(); +// float *sdata2 = data2 + LAPLACE_W * scale; +// if (tx < LAPLACE_W) +// { +// sdata2[tx] = kern[4] * sdata1[tx + 4] + +// kern[3] * (sdata1[tx + 3] + sdata1[tx + 5]) + kern[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + +// kern[1] * (sdata1[tx + 1] + sdata1[tx + 7]) + kern[0] * (sdata1[tx + 0] + sdata1[tx + 8]); +// } +// __syncthreads(); +// if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width && (yp + j) < height) +// d_Result[scale * height * pitch + (yp + j) * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; +// } +// } + +// __global__ void LaplaceMultiMemOld(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// __shared__ float data2[LAPLACE_W * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int yp = blockIdx.y; +// const int scale = threadIdx.y; +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// int h = height - 1; +// sdata1[tx] = kernel[0] * data[min(yp, h) * pitch] + +// kernel[1] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) + +// kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) + +// kernel[3] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * 
pitch]) + +// kernel[4] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]); +// __syncthreads(); +// float *sdata2 = data2 + LAPLACE_W * scale; +// if (tx < LAPLACE_W) +// { +// sdata2[tx] = kernel[0] * sdata1[tx + 4] + +// kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) + +// kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + +// kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) + +// kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]); +// } +// __syncthreads(); +// if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) +// d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; +// } + +void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height, + const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel, + float *buffer) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * LOWPASS_W + tx; + const int yp = item_ct1.get_group(1) * LOWPASS_H + ty; + float *kernel = const_cast(d_LowPassKernel); + float *data = d_Image + sycl::max(sycl::min(xp - 4, width - 1), 0); + float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R); + int h = height - 1; + if (yp < height) + buff[tx] = kernel[4] * data[sycl::min(yp, h) * pitch] + + kernel[3] * (data[sycl::max(0, sycl::min(yp - 1, h)) * pitch] + + data[sycl::min(yp + 1, h) * pitch]) + + kernel[2] * (data[sycl::max(0, sycl::min(yp - 2, h)) * pitch] + + data[sycl::min(yp + 2, h) * pitch]) + + kernel[1] * (data[sycl::max(0, sycl::min(yp - 3, h)) * pitch] + + data[sycl::min(yp + 3, h) * pitch]) + + kernel[0] * (data[sycl::max(0, sycl::min(yp - 4, h)) * pitch] + + data[sycl::min(yp + 4, h) * pitch]); + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < LOWPASS_W && xp < width && yp < height) + d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] + + kernel[3] * (buff[tx + 3] + buff[tx + 5]) + kernel[2] * (buff[tx + 2] + buff[tx + 6]) + + kernel[1] * (buff[tx + 1] + 
buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]); +} + +void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height, + const sycl::nd_item<3> &item_ct1, + float const *d_LowPassKernel, + sycl::local_accessor xrows) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * LOWPASS_W + tx; + const int yp = item_ct1.get_group(1) * LOWPASS_H + ty; + const int N = 16; + float *k = const_cast(d_LowPassKernel); + int xl = sycl::max(sycl::min(xp - 4, width - 1), 0); + for (int l = -8; l <= LOWPASS_H; l += 4) + { + if (l < LOWPASS_H) + { + int yl = sycl::max(sycl::min(yp + l + 4, height - 1), 0); + float val = d_Image[yl * pitch + xl]; + xrows[(l + 8 + ty) % N][tx] = + k[4] * ShiftDown(val, 4, item_ct1) + + k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) + + k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) + + k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) + + k[0] * (ShiftDown(val, 8, item_ct1) + val); + } + if (l >= 4) + { + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] + + k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) + + k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) + + k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) + + k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]); + } + if (l >= 0) + /* + DPCT1118:47: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } +} + +void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height, + const sycl::nd_item<3> &item_ct1, float const *d_LowPassKernel, + sycl::local_accessor xrows) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * LOWPASS_W + tx; + const int yp = item_ct1.get_group(1) * LOWPASS_H + ty; + const int N = 16; + float *k = const_cast(d_LowPassKernel); + int xl = sycl::max(sycl::min(xp - 4, width - 1), 0); +#pragma unroll + for (int l = -8; l < 4; l += 4) + { + int ly = l + ty; + int yl = sycl::max(sycl::min(yp + l + 4, height - 1), 0); + float val = d_Image[yl * pitch + xl]; // d_Image[yl*pitch + xl].x + val = k[4] * ShiftDown(val, 4, item_ct1) + + k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) + + k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) + + k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) + + k[0] * (ShiftDown(val, 8, item_ct1) + val); + xrows[ly + 8][tx] = val; + } + item_ct1.barrier(sycl::access::fence_space::local_space); +#pragma unroll + for (int l = 4; l < LOWPASS_H; l += 4) + { + int ly = l + ty; + int yl = sycl::min(yp + l + 4, height - 1); + float val = d_Image[yl * pitch + xl]; + val = k[4] * ShiftDown(val, 4, item_ct1) + + k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) + + k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) + + k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) + + k[0] * (ShiftDown(val, 8, item_ct1) + val); + xrows[(ly + 8) % N][tx] = val; + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % 
N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); + /* + DPCT1118:48: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } + int ly = LOWPASS_H + ty; + int ys = yp + LOWPASS_H - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.h new file mode 100644 index 000000000..52fd52aa4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftD.h @@ -0,0 +1,80 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. 
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#ifndef CUDASIFTD_H +#define CUDASIFTD_H + +#define NUM_SCALES 5 + +// Scale down thread block width +#define SCALEDOWN_W 64 // 60 + +// Scale down thread block height +#define SCALEDOWN_H 16 // 8 + +// Scale up thread block width +#define SCALEUP_W 64 + +// Scale up thread block height +#define SCALEUP_H 8 + +// Find point thread block width +#define MINMAX_W 30 // 32 + +// Find point thread block height +#define MINMAX_H 8 // 16 + +// Laplace thread block width +#define LAPLACE_W 128 // 56 + +// Laplace rows per thread +#define LAPLACE_H 4 + +// Number of laplace scales +#define LAPLACE_S (NUM_SCALES + 3) + +// Laplace filter kernel radius +#define LAPLACE_R 4 + +#define LOWPASS_W 24 // 56 +#define LOWPASS_H 32 // 16 +#define LOWPASS_R 4 // Low-pass filter kernel radius (host kernel array holds 2 * LOWPASS_R + 1 taps) + +//====================== Number of threads ====================// +// ScaleDown: SCALEDOWN_W + 4 +// LaplaceMulti: (LAPLACE_W+2*LAPLACE_R)*LAPLACE_S +// FindPointsMulti: MINMAX_W + 2 +// ComputeOrientations: 256 +// ExtractSiftDescriptors: 128 (16 x 8) + +//====================== Number of blocks ====================// +// ScaleDown: (width/SCALEDOWN_W) * (height/SCALEDOWN_H) +// LaplaceMulti: (width+2*LAPLACE_R)/LAPLACE_W * height +// FindPointsMulti: (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H) +// ComputeOrientations: 512 +// ExtractSiftDescriptors: 512 + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.cpp 
b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.cpp new file mode 100644 index 000000000..a07783ffc --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.cpp @@ -0,0 +1,878 @@ +//********************************************************// +// CUDA SIFT extractor by Mårten Björkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cudautils.h" +#include "cudaImage.h" +#include "cudaSift.h" +#include "cudaSiftD.h" +#include "cudaSiftH.h" + +#include "cudaSiftD.dp.cpp" + +void InitCuda(int devNum) +{ + int nDevices; + safeCall( + DPCT_CHECK_ERROR(nDevices = dpct::dev_mgr::instance().device_count())); + if (!nDevices) + { + std::cerr << "No CUDA devices available" << std::endl; + return; + } + devNum = std::min(nDevices - 1, devNum); + deviceInit(devNum); + dpct::device_info prop; + safeCall(DPCT_CHECK_ERROR(dpct::get_device_info( + prop, dpct::dev_mgr::instance().get_device(devNum)))); + printf("Device Number: %d\n", devNum); + printf(" Device name: %s\n", prop.get_name()); + printf(" Memory Clock Rate (MHz): %d\n", + prop.get_memory_clock_rate() / 1000); + printf(" Clock Freq (MHz): %d\n", prop.get_max_clock_frequency() / 1000); + printf(" Memory Bus Width (bits): %d\n", prop.get_memory_bus_width()); + printf(" Peak Memory Bandwidth (GB/s): %.1f\n\n", + 2.0 * prop.get_memory_clock_rate() * + (prop.get_memory_bus_width() / 8) / 1.0e6); +} + +float *AllocSiftTempMemory(int width, int height, int numOctaves, float &time, bool scaleUp) +{ + const int nd = NUM_SCALES + 3; + int w = width * (scaleUp ? 2 : 1); + int h = height * (scaleUp ? 
2 : 1); + int p = iAlignUp(w, 128); + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = NULL; + size_t pitch; + size += sizeTmp; + +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR( + memoryTmp = (float *)dpct::dpct_malloc( + pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)))); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + return memoryTmp; +} + +void FreeSiftTempMemory(float *memoryTmp) +{ + if (memoryTmp) + safeCall( + DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue()))); +} + +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, + float thresh, float &totTime, float lowestScale, bool scaleUp, float *tempMemory) +{ + unsigned int *d_PointCounterAddr; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(*((void **)&d_PointCounterAddr) = + d_PointCounter.get_ptr())); + safeCall(DPCT_CHECK_ERROR( + dpct::get_in_order_queue() + .memset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int)) + .wait())); + safeCall(DPCT_CHECK_ERROR( + dpct::get_in_order_queue() + .memcpy(d_MaxNumPoints.get_ptr(), &siftData.maxPts, sizeof(int)) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + const int nd = NUM_SCALES + 3; + int w = img.width * (scaleUp ? 2 : 1); + int h = img.height * (scaleUp ? 
2 : 1); + int p = iAlignUp(w, 128); + int width = w, height = h; + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = tempMemory; + size += sizeTmp; + if (!tempMemory) + { + size_t pitch; +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR( + memoryTmp = (float *)dpct::dpct_malloc( + pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)))); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + } + float *memorySub = memoryTmp + sizeTmp; + + CudaImage lowImg; + lowImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memorySub); + if (!scaleUp) + { + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); +#ifdef DEVICE_TIMER + auto start_memcpy1 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(d_LaplaceKernel.get_ptr(), kernel, + 8 * 12 * 16 * sizeof(float)) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy1 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy1 - start_memcpy1).count(); +#endif + LowPass(lowImg, img, fmax(initBlur, 0.001f), totTime); + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), totTime); +#ifdef DEVICE_TIMER + auto start_memcpy2 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(&siftData.numPts, + &d_PointCounterAddr[2 * numOctaves], + sizeof(int)) + 
.wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy2 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy2 - start_memcpy2).count(); +#endif + siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); + } + else + { + CudaImage upImg; + upImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memoryTmp); + ScaleUp(upImg, img, totTime); + LowPass(lowImg, upImg, dpct::max(initBlur, 0.001f), totTime); + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); +#ifdef DEVICE_TIMER + auto start_memcpy3 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(d_LaplaceKernel.get_ptr(), kernel, + 8 * 12 * 16 * sizeof(float)) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy3 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy3 - start_memcpy3).count(); +#endif + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), totTime); +#ifdef DEVICE_TIMER + auto start_memcpy4 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(&siftData.numPts, + &d_PointCounterAddr[2 * numOctaves], + sizeof(int)) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy4 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy4 - start_memcpy4).count(); +#endif + siftData.numPts = (siftData.numPts < siftData.maxPts ? 
siftData.numPts : siftData.maxPts); + RescalePositions(siftData, 0.5f, totTime); + } + + if (!tempMemory) + safeCall( + DPCT_CHECK_ERROR(sycl::free(memoryTmp, dpct::get_in_order_queue()))); + if (siftData.h_data) + { +#ifdef DEVICE_TIMER + auto start_memcpy5 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(siftData.h_data, siftData.d_data, + sizeof(SiftPoint) * siftData.numPts) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy5 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy5 - start_memcpy5).count(); + printf("Total time for sift extraction = %.2f us\n\n", totTime); +#endif + } + printf("Number of Points after sift extraction = %d\n\n", siftData.numPts); +} + +int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale, + float subsampling, float *memoryTmp, float *memorySub, float &totTime) +{ + int w = img.width; + int h = img.height; + if (numOctaves > 1) + { + CudaImage subImg; + int p = iAlignUp(w / 2, 128); + subImg.Allocate(w / 2, h / 2, p, false, totTime, memorySub); + ScaleDown(subImg, img, 0.5f, totTime); + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + ExtractSiftLoop(siftData, subImg, numOctaves - 1, totInitBlur, thresh, lowestScale, subsampling * 2.0f, + memoryTmp, memorySub + (h / 2) * p, totTime); + } + ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, totTime); + return 0; +} + +void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, + float lowestScale, float subsampling, float *memoryTmp, float &totTime) +{ + const int nd = NUM_SCALES + 3; + CudaImage diffImg[nd]; + int w = img.width; + int h = img.height; + int p = iAlignUp(w, 128); + for (int i = 0; i < nd - 1; i++) + diffImg[i].Allocate(w, h, p, false, 
totTime, memoryTmp + i * p * h); + + float baseBlur = pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); + LaplaceMulti(img, diffImg, octave, totTime); + FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES, lowestScale / subsampling, subsampling, octave, totTime); + ComputeOrientations(img, siftData, octave, totTime); + ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, totTime); +} + +void InitSiftData(SiftData &data, float &time, int num, bool host, bool dev) +{ + data.numPts = 0; + data.maxPts = num; + int sz = sizeof(SiftPoint) * num; + data.h_data = NULL; + if (host) + data.h_data = (SiftPoint *)malloc(sz); + data.d_data = NULL; + if (dev) + { +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(data.d_data = (SiftPoint *)sycl::malloc_device( + sz, dpct::get_in_order_queue()))); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + } +} + +void FreeSiftData(SiftData &data) // Releases the device buffer (sycl::free) and the host buffer (free) held by data and zeroes numPts/maxPts; both pointers are left NULL so calling it again is harmless. +{ + if (data.d_data != NULL) + safeCall( + DPCT_CHECK_ERROR(sycl::free(data.d_data, dpct::get_in_order_queue()))); + data.d_data = NULL; + free(data.h_data); // free(NULL) is a no-op, so no explicit guard is needed + data.h_data = NULL; // was left dangling before, which turned a second FreeSiftData() into a double free + data.numPts = 0; + data.maxPts = 0; +} + +void PrintSiftData(SiftData &data) +{ + SiftPoint *h_data = data.h_data; + if (data.h_data == NULL) + { + h_data = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts); + safeCall(DPCT_CHECK_ERROR( + dpct::get_in_order_queue() + .memcpy(h_data, data.d_data, sizeof(SiftPoint) * data.numPts) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); + data.h_data = h_data; + } + for (int i = 0; i < data.numPts; i++) + { + printf("xpos = %.2f\n", h_data[i].xpos); + printf("ypos = %.2f\n", h_data[i].ypos); + 
printf("scale = %.2f\n", h_data[i].scale); + printf("sharpness = %.2f\n", h_data[i].sharpness); + printf("edgeness = %.2f\n", h_data[i].edgeness); + printf("orientation = %.2f\n", h_data[i].orientation); + printf("score = %.2f\n", h_data[i].score); + float *siftData = (float *)&h_data[i].data; + for (int j = 0; j < 8; j++) + { + if (j == 0) + printf("data = "); + else + printf(" "); + for (int k = 0; k < 16; k++) + if (siftData[j + 8 * k] < 0.05) + printf(" . "); + else + printf("%.2f ", siftData[j + 8 * k]); + printf("\n"); + } + } + printf("Number of available points: %d\n", data.numPts); + printf("Number of allocated points: %d\n", data.maxPts); +} + +/////////////////////////////////////////////////////////////////////////////// +// Host side master functions +/////////////////////////////////////////////////////////////////////////////// + +double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime) +{ + static float oldVariance = -1.0f; + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleDown: missing data\n"); + return 0.0; + } + if (oldVariance != variance) + { + float h_Kernel[5]; + float kernelSum = 0.0f; + for (int j = 0; j < 5; j++) + { + h_Kernel[j] = (float)expf(-(double)(j - 2) * (j - 2) / 2.0 / variance); + kernelSum += h_Kernel[j]; + } + for (int j = 0; j < 5; j++) + h_Kernel[j] /= kernelSum; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR( + dpct::get_in_order_queue() + .memcpy(d_ScaleDownKernel.get_ptr(), h_Kernel, 5 * sizeof(float)) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + oldVariance = variance; + } +#if 0 + dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H)); + dim3 threads(SCALEDOWN_W + 4, SCALEDOWN_H + 4); 
+ ScaleDownDenseShift<<>>(res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch); +#else + sycl::range<3> blocks(1, iDivUp(src.height, SCALEDOWN_H), + iDivUp(src.width, SCALEDOWN_W)); + sycl::range<3> threads(1, 1, SCALEDOWN_W + 4); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + d_ScaleDownKernel.init(); + + auto d_ScaleDownKernel_ptr_ct1 = d_ScaleDownKernel.get_ptr(); + + /* + DPCT1101:214: 'SCALEDOWN_W + 4' expression was replaced with a value. + Modify the code to use the original expression, provided in comments, if + it is correct. + */ + sycl::local_accessor inrow_acc_ct1( + sycl::range<1>(68 /*SCALEDOWN_W + 4*/), cgh); + /* + DPCT1101:215: '5 * (SCALEDOWN_W / 2)' expression was replaced with a + value. Modify the code to use the original expression, provided in + comments, if it is correct. + */ + sycl::local_accessor brow_acc_ct1( + sycl::range<1>(160 /*5 * (SCALEDOWN_W / 2)*/), cgh); + /* + DPCT1101:216: 'SCALEDOWN_H + 4' expression was replaced with a value. + Modify the code to use the original expression, provided in comments, if + it is correct. + */ + sycl::local_accessor yRead_acc_ct1( + sycl::range<1>(20 /*SCALEDOWN_H + 4*/), cgh); + /* + DPCT1101:217: 'SCALEDOWN_H + 4' expression was replaced with a value. + Modify the code to use the original expression, provided in comments, if + it is correct. 
+ */ + sycl::local_accessor yWrite_acc_ct1( + sycl::range<1>(20 /*SCALEDOWN_H + 4*/), cgh); + + float *res_d_data_ct0 = res.d_data; + float *src_d_data_ct1 = src.d_data; + int src_width_ct2 = src.width; + int src_pitch_ct3 = src.pitch; + int src_height_ct4 = src.height; + int res_pitch_ct5 = res.pitch; + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) { + ScaleDown(res_d_data_ct0, src_d_data_ct1, src_width_ct2, + src_pitch_ct3, src_height_ct4, res_pitch_ct5, item_ct1, + d_ScaleDownKernel_ptr_ct1, inrow_acc_ct1.get_pointer(), + brow_acc_ct1.get_pointer(), yRead_acc_ct1.get_pointer(), + yWrite_acc_ct1.get_pointer()); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ScaleDown time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("ScaleDown() execution failed\n"); + return 0.0; +} + +double ScaleUp(CudaImage &res, CudaImage &src, float &totTime) +{ + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleUp: missing data\n"); + return 0.0; + } + sycl::range<3> blocks(1, iDivUp(res.height, SCALEUP_H), + iDivUp(res.width, SCALEUP_W)); + sycl::range<3> threads(1, SCALEUP_H / 2, SCALEUP_W / 2); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + float *res_d_data_ct0 = res.d_data; + float *src_d_data_ct1 = src.d_data; + int src_width_ct2 = src.width; + int src_pitch_ct3 = src.pitch; + int src_height_ct4 = src.height; + int res_pitch_ct5 = res.pitch; + + cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) { + ScaleUp(res_d_data_ct0, src_d_data_ct1, src_width_ct2, + src_pitch_ct3, src_height_ct4, res_pitch_ct5, + 
item_ct1); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ScaleUp time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ScaleUp() execution failed\n"); + return 0.0; +} + +double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime) +{ + sycl::range<3> blocks(1, 1, 512); + sycl::range<3> threads(1, 1, 256); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + d_MaxNumPoints.init(); + d_PointCounter.init(); + + auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr(); + auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr(); + + /* + DPCT1101:218: 'WID' expression was replaced with a value. Modify the code + to use the original expression, provided in comments, if it is correct. + */ + /* + DPCT1101:219: 'WID' expression was replaced with a value. Modify the code + to use the original expression, provided in comments, if it is correct. + */ + sycl::local_accessor img_acc_ct1( + sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh); + /* + DPCT1101:220: 'WID' expression was replaced with a value. Modify the code + to use the original expression, provided in comments, if it is correct. + */ + /* + DPCT1101:221: 'WID' expression was replaced with a value. Modify the code + to use the original expression, provided in comments, if it is correct. + */ + sycl::local_accessor tmp_acc_ct1( + sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh); + /* + DPCT1101:222: '2 * LEN' expression was replaced with a value. Modify the + code to use the original expression, provided in comments, if it is + correct. 
+ */ + sycl::local_accessor hist_acc_ct1(sycl::range<1>(64 /*2 * LEN*/), + cgh); + /* + DPCT1101:223: 'WID' expression was replaced with a value. Modify the code + to use the original expression, provided in comments, if it is correct. + */ + sycl::local_accessor gaussx_acc_ct1(sycl::range<1>(19 /*WID*/), + cgh); + /* + DPCT1101:224: 'WID' expression was replaced with a value. Modify the code + to use the original expression, provided in comments, if it is correct. + */ + sycl::local_accessor gaussy_acc_ct1(sycl::range<1>(19 /*WID*/), + cgh); + + float *src_d_data_ct0 = src.d_data; + int src_width_ct1 = src.width; + int src_pitch_ct2 = src.pitch; + int src_height_ct3 = src.height; + + cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) { + ComputeOrientationsCONSTNew( + src_d_data_ct0, src_width_ct1, src_pitch_ct2, + src_height_ct3, siftData.d_data, octave, item_ct1, + *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, + img_acc_ct1, tmp_acc_ct1, hist_acc_ct1.get_pointer(), + gaussx_acc_ct1.get_pointer(), + gaussy_acc_ct1.get_pointer()); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ComputeOrientationsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel) + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ComputeOrientations() execution failed\n"); + return 0.0; +} + +double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime) +{ + sycl::range<3> blocks(1, 1, 512); + sycl::range<3> threads(1, 8, 16); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + d_MaxNumPoints.init(); + d_PointCounter.init(); + + auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr(); + auto 
d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr(); + + sycl::local_accessor gauss_acc_ct1(sycl::range<1>(16), cgh); + sycl::local_accessor buffer_acc_ct1(sycl::range<1>(128), cgh); + sycl::local_accessor sums_acc_ct1(sycl::range<1>(4), cgh); + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { + ExtractSiftDescriptorsCONSTNew( + texObj, pitch, siftData.d_data, subsampling, octave, item_ct1, + *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, + gauss_acc_ct1.get_pointer(), buffer_acc_ct1.get_pointer(), + sums_acc_ct1.get_pointer()); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ExtractSiftDescriptorsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ExtractSiftDescriptors() execution failed\n"); + return 0.0; +} +double RescalePositions(SiftData &siftData, float scale, float &totTime) +{ + sycl::range<3> blocks(1, 1, iDivUp(siftData.numPts, 64)); + sycl::range<3> threads(1, 1, 64); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) { + RescalePositions(siftData.d_data, siftData.numPts, scale, item_ct1); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("RescalePositions time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("RescapePositions() execution failed\n"); + return 0.0; +} + +double LowPass(CudaImage &res, 
CudaImage &src, float scale, float &totTime) +{ + float kernel[2 * LOWPASS_R + 1]; + static float oldScale = -1.0f; + if (scale != oldScale) + { + float kernelSum = 0.0f; + float ivar2 = 1.0f / (2.0f * scale * scale); + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) + { + kernel[j + LOWPASS_R] = (float)expf(-(double)j * j * ivar2); + kernelSum += kernel[j + LOWPASS_R]; + } + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) + kernel[j + LOWPASS_R] /= kernelSum; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(d_LowPassKernel.get_ptr(), kernel, + (2 * LOWPASS_R + 1) * sizeof(float)) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + oldScale = scale; + } + int width = res.width; + int pitch = res.pitch; + int height = res.height; + sycl::range<3> blocks(1, iDivUp(height, LOWPASS_H), + iDivUp(width, LOWPASS_W)); //[80,34,1] + + sycl::range<3> threads(1, 4, LOWPASS_W + 2 * LOWPASS_R); //[32,4,1] +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + d_LowPassKernel.init(); + + auto d_LowPassKernel_ptr_ct1 = d_LowPassKernel.get_ptr(); + + sycl::local_accessor xrows_acc_ct1(sycl::range<2>(16, 32), cgh); + + float *src_d_data_ct0 = src.d_data; + float *res_d_data_ct1 = res.d_data; + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { + LowPassBlockOld(src_d_data_ct0, res_d_data_ct1, width, pitch, height, + item_ct1, d_LowPassKernel_ptr_ct1, xrows_acc_ct1); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = 
std::chrono::steady_clock::now(); + // printf("LowPassBlock time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("LowPass() execution failed\n"); + return 0.0; +} + +//==================== Multi-scale functions ===================// + +void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel) +{ + if (numOctaves > 1) + { + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + PrepareLaplaceKernels(numOctaves - 1, totInitBlur, kernel); + } + float scale = pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); + for (int i = 0; i < NUM_SCALES + 3; i++) + { + float kernelSum = 0.0f; + float var = scale * scale - initBlur * initBlur; + for (int j = 0; j <= LAPLACE_R; j++) + { + kernel[numOctaves * 12 * 16 + 16 * i + j] = (float)expf(-(double)j * j / 2.0 / var); + kernelSum += (j == 0 ? 1 : 2) * kernel[numOctaves * 12 * 16 + 16 * i + j]; + } + for (int j = 0; j <= LAPLACE_R; j++) + kernel[numOctaves * 12 * 16 + 16 * i + j] /= kernelSum; + scale *= diffScale; + } +} + +double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime) +{ + int width = results[0].width; + int pitch = results[0].pitch; + int height = results[0].height; +#if 1 + sycl::range<3> threads(1, 1, LAPLACE_W + 2 * LAPLACE_R); //(136) + sycl::range<3> blocks(1, height, iDivUp(width, LAPLACE_W)); //(15) +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + d_LaplaceKernel.init(); + + auto d_LaplaceKernel_ptr_ct1 = d_LaplaceKernel.get_ptr(); + + /* + DPCT1101:226: '(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S' expression was + replaced with a value. Modify the code to use the original expression, + provided in comments, if it is correct. 
+ */ + sycl::local_accessor buff_acc_ct1( + sycl::range<1>(1088 /*(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S*/), cgh); + + float *baseImage_d_data_ct0 = baseImage.d_data; + float *results_d_data_ct1 = results[0].d_data; + + cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) { + LaplaceMultiMem(baseImage_d_data_ct0, results_d_data_ct1, + width, pitch, height, octave, item_ct1, + d_LaplaceKernel_ptr_ct1, + buff_acc_ct1.get_pointer()); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("LaplaceMultiMem time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("LaplaceMulti() execution failed\n"); + return 0.0; +} + +double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, + float lowestScale, float subsampling, int octave, float &totTime) +{ + if (sources->d_data == NULL) + { + printf("FindPointsMulti: missing data\n"); + return 0.0; + } + int w = sources->width; + int p = sources->pitch; + int h = sources->height; +#if 1 + sycl::range<3> blocks(1, iDivUp(h, MINMAX_H), + iDivUp(w, MINMAX_W) * NUM_SCALES); + sycl::range<3> threads(1, 1, MINMAX_W + 2); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + d_MaxNumPoints.init(); + d_PointCounter.init(); + + auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr(); + auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr(); + + /* + DPCT1101:227: '2 * MEMWID' expression was replaced with a value. Modify + the code to use the original expression, provided in comments, if it is + correct. 
+ */ + sycl::local_accessor points_acc_ct1( + sycl::range<1>(64 /*2 * MEMWID*/), cgh); + + float *sources_d_data_ct0 = sources->d_data; + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { + FindPointsMultiNew(sources_d_data_ct0, siftData.d_data, w, p, h, + subsampling, lowestScale, thresh, factor, + edgeLimit, octave, item_ct1, + *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, + points_acc_ct1.get_pointer()); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("FindPointsMultiNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()) + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("FindPointsMulti() execution failed\n"); + return 0.0; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.o new file mode 100644 index 000000000..bf1f3616c Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.dp.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.h new file mode 100644 index 000000000..95e8384ec --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudaSiftH.h @@ -0,0 +1,50 @@ + +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, 
including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#ifndef CUDASIFTH_H +#define CUDASIFTH_H + +#include "cudautils.h" +#include "cudaImage.h" +#include "cudaSift.h" + +int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, + float lowestScale, float subsampling, float *memoryTmp, float *memorySub, float &totTime); +void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale, + float subsampling, float *memoryTmp, float &totTime); +double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime); +double ScaleUp(CudaImage &res, CudaImage &src, float &totTime); +double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime); +double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime); +double RescalePositions(SiftData &siftData, float scale, float &totTime); +double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime); +void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel); +double 
LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime); +double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, + float lowestScale, float subsampling, int octave, float &totTime); + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudasift b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudasift new file mode 100755 index 000000000..1b6f89c3d Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudasift differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h new file mode 100644 index 000000000..28a5ce756 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h @@ -0,0 +1,209 @@ +#ifndef CUDAUTILS_H +#define CUDAUTILS_H + +#define DPCT_COMPAT_RT_VERSION 12020 +#include +#include +#include +#include +#include + +#ifdef WIN32 +#include +#endif + +#define safeCall(err) __safeCall(err, __FILE__, __LINE__) +#define safeThreadSync() __safeThreadSync(__FILE__, __LINE__) +#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__) + +inline void __safeCall(dpct::err0 err, const char *file, const int line) +{ +} + +inline void __safeThreadSync(const char *file, const int line) try { + dpct::err0 err = + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()); +} +catch (sycl::exception const &exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ + << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + +inline void __checkMsg(const char *errorMessage, const char *file, const int line) +{ + /* + DPCT1010:86: SYCL uses exceptions to report errors and does not use the error + codes. The call was replaced with 0. You need to rewrite this code. 
+ */ + dpct::err0 err = 0; +} + +inline bool deviceInit(int dev) +{ + int deviceCount; + safeCall( + DPCT_CHECK_ERROR(deviceCount = dpct::dev_mgr::instance().device_count())); + if (deviceCount == 0) + { + fprintf(stderr, "CUDA error: no devices supporting CUDA.\n"); + return false; + } + if (dev < 0) + dev = 0; + if (dev > deviceCount - 1) + dev = deviceCount - 1; + dpct::device_info deviceProp; + safeCall(DPCT_CHECK_ERROR(dpct::get_device_info( + deviceProp, dpct::dev_mgr::instance().get_device(dev)))); + /* + DPCT1005:88: The SYCL device version is different from CUDA Compute + Compatibility. You may need to rewrite this code. + */ + if (deviceProp.get_major_version() < 1) + { + fprintf(stderr, "error: device does not support CUDA.\n"); + return false; + } + /* + DPCT1093:89: The "dev" device may be not the one intended for use. Adjust the + selected device if needed. + */ + safeCall(DPCT_CHECK_ERROR(dpct::select_device(dev))); + return true; +} + +class TimerGPU +{ +public: + dpct::event_ptr start, stop; + std::chrono::time_point start_ct1; + std::chrono::time_point stop_ct1; + dpct::queue_ptr stream; + TimerGPU(dpct::queue_ptr stream_ = &dpct::get_in_order_queue()) + : stream(stream_) + { + start = new sycl::event(); + stop = new sycl::event(); + /* + DPCT1012:90: Detected kernel execution time measurement pattern and + generated an initial code for time measurements in SYCL. You can change the + way time is measured depending on your goals. + */ + start_ct1 = std::chrono::steady_clock::now(); + *start = stream->ext_oneapi_submit_barrier(); + } + ~TimerGPU() + { + dpct::destroy_event(start); + dpct::destroy_event(stop); + } + float read() + { + /* + DPCT1012:91: Detected kernel execution time measurement pattern and + generated an initial code for time measurements in SYCL. You can change the + way time is measured depending on your goals. 
+ */ + stop_ct1 = std::chrono::steady_clock::now(); + *stop = stream->ext_oneapi_submit_barrier(); + stop->wait_and_throw(); + float time; + time = + std::chrono::duration(stop_ct1 - start_ct1).count(); + return time; + } +}; + +class TimerCPU +{ + static const int bits = 10; + +public: + long long beg_clock; + float freq; + TimerCPU(float freq_) : freq(freq_) + { // freq = clock frequency in MHz + beg_clock = getTSC(bits); + } + long long getTSC(int bits) + { +#ifdef WIN32 + return __rdtsc() / (1LL << bits); +#else + unsigned int low, high; + __asm__(".byte 0x0f, 0x31" + : "=a"(low), "=d"(high)); + return ((long long)high << (32 - bits)) | ((long long)low >> bits); +#endif + } + float read() + { + long long end_clock = getTSC(bits); + long long Kcycles = end_clock - beg_clock; + float time = (float)(1 << bits) * Kcycles / freq / 1e3f; + return time; + } +}; + +template +__inline__ T ShiftDown(T var, unsigned int delta, + const sycl::nd_item<3> &item_ct1, int width = 32) +{ +#if (DPCT_COMPAT_RT_VERSION >= 9000) + /* + DPCT1023:0: The SYCL sub-group does not support mask options for + dpct::shift_sub_group_left. You can specify + "--use-experimental-features=masked-sub-group-operation" to use the + experimental helper function to migrate __shfl_down_sync. + */ + /* + DPCT1096:225: The right-most dimension of the work-group used in the SYCL + kernel that calls this function may be less than "32". The function + "dpct::shift_sub_group_left" may return an unexpected result on the CPU + device. Modify the size of the work-group to ensure that the value of the + right-most dimension is a multiple of "32". 
+ */ + return dpct::shift_sub_group_left(item_ct1.get_sub_group(), var, delta, + width); +#else + return __shfl_down(var, delta, width); +#endif +} + +template +__inline__ T ShiftUp(T var, unsigned int delta, + const sycl::nd_item<3> &item_ct1, int width = 32) +{ +#if (DPCT_COMPAT_RT_VERSION >= 9000) + /* + DPCT1023:1: The SYCL sub-group does not support mask options for + dpct::shift_sub_group_right. You can specify + "--use-experimental-features=masked-sub-group-operation" to use the + experimental helper function to migrate __shfl_up_sync. + */ + return dpct::shift_sub_group_right(item_ct1.get_sub_group(), var, delta, + width); +#else + return __shfl_up(var, delta, width); +#endif +} + +template +__inline__ T Shuffle(T var, unsigned int lane, const sycl::nd_item<3> &item_ct1, int width = 32) +{ +#if (DPCT_COMPAT_RT_VERSION >= 9000) + /* + DPCT1023:2: The SYCL sub-group does not support mask options for + dpct::select_from_sub_group. You can specify + "--use-experimental-features=masked-sub-group-operation" to use the + experimental helper function to migrate __shfl_sync. 
+ */ + return dpct::select_from_sub_group(item_ct1.get_sub_group(), var, lane, + width); +#else + return __shfl(var, lane, width); +#endif +} + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h.yaml b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h.yaml new file mode 100644 index 000000000..626cbf84d --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/cudautils.h.yaml @@ -0,0 +1,559 @@ +--- +MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/out/cudautils.h' +Replacements: + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 41 + Length: 0 + ReplacementText: "#define DPCT_COMPAT_RT_VERSION 12020\n#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 78 + Length: 0 + ReplacementText: "\n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 323 + Length: 9 + ReplacementText: 'dpct::err0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 376 + Length: 165 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 608 + Length: 0 + ReplacementText: ' try ' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 612 + Length: 9 + ReplacementText: 'dpct::err0' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 628 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 655 + Length: 175 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 832 + Length: 0 + ReplacementText: "\ncatch (sycl::exception const &exc) {\n std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n std::exit(1);\n}" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 919 + Length: 0 + ReplacementText: " /*\n DPCT1010:86: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. 
You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 921 + Length: 11 + ReplacementText: 'dpct::err0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 939 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 961 + Length: 176 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1205 + Length: 32 + ReplacementText: 'DPCT_CHECK_ERROR(deviceCount = dpct::dev_mgr::instance().device_count())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1442 + Length: 14 + ReplacementText: 'dpct::device_info' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1480 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_device_info' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1504 + Length: 11 + ReplacementText: deviceProp + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1517 + Length: 3 + ReplacementText: 'dpct::dev_mgr::instance().get_device(dev)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1521 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1524 + Length: 0 + ReplacementText: " /*\n DPCT1005:88: The SYCL device version is different from CUDA Compute Compatibility. You may need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1541 + Length: 5 + ReplacementText: 'get_major_version()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1641 + Length: 0 + ReplacementText: " /*\n DPCT1093:89: The \"dev\" device may be not the one intended for use. 
Adjust the selected device if needed.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1652 + Length: 13 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::select_device' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1670 + Length: 0 + ReplacementText: ')' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1718 + Length: 11 + ReplacementText: 'dpct::event_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1742 + Length: 0 + ReplacementText: "\n std::chrono::time_point start_ct1;\n std::chrono::time_point stop_ct1;" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1745 + Length: 12 + ReplacementText: 'dpct::queue_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1777 + Length: 12 + ReplacementText: 'dpct::queue_ptr' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1800 + Length: 1 + ReplacementText: '&dpct::get_in_order_queue()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1829 + Length: 23 + ReplacementText: 'start = new sycl::event()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1858 + Length: 22 + ReplacementText: 'stop = new sycl::event()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1882 + Length: 0 + ReplacementText: " /*\n DPCT1012:90: Detected kernel execution time measurement pattern and generated an initial code for time measurements in SYCL. You can change the way time is measured depending on your goals.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1886 + Length: 30 + ReplacementText: "start_ct1 = std::chrono::steady_clock::now();\n *start = stream->ext_oneapi_submit_barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1944 + Length: 23 + ReplacementText: 'dpct::destroy_event(start)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 1973 + Length: 22 + ReplacementText: 'dpct::destroy_event(stop)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2020 + Length: 0 + ReplacementText: " /*\n DPCT1012:91: Detected kernel execution time measurement pattern and 
generated an initial code for time measurements in SYCL. You can change the way time is measured depending on your goals.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2024 + Length: 29 + ReplacementText: "stop_ct1 = std::chrono::steady_clock::now();\n *stop = stream->ext_oneapi_submit_barrier()" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2059 + Length: 26 + ReplacementText: 'stop->wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2107 + Length: 40 + ReplacementText: 'time = std::chrono::duration(stop_ct1 - start_ct1).count()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2857 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2916 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2941 + Length: 14 + ReplacementText: DPCT_COMPAT_RT_VERSION + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2965 + Length: 0 + 
ReplacementText: " /*\n DPCT1023:0: The SYCL sub-group does not support mask options for dpct::shift_sub_group_left. You can specify \"--use-experimental-features=masked-sub-group-operation\" to use the experimental helper function to migrate __shfl_down_sync.\n */\n /*\n DPCT1096:225: The right-most dimension of the work-group used in the SYCL kernel that calls this function may be less than \"32\". The function \"dpct::shift_sub_group_left\" may return an unexpected result on the CPU device. Modify the size of the work-group to ensure that the value of the right-most dimension is a multiple of \"32\".\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 2974 + Length: 47 + ReplacementText: 'dpct::shift_sub_group_left(item_ct1.get_sub_group(), var, delta, width)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3099 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3156 + Length: 0 + ReplacementText: ",\n const sycl::nd_item<3> &item_ct1" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3181 + Length: 14 + ReplacementText: DPCT_COMPAT_RT_VERSION + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3205 + Length: 0 + ReplacementText: " /*\n DPCT1023:1: The SYCL sub-group does not support mask options for 
dpct::shift_sub_group_right. You can specify \"--use-experimental-features=masked-sub-group-operation\" to use the experimental helper function to migrate __shfl_up_sync.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3214 + Length: 45 + ReplacementText: 'dpct::shift_sub_group_right(item_ct1.get_sub_group(), var, delta, width)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3335 + Length: 11 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3391 + Length: 0 + ReplacementText: ', const sycl::nd_item<3> &item_ct1' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3416 + Length: 14 + ReplacementText: DPCT_COMPAT_RT_VERSION + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3440 + Length: 0 + ReplacementText: " /*\n DPCT1023:2: The SYCL sub-group does not support mask options for dpct::select_from_sub_group. 
You can specify \"--use-experimental-features=masked-sub-group-operation\" to use the experimental helper function to migrate __shfl_sync.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Offset: 3449 + Length: 41 + ReplacementText: 'dpct::select_from_sub_group(item_ct1.get_sub_group(), var, lane, width)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false +MainSourceFilesDigest: + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/cudautils.h' + Digest: 127b6c30b6236cc7de6968ae1eff8ccf +DpctVersion: 18.0.0 +MainHelperFileName: '' +USMLevel: '' +FeatureMap: {} +CompileTargets: {} +OptionMap: + AnalysisScopePath: + Value: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA' + Specified: false + AsyncHandler: + Value: 'false' + Specified: false + CommentsEnabled: + Value: 'false' + Specified: false + CompilationsDir: + Value: '/home/local_user/sandbox/Velocity-Bench/cudaSift/CUDA/build' + Specified: true + CtadEnabled: + Value: 'false' + Specified: false + EnablepProfiling: + Value: 'false' + Specified: false + ExperimentalFlag: + Value: '0' + Specified: false + ExplicitClNamespace: + Value: 'false' + Specified: false + ExplicitNamespace: + Value: '20' + Specified: false + ExtensionDDFlag: + Value: '0' + Specified: false + ExtensionDEFlag: + Value: '4294967295' + Specified: false + HelperFuncPreferenceFlag: + Value: '0' + Specified: false + NDRangeDim: + Value: '3' + Specified: false + NoDRYPattern: + Value: 'false' + Specified: false + NoUseGenericSpace: + Value: '' + Specified: true + OptimizeMigration: + Value: 'false' + Specified: false + ProcessAll: + Value: 'false' + Specified: false + RuleFile: + Value: '' + Specified: false + SyclNamedLambda: + Value: 'false' + Specified: false + UsmLevel: + Value: '1' + Specified: false +... 
diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.cpp new file mode 100644 index 000000000..c01e6e7d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include "cudaSift.h" + +int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh) +{ +#ifdef MANAGEDMEM + SiftPoint *mpts = data.m_data; +#else + if (data.h_data==NULL) + return 0; + SiftPoint *mpts = data.h_data; +#endif + float limit = thresh*thresh; + int numPts = data.numPts; + cv::Mat M(8, 8, CV_64FC1); + cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1); + double Y[8]; + for (int i=0;i<8;i++) + A.at(i, 0) = homography[i] / homography[8]; + for (int loop=0;loopmaxAmbiguity) + continue; + float den = A.at(6)*pt.xpos + A.at(7)*pt.ypos + 1.0f; + float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos; + float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos; + float err = dx*dx + dy*dy; + float wei = (err(r,c) += (Y[c] * Y[r] * wei); + X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei); + Y[0] = Y[1] = Y[2] = 0.0; + Y[3] = pt.xpos; + Y[4] = pt.ypos; + Y[5] = 1.0; + Y[6] = - pt.xpos * pt.match_ypos; + Y[7] = - pt.ypos * pt.match_ypos; + for (int c=0;c<8;c++) + for (int r=0;r<8;r++) + M.at(r,c) += (Y[c] * Y[r] * wei); + X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei); + } + cv::solve(M, X, A, cv::DECOMP_CHOLESKY); + } + int numfit = 0; + for (int i=0;i(6)*pt.xpos + A.at(7)*pt.ypos + 1.0; + float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos; + float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos; + float err = dx*dx + dy*dy; + if (err(i); + homography[8] = 1.0f; + return numfit; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.o 
b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.o new file mode 100644 index 000000000..3d2f10fdb Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/geomFuncs.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.cpp new file mode 100644 index 000000000..ea9b0c280 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.cpp @@ -0,0 +1,282 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Björkman aka Celebrandil // +// celle @ csc.kth.se // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Utility.h" +#include "cudaImage.h" +#include "cudaSift.h" + +int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh); +void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img); +void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography); + +double ScaleUp(CudaImage &res, CudaImage &src); + +/////////////////////////////////////////////////////////////////////////////// +// Main program +/////////////////////////////////////////////////////////////////////////////// +int main(int argc, char **argv) +{ + auto totalProgTimer_start = std::chrono::steady_clock::now(); + int devNum = 0, imgSet = 0; + if (argc > 1) + devNum = std::atoi(argv[1]); + if (argc > 2) + imgSet = std::atoi(argv[2]); + + float totTime = 0.0; + float imageInitTime = 0.0; + float extractSiftTime = 0.0; + float matchingTime = 0.0; + float ioReadTime = 0.0; + float dataVerificationTime = 0.0; + + // Read images using OpenCV + cv::Mat limg, rimg; + auto ioRead_start = std::chrono::steady_clock::now(); + if (imgSet) + { + cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1); + cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1); + } + else + { + cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1); + cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1); + } + auto ioRead_stop = std::chrono::steady_clock::now(); + ioReadTime = std::chrono::duration(ioRead_stop - ioRead_start).count(); + + unsigned int w = limg.cols; + unsigned int h = limg.rows; + std::cout << "Image size = (" << w << "," << h << ")" << std::endl; + + // Initial Cuda images and download images to device + std::cout << "Initializing data..." << std::endl; + /* + DPCT1093:83: The "0" device may be not the one intended for use. 
Adjust the + selected device if needed. + */ + dpct::select_device(0); + CudaImage img1, img2; + + img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data); + img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data); + img1.Download(imageInitTime); + img2.Download(imageInitTime); + + // Extract Sift features from images + SiftData siftData1, siftData2; + float initBlur = 1.0f; + float thresh = (imgSet ? 4.5f : 2.0f); + + InitSiftData(siftData1, imageInitTime, 32768, true, true); + InitSiftData(siftData2, imageInitTime, 32768, true, true); + + // A bit of benchmarking + // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) { + float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false); + for (int i = 0; i < 50; i++) + { + float time = 0.0f; // set total time to init time + ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp); + extractSiftTime += time; + time = 0.0f; + ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp); + extractSiftTime += time; + } + FreeSiftTempMemory(memoryTmp); + + // Match Sift features and find a homography + for (int i = 0; i < 1; i++) + MatchSiftData(siftData1, siftData2, matchingTime); + float homography[9]; + int numMatches; + FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0); + int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0); + float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts); + + std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl; + std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n" + << std::endl; + +#ifdef DEVICE_TIMER + totTime = imageInitTime + extractSiftTime + matchingTime; + + std::cout << "Images initialization time = " << imageInitTime / 1000 << " 
ms" << std::endl; + std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl; + std::cout << "Matching time = " << matchingTime / 1000 << " ms" + << "\n" + << std::endl; + std::cout << "Total Deivce Time = " << totTime / 1000 << " ms" + << "\n" + << std::endl; +#endif + + // data validation + auto dataVerficationTimer_start = std::chrono::steady_clock::now(); + int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage); + auto dataVerficationTimer_stop = std::chrono::steady_clock::now(); + dataVerificationTime = std::chrono::duration(dataVerficationTimer_stop - dataVerficationTimer_start).count(); + // // Print out and store summary data + // // PrintMatchData(siftData1, siftData2, img1); + // cv::imwrite("data/limg_pts.pgm", limg); + + // MatchAll(siftData1, siftData2, homography); + + // Free Sift data from device + FreeSiftData(siftData1); + FreeSiftData(siftData2); + + auto totalProgTimer_end = std::chrono::steady_clock::now(); + float totalProgramTime = std::chrono::duration(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime; + std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms" + << "\n" + << std::endl; + return data_verification_flag; +} + +void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography) +{ +#ifdef MANAGEDMEM + SiftPoint *sift1 = siftData1.m_data; + SiftPoint *sift2 = siftData2.m_data; +#else + SiftPoint *sift1 = siftData1.h_data; + SiftPoint *sift2 = siftData2.h_data; +#endif + int numPts1 = siftData1.numPts; + int numPts2 = siftData2.numPts; + int numFound = 0; +#if 1 + homography[0] = homography[4] = -1.0f; + homography[1] = homography[3] = homography[6] = homography[7] = 0.0f; + homography[2] = 1279.0f; + homography[5] = 959.0f; +#endif + for (int i = 0; i < numPts1; i++) + { + float *data1 = sift1[i].data; + std::cout << i << ":" << sift1[i].scale << ":" << (int)sift1[i].orientation << " " << sift1[i].xpos << " " 
<< sift1[i].ypos << std::endl; + bool found = false; + for (int j = 0; j < numPts2; j++) + { + float *data2 = sift2[j].data; + float sum = 0.0f; + for (int k = 0; k < 128; k++) + sum += data1[k] * data2[k]; + float den = homography[6] * sift1[i].xpos + homography[7] * sift1[i].ypos + homography[8]; + float dx = (homography[0] * sift1[i].xpos + homography[1] * sift1[i].ypos + homography[2]) / den - sift2[j].xpos; + float dy = (homography[3] * sift1[i].xpos + homography[4] * sift1[i].ypos + homography[5]) / den - sift2[j].ypos; + float err = dx * dx + dy * dy; + if (err < 100.0f) // 100.0 + found = true; + if (err < 100.0f || j == sift1[i].match) + { // 100.0 + if (j == sift1[i].match && err < 100.0f) + std::cout << " *"; + else if (j == sift1[i].match) + std::cout << " -"; + else if (err < 100.0f) + std::cout << " +"; + else + std::cout << " "; + std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << sift2[j].scale << ":" << (int)sift2[j].orientation << " " << sift2[j].xpos << " " << sift2[j].ypos << " " << (int)dx << " " << (int)dy << std::endl; + } + } + std::cout << std::endl; + if (found) + numFound++; + } + std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl; + std::cout << homography[0] << " " << homography[1] << " " << homography[2] << std::endl; //%%% + std::cout << homography[3] << " " << homography[4] << " " << homography[5] << std::endl; //%%% + std::cout << homography[6] << " " << homography[7] << " " << homography[8] << std::endl; //%%% +} + +void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img) +{ + int numPts = siftData1.numPts; +#ifdef MANAGEDMEM + SiftPoint *sift1 = siftData1.m_data; + SiftPoint *sift2 = siftData2.m_data; +#else + SiftPoint *sift1 = siftData1.h_data; + SiftPoint *sift2 = siftData2.h_data; +#endif + float *h_img = img.h_data; + int w = img.width; + int h = img.height; + std::cout << std::setprecision(3); + for (int j = 0; j < numPts; j++) + { + int k = sift1[j].match; + if 
(sift1[j].match_error < 5) + { + float dx = sift2[k].xpos - sift1[j].xpos; + float dy = sift2[k].ypos - sift1[j].ypos; +#if 0 + if (false && sift1[j].xpos>550 && sift1[j].xpos<600) { + std::cout << "pos1=(" << (int)sift1[j].xpos << "," << (int)sift1[j].ypos << ") "; + std::cout << j << ": " << "score=" << sift1[j].score << " ambiguity=" << sift1[j].ambiguity << " match=" << k << " "; + std::cout << "scale=" << sift1[j].scale << " "; + std::cout << "error=" << (int)sift1[j].match_error << " "; + std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << " "; + std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl; + } +#endif +#if 1 + int len = (int)(fabs(dx) > fabs(dy) ? fabs(dx) : fabs(dy)); + for (int l = 0; l < len; l++) + { + int x = (int)(sift1[j].xpos + dx * l / len); + int y = (int)(sift1[j].ypos + dy * l / len); + h_img[y * w + x] = 255.0f; + } +#endif + } + int x = (int)(sift1[j].xpos + 0.5); + int y = (int)(sift1[j].ypos + 0.5); + int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale))))); + int p = y * w + x; + p += (w + 1); + for (int k = 0; k < s; k++) + h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 0.0f; + p -= (w + 1); + for (int k = 0; k < s; k++) + h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 255.0f; + } + std::cout << std::setprecision(6); +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.o new file mode 100644 index 000000000..2bdeed748 Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/mainSift.cpp.dp.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.cpp new file mode 100644 index 000000000..0a717400f --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.cpp @@ -0,0 +1,2209 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include "cudaSift.h" +#include "cudautils.h" + +//================= Device matching functions =====================// + +void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, float *siftPoint, + float *sums) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1 = item_ct1.get_group(2); + const int p2 = item_ct1.get_group(1) * 16 + ty; + const float *ptr1 = sift1[p1].data; + const float *ptr2 = sift2[p2].data; + const int i = 16 * ty + tx; + if (ty < 8) + siftPoint[i] = ptr1[i]; + /* + DPCT1065:146: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + float sum = 0.0f; + if (p2 < numPts2) + for (int j = 0; j < 8; j++) + sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx]; + sums[i] = sum; + /* + DPCT1065:147: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (tx < 8) + sums[i] += sums[i + 8]; + /* + DPCT1065:148: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (tx < 4) + sums[i] += sums[i + 4]; + /* + DPCT1065:149: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (ty == 0) + { + sum = sums[16 * tx + 0] + sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3]; + corrData[p1 * item_ct1.get_group_range(1) * 16 + + item_ct1.get_group(1) * 16 + tx] = sum; + } + /* + DPCT1065:150: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); +} + +void MatchSiftPoints2(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, float *siftPoints1, + float *siftPoints2) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const float *ptr1 = + sift1[dpct::min(numPts1 - 1, + (unsigned int)(item_ct1.get_group(2) * 16 + ty))] + .data; + const float *ptr2 = + sift2[dpct::min(numPts2 - 1, + (unsigned int)(item_ct1.get_group(1) * 16 + ty))] + .data; + for (int i = 0; i < 8; i++) + { + siftPoints1[128 * ty + 16 * i + tx] = ptr1[16 * i + tx]; + siftPoints2[128 * ty + 16 * i + tx] = ptr2[16 * i + tx]; + } + /* + DPCT1065:151: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + const int p1 = item_ct1.get_group(2) * 16 + ty; + const int p2 = item_ct1.get_group(1) * 16 + tx; + const float *pt1 = &siftPoints1[ty * 128]; + const float *pt2 = &siftPoints2[tx * 128]; + float sum = 0.0f; + for (int i = 0; i < 128; i++) + { + int itx = (i + tx) & 127; // avoid bank conflicts + sum += pt1[itx] * pt2[itx]; + } + if (p1 < numPts1) + corrData[p1 * item_ct1.get_group_range(1) * 16 + p2] = + (p2 < numPts2 ? 
sum : -1.0f); +} + +void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize, + const sycl::nd_item<3> &item_ct1, float *maxScore, + float *maxScor2, int *maxIndex) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * 16 + tx; + int p1 = item_ct1.get_group(2) * 16 + item_ct1.get_local_id(1); + p1 = (p1 >= numPts1 ? numPts1 - 1 : p1); + maxScore[idx] = -1.0f; + maxScor2[idx] = -1.0f; + maxIndex[idx] = -1; + /* + DPCT1065:152: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + float *corrs = &corrData[p1 * corrWidth]; + for (int i = tx; i < corrWidth; i += 16) + { + float val = corrs[i]; + if (val > maxScore[idx]) + { + maxScor2[idx] = maxScore[idx]; + maxScore[idx] = val; + maxIndex[idx] = i; + } + else if (val > maxScor2[idx]) + maxScor2[idx] = val; + } + /* + DPCT1065:153: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int len = 8; len > 0; len /= 2) + { + if (tx < 8) + { + float val = maxScore[idx + len]; + int i = maxIndex[idx + len]; + if (val > maxScore[idx]) + { + maxScor2[idx] = maxScore[idx]; + maxScore[idx] = val; + maxIndex[idx] = i; + } + else if (val > maxScor2[idx]) + maxScor2[idx] = val; + float va2 = maxScor2[idx + len]; + if (va2 > maxScor2[idx]) + maxScor2[idx] = va2; + } + /* + DPCT1118:49: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:154: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + } + if (tx == 0) + { + sift1[p1].score = maxScore[ty * 16]; + sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6); + sift1[p1].match = maxIndex[ty * 16]; + sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos; + } +} + +// Version based on suggestion by Nicholas Lin +void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, int *maxIndex) +{ + int block_dim = item_ct1.get_local_range(2); // blockDim.x == 16 + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1 = item_ct1.get_group(2) * block_dim + ty; + const int idx = ty * 16 + tx; + + maxIndex[idx] = 0; + /* + DPCT1065:155: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + float *corrs = NULL; + if (p1 < numPts1) + { + corrs = &corrData[p1 * block_dim * 2]; + corrs[tx] = 0.0f; + corrs[tx + 16] = 0.0f; + const float *pt1 = sift1[p1].data; + for (int p2 = tx; p2 < numPts2; p2 += 16) + { + float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int i = 0; i < 128; i++) + sum += pt1[i] * pt2[i]; + if (sum > corrs[tx]) + { + corrs[tx + 16] = corrs[tx]; + corrs[tx] = sum; + maxIndex[idx] = p2; + } + else if (sum > corrs[tx + 16]) + corrs[tx + 16] = sum; + } + } + /* + DPCT1065:156: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (p1 < numPts1) + { + for (int len = 8; len > 0; len /= 2) + { + if (tx < len) + { + float val = corrs[tx + len]; + int i = maxIndex[idx + len]; + if (val > corrs[tx]) + { + corrs[tx + 16] = corrs[tx]; + corrs[tx] = val; + maxIndex[idx] = i; + } + else if (val > corrs[tx + 16]) + corrs[tx + 16] = val; + float va2 = corrs[tx + 16 + len]; + if (va2 > corrs[tx + 16]) + corrs[tx + 16] = va2; + } + /* + DPCT1118:50: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:157: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + if (tx == 0) + { + sift1[p1].score = corrs[0]; + sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6); + sift1[p1].match = maxIndex[ty << 4]; + sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos; + } + } +} + +#define FMC2W 16 +#define FMC2H 4 + +void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, float *siftPoint, + float *maxScore, float *maxScor2, int *maxIndex) +{ + + const int p1 = item_ct1.get_group(2); + if (p1 >= numPts1) + return; + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * FMC2W + tx; + if (idx < FMC2H) + { + maxScore[idx] = -1.0f; + maxScor2[idx] = -1.0f; + maxIndex[idx] = 0; + } + /* + DPCT1065:158: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + const float *pt1 = sift1[p1].data; + for (int i = idx; i < 128; i += FMC2W * FMC2H) + siftPoint[i] = pt1[i]; + /* + DPCT1065:159: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int p2 = ty; p2 < numPts2; p2 += FMC2H) + { + const float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int j = tx; j < 128; j += FMC2W) + sum += siftPoint[j] * pt2[j]; + for (int j = FMC2W / 2; j > 0; j /= 2) + sum += ShiftDown(sum, j, item_ct1); + if (tx == 0) + { + if (sum > maxScore[ty]) + { + maxScor2[ty] = maxScore[ty]; + maxScore[ty] = sum; + maxIndex[ty] = p2; + } + else if (sum > maxScor2[ty]) + maxScor2[ty] = sum; + } + } + /* + DPCT1065:160: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int len = FMC2H / 2; len > 0; len /= 2) + { + if (ty == 0 && tx < len) + { + float val = maxScore[tx + len]; + int p2 = maxIndex[tx + len]; + if (val > maxScore[tx]) + { + maxScor2[tx] = maxScore[tx]; + maxScore[tx] = val; + maxIndex[tx] = p2; + } + else if (val > maxScor2[tx]) + maxScor2[tx] = val; + float va2 = maxScor2[tx + len]; + if (va2 > maxScor2[tx]) + maxScor2[tx] = va2; + } + /* + DPCT1118:51: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:161: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + } + if (ty == 0 && tx == 0) + { + sift1[p1].score = maxScore[0]; + sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6); + sift1[p1].match = maxIndex[0]; + sift1[p1].match_xpos = sift2[maxIndex[0]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[0]].ypos; + } +} + +void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, float *siftPoint, + float *maxScore, float *maxScor2, int *maxIndex) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + if (tx == 0) + { + maxScore[ty] = -1.0f; + maxScor2[ty] = -1.0f; + maxIndex[ty] = 0; + } + const int p1 = item_ct1.get_group(2) * FMC2H + ty; + const float *pt1 = sift1[p1].data; + for (int j = tx; j < 128; j += FMC2W) + siftPoint[128 * ty + j] = pt1[j]; + /* + DPCT1065:162: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int p2 = 0; p2 < numPts2; p2++) + { + const float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int j = tx; j < 128; j += FMC2W) + sum += siftPoint[128 * ty + j] * pt2[j]; + for (int j = FMC2W / 2; j > 0; j /= 2) + sum += ShiftDown(sum, j, item_ct1); + if (tx == 0) + { + if (sum > maxScore[ty]) + { + maxScor2[ty] = maxScore[ty]; + maxScore[ty] = sum; + maxIndex[ty] = p2; + } + else if (sum > maxScor2[ty]) + maxScor2[ty] = sum; + } + } + /* + DPCT1065:163: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (tx == 0) + { + sift1[p1].score = maxScore[ty]; + sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6); + sift1[p1].match = maxIndex[ty]; + sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos; + } +} + +void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width) +{ + char *d_src = (char *)src; + char *d_dst = (char *)dst; + + for (int i = 0; i < numPts; ++i) + { + for (int j = 0; j < width; ++j) + { + d_dst[j] = d_src[j]; + } + d_src = d_src + src_pitch; + d_dst = d_dst + dst_pitch; + } +} + +void +CleanMatches(SiftPoint *sift1, int numPts1, const sycl::nd_item<3> &item_ct1) +{ + const int p1 = dpct::min( + (unsigned int)(item_ct1.get_group(2) * 64 + item_ct1.get_local_id(2)), + numPts1 - 1); + sift1[p1].score = 0.0f; +} + +#define M7W 32 +#define M7H 32 +#define M7R 4 +#define NRX 2 +#define NDIM 128 + +/* +DPCT1110:52: The total declared local variable size in device function +FindMaxCorr10 exceeds 128 bytes and may cause high register pressure. Consult +with your hardware vendor to find the total register size available and adjust +the code, or use smaller sub-group size to avoid high register pressure. 
+*/ +void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, sycl::float4 *buffer1, + sycl::float4 *buffer2) +{ + + int tx = item_ct1.get_local_id(2); + int ty = item_ct1.get_local_id(1); + int bp1 = M7W * item_ct1.get_group(2); + for (int j = ty; j < M7W; j += M7H / M7R) + { + int p1 = sycl::min(bp1 + j, numPts1 - 1); + for (int d = tx; d < NDIM / 4; d += M7W) + buffer1[j * NDIM / 4 + (d + j) % (NDIM / 4)] = + ((sycl::float4 *)&sift1[p1].data)[d]; + } + + float max_score[NRX]; + float sec_score[NRX]; + int index[NRX]; + for (int i = 0; i < NRX; i++) + { + max_score[i] = 0.0f; + sec_score[i] = 0.0f; + index[i] = -1; + } + + int idx = ty * M7W + tx; + int ix = idx % (M7W / NRX); + int iy = idx / (M7W / NRX); + for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H) + { + for (int j = ty; j < M7H; j += M7H / M7R) + { + int p2 = sycl::min(bp2 + j, numPts2 - 1); + for (int d = tx; d < NDIM / 4; d += M7W) + buffer2[j * NDIM / 4 + d] = ((sycl::float4 *)&sift2[p2].data)[d]; + } + /* + DPCT1118:53: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:165: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + + if (idx < M7W * M7H / M7R / NRX) + { + float score[M7R][NRX]; + for (int dy = 0; dy < M7R; dy++) + for (int i = 0; i < NRX; i++) + score[dy][i] = 0.0f; + for (int d = 0; d < NDIM / 4; d++) + { + sycl::float4 v1[NRX]; + for (int i = 0; i < NRX; i++) + v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)]; + for (int dy = 0; dy < M7R; dy++) + { + sycl::float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d]; + for (int i = 0; i < NRX; i++) + { + score[dy][i] += v1[i].x() * v2.x(); + score[dy][i] += v1[i].y() * v2.y(); + score[dy][i] += v1[i].z() * v2.z(); + score[dy][i] += v1[i].w() * v2.w(); + } + } + } + for (int dy = 0; dy < M7R; dy++) + { + for (int i = 0; i < NRX; i++) + { + if (score[dy][i] > max_score[i]) + { + sec_score[i] = max_score[i]; + max_score[i] = score[dy][i]; + index[i] = sycl::min(bp2 + M7R * iy + dy, numPts2 - 1); + } + else if (score[dy][i] > sec_score[i]) + sec_score[i] = score[dy][i]; + } + } + } + /* + DPCT1118:54: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:166: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + float *scores1 = (float *)buffer1; + float *scores2 = &scores1[M7W * M7H / M7R]; + int *indices = (int *)&scores2[M7W * M7H / M7R]; + if (idx < M7W * M7H / M7R / NRX) + { + for (int i = 0; i < NRX; i++) + { + scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i]; + scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i]; + indices[iy * M7W + (M7W / NRX) * i + ix] = index[i]; + } + } + /* + DPCT1065:164: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + + if (ty == 0) + { + float max_score = scores1[tx]; + float sec_score = scores2[tx]; + int index = indices[tx]; + for (int y = 0; y < M7H / M7R; y++) + if (index != indices[y * M7W + tx]) + { + if (scores1[y * M7W + tx] > max_score) + { + sec_score = sycl::max(max_score, sec_score); + max_score = scores1[y * M7W + tx]; + index = indices[y * M7W + tx]; + } + else if (scores1[y * M7W + tx] > sec_score) + sec_score = scores1[y * M7W + tx]; + } + sift1[bp1 + tx].score = max_score; + sift1[bp1 + tx].match = index; + sift1[bp1 + tx].match_xpos = sift2[index].xpos; + sift1[bp1 + tx].match_ypos = sift2[index].ypos; + sift1[bp1 + tx].ambiguity = sec_score / (max_score + 1e-6f); + } +} + +#define FMC_GH 512 +#define FMC_BW 32 +#define FMC_BH 32 +#define FMC_BD 16 +#define FMC_TW 1 +#define FMC_TH 4 +#define FMC_NW (FMC_BW / FMC_TW) // 32 +#define FMC_NH (FMC_BH / FMC_TH) // 8 +#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps + +dpct::global_memory lock(0); + +/* +DPCT1110:55: The total declared local variable size in device function +FindMaxCorr9 exceeds 128 bytes and may cause high register pressure. Consult +with your hardware vendor to find the total register size available and adjust +the code, or use smaller sub-group size to avoid high register pressure. 
+*/ +void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, volatile int &lock, + sycl::float4 *siftParts1, sycl::float4 *siftParts2) +{ + // 4*32*8 = 1024 + // 4*32*8 = 1024 + //__shared__ float blksums[FMC_BW*FMC_BH]; // 32*32 = 1024 + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * FMC_NW + tx; + sycl::float4 *pts1 = 0, *pts2 = 0; + if (idx < FMC_BW) + { + const int p1l = dpct::min( + (unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1); + pts1 = (sycl::float4 *)sift1[p1l].data; + } + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < sycl::min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH) + { + if (idx < FMC_BH) + { + const int p2l = + dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), + numPts2 - 1); + pts2 = (sycl::float4 *)sift2[p2l].data; + } + float sums[FMC_TW * FMC_TH]; + for (int i = 0; i < FMC_TW * FMC_TH; i++) + sums[i] = 0.0f; + + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i]; + /* + DPCT1118:56: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:169: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + + int b = FMC_BD / 2; + for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2) + { + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i]; + + b ^= FMC_BD / 2; + for (int i = 0; i < FMC_BD / 2; i++) + { + sycl::float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x(); + sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y(); + sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z(); + sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w(); + } + } + } + /* + DPCT1118:60: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:173: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + + b ^= FMC_BD / 2; + for (int i = 0; i < FMC_BD / 2; i++) + { + sycl::float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x(); + sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y(); + sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z(); + sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w(); + } + } + } + /* + DPCT1118:57: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + /* + DPCT1065:170: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + + float *blksums = (float *)siftParts1; + for (int iy = 0; iy < FMC_TH; iy++) + for (int ix = 0; ix < FMC_TW; ix++) + blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix]; + /* + DPCT1118:58: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:171: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (idx < FMC_BW) + { + for (int j = 0; j < FMC_BH; j++) + { + float sum = blksums[j * FMC_BW + idx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = + dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j), + numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + /* + DPCT1118:59: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:172: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + const int p1 = dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), + numPts1 - 1); + if (idx == 0) + while (dpct::atomic_compare_exchange_strong< + sycl::access::address_space::generic_space>((int *)&lock, 0, + 1) != 0) + ; + /* + DPCT1065:167: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (idx < FMC_BW) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + /* + DPCT1065:168: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (idx == 0) + dpct::atomic_exchange( + (int *)&lock, 0); +} + +/* +DPCT1110:61: The total declared local variable size in device function +FindMaxCorr8 exceeds 128 bytes and may cause high register pressure. Consult +with your hardware vendor to find the total register size available and adjust +the code, or use smaller sub-group size to avoid high register pressure. 
+*/ +void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, volatile int &lock, + sycl::float4 *siftParts1, sycl::float4 *siftParts2, + float *blksums) +{ + // 4*32*8 = 1024 + // 4*32*8 = 1024 + // 32*32 = 1024 + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * FMC_NW + tx; + sycl::float4 *pts1 = 0, *pts2 = 0; + if (idx < FMC_BW) + { + const int p1l = dpct::min( + (unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), numPts1 - 1); + pts1 = (sycl::float4 *)sift1[p1l].data; + } + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < sycl::min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH) + { + if (idx < FMC_BH) + { + const int p2l = + dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), + numPts2 - 1); + pts2 = (sycl::float4 *)sift2[p2l].data; + } + float sums[FMC_TW * FMC_TH]; + for (int i = 0; i < FMC_TW * FMC_TH; i++) + sums[i] = 0.0f; + for (int d = 0; d < 32; d += FMC_BD) + { + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD; i++) + siftParts1[i * FMC_BW + idx] = pts1[d + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD; i++) + siftParts2[i * FMC_BH + idx] = pts2[d + i]; + /* + DPCT1118:64: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:178: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + + for (int i = 0; i < FMC_BD; i++) + { + sycl::float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + sycl::float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x(); + sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y(); + sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z(); + sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w(); + } + } + } + /* + DPCT1118:65: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:179: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + // float *blksums = (float*)siftParts1; + for (int iy = 0; iy < FMC_TH; iy++) + for (int ix = 0; ix < FMC_TW; ix++) + blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix]; + /* + DPCT1118:62: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:176: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (idx < FMC_BW) + { + for (int j = 0; j < FMC_BH; j++) + { + float sum = blksums[j * FMC_BW + idx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = + dpct::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j), + numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + /* + DPCT1118:63: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + /* + DPCT1065:177: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + const int p1 = dpct::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), + numPts1 - 1); + if (idx == 0) + while (dpct::atomic_compare_exchange_strong< + sycl::access::address_space::generic_space>((int *)&lock, 0, + 1) != 0) + ; + /* + DPCT1065:174: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (idx < FMC_BW) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + /* + DPCT1065:175: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (idx == 0) + dpct::atomic_exchange( + (int *)&lock, 0); +} + +/* +DPCT1110:66: The total declared local variable size in device function +FindMaxCorr7 exceeds 128 bytes and may cause high register pressure. Consult +with your hardware vendor to find the total register size available and adjust +the code, or use smaller sub-group size to avoid high register pressure. 
+*/ +void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, volatile int &lock, + float *siftParts1, float *siftParts2) +{ + // features in columns + // one extra to avoid shared conflicts + sycl::float4 *pts1 = (sycl::float4 *)siftParts1; + sycl::float4 *pts2 = (sycl::float4 *)siftParts2; + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1l = + dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1); + const sycl::float4 *p1l4 = (sycl::float4 *)sift1[p1l].data; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512 / 16; k++) + { + const int p2l = dpct::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1); + const sycl::float4 *p2l4 = (sycl::float4 *)sift2[p2l].data; +#define NUM 4 + float sum[NUM]; + if (ty < (16 / NUM)) + for (int l = 0; l < NUM; l++) + sum[l] = 0.0f; + /* + DPCT1118:67: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:182: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int i = 0; i < 2; i++) + { + pts1[17 * tx + ty] = p1l4[i * 16 + tx]; + pts2[16 * ty + tx] = p2l4[i * 16 + tx]; + /* + DPCT1118:70: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:185: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (ty < (16 / NUM)) + { +#pragma unroll + for (int j = 0; j < 16; j++) + { + sycl::float4 p1v = pts1[17 * j + tx]; +#pragma unroll + for (int l = 0; l < NUM; l++) + { + sycl::float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j]; + sum[l] += p1v.x() * p2v.x(); + sum[l] += p1v.y() * p2v.y(); + sum[l] += p1v.z() * p2v.z(); + sum[l] += p1v.w() * p2v.w(); + } + } + } + /* + DPCT1118:71: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:186: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + float *sums = siftParts1; + if (ty < (16 / NUM)) + for (int l = 0; l < NUM; l++) + sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l]; + /* + DPCT1118:68: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:183: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (ty == 0) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = dpct::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), + numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + /* + DPCT1118:69: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:184: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + } + const int p1 = + dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1); + if (tx == 0 && ty == 0) + while (dpct::atomic_compare_exchange_strong< + sycl::access::address_space::generic_space>((int *)&lock, 0, + 1) != 0) + ; + /* + DPCT1065:180: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (ty == 0) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + /* + DPCT1065:181: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (tx == 0 && ty == 0) + dpct::atomic_exchange( + (int *)&lock, 0); +} + +void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, volatile int &lock, + float *siftParts2, float *sums) +{ + //__shared__ float siftParts1[128*16]; // features in columns + // one extra to avoid shared conflicts + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1l = + dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1); + float *pt1l = sift1[p1l].data; + sycl::float4 part1 = reinterpret_cast(pt1l)[tx]; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512; k += 16) + { + const int p2l = dpct::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k + ty), numPts2 - 1); + float *pt2l = sift2[p2l].data; + reinterpret_cast(siftParts2)[32 * ty + tx] = + reinterpret_cast(pt2l)[tx]; + /* + DPCT1118:72: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:189: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int i = 0; i < 16; i++) + { + sycl::float4 part2 = + reinterpret_cast(siftParts2)[32 * i + tx]; + float sum = part1.x() * part2.x() + part1.y() * part2.y() + + part1.z() * part2.z() + part1.w() * part2.w(); + sum += ShiftDown(sum, 16, item_ct1); + sum += ShiftDown(sum, 8, item_ct1); + sum += ShiftDown(sum, 4, item_ct1); + sum += ShiftDown(sum, 2, item_ct1); + sum += ShiftDown(sum, 1, item_ct1); + if (tx == 0) + sums[16 * i + ty] = sum; + } + /* + DPCT1118:73: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + /* + DPCT1065:190: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (ty == 0 && tx < 16) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = dpct::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k + j), numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + /* + DPCT1118:74: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:191: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + if (tx == 0 && ty == 0) + while (dpct::atomic_compare_exchange_strong< + sycl::access::address_space::generic_space>((int *)&lock, 0, + 1) != 0) + ; + /* + DPCT1065:187: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + if (ty == 0 && tx < 16) + { + const int p1 = + dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1); + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + /* + DPCT1065:188: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (tx == 0 && ty == 0) + dpct::atomic_exchange( + (int *)&lock, 0); +} + +void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + const sycl::nd_item<3> &item_ct1, volatile int &lock, + float *siftParts1, float *siftParts2) +{ + // features in columns + // one extra to avoid shared conflicts + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1l = + dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), numPts1 - 1); + const float *pt1l = sift1[p1l].data; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512 / 16; k++) + { + const int p2l = dpct::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), numPts2 - 1); + const float *pt2l = sift2[p2l].data; + float sum = 0.0f; + for (int i = 0; i < 8; i++) + { + siftParts1[17 * tx + ty] = pt1l[i * 16 + tx]; // load and transpose + siftParts2[17 * tx + ty] = pt2l[i * 16 + tx]; + /* + DPCT1118:77: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + /* + DPCT1065:196: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + for (int j = 0; j < 16; j++) + sum += siftParts1[17 * j + tx] * siftParts2[17 * j + ty]; + /* + DPCT1118:78: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:197: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + } + float *sums = siftParts1; + sums[16 * ty + tx] = sum; + /* + DPCT1118:75: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:194: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (ty == 0) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = dpct::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), + numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + /* + DPCT1118:76: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. + */ + /* + DPCT1065:195: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. 
+ */ + item_ct1.barrier(); + } + const int p1 = + dpct::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), numPts1 - 1); + if (tx == 0 && ty == 0) + while (dpct::atomic_compare_exchange_strong< + sycl::access::address_space::generic_space>((int *)&lock, 0, + 1) != 0) + ; + /* + DPCT1065:192: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (ty == 0) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + /* + DPCT1065:193: Consider replacing sycl::nd_item::barrier() with + sycl::nd_item::barrier(sycl::access::fence_space::local_space) for better + performance if there is no access to global memory. + */ + item_ct1.barrier(); + if (tx == 0 && ty == 0) + dpct::atomic_exchange( + (int *)&lock, 0); +} + +template +/* +DPCT1110:79: The total declared local variable size in device function +InvertMatrix exceeds 128 bytes and may cause high register pressure. Consult +with your hardware vendor to find the total register size available and adjust +the code, or use smaller sub-group size to avoid high register pressure. 
+*/ +void InvertMatrix(float elem[size][size], float res[size][size]) +{ + int indx[size]; + float b[size]; + float vv[size]; + for (int i = 0; i < size; i++) + indx[i] = 0; + int imax = 0; + float d = 1.0; + for (int i = 0; i < size; i++) + { // find biggest element for each row + float big = 0.0; + for (int j = 0; j < size; j++) + { + float temp = sycl::fabs(elem[i][j]); + if (temp > big) + big = temp; + } + if (big > 0.0) + vv[i] = 1.0 / big; + else + vv[i] = 1e16; + } + for (int j = 0; j < size; j++) + { + for (int i = 0; i < j; i++) + { // ik (upper right), k=j + float sum = elem[i][j]; // i>=j (upper right) + for (int k = 0; k < j; k++) // kk (upper right), k=j (upper right) + float dum = vv[i] * sycl::fabs(sum); + if (dum >= big) + { + big = dum; + imax = i; + } + } + if (j != imax) + { // imax>j + for (int k = 0; k < size; k++) + { + float dum = elem[imax][k]; // upper right and lower left + elem[imax][k] = elem[j][k]; + elem[j][k] = dum; + } + d = -d; + vv[imax] = vv[j]; + } + indx[j] = imax; + if (elem[j][j] == 0.0) // j==j (upper right) + elem[j][j] = 1e-16; + if (j != (size - 1)) + { + float dum = 1.0 / elem[j][j]; + for (int i = j + 1; i < size; i++) // i>j + elem[i][j] *= dum; // i>j (upper right) + } + } + for (int j = 0; j < size; j++) + { + for (int k = 0; k < size; k++) + b[k] = 0.0; + b[j] = 1.0; + int ii = -1; + for (int i = 0; i < size; i++) + { + int ip = indx[i]; + float sum = b[ip]; + b[ip] = b[i]; + if (ii != -1) + for (int j = ii; j < i; j++) + sum -= elem[i][j] * b[j]; // i>j (upper right) + else if (sum != 0.0) + ii = i; + b[i] = sum; + } + for (int i = size - 1; i >= 0; i--) + { + float sum = b[i]; + for (int j = i + 1; j < size; j++) + sum -= elem[i][j] * b[j]; // i &item_ct1) +{ + float a[8][8], ia[8][8]; + float b[8]; + const int bx = item_ct1.get_group(2); + const int tx = item_ct1.get_local_id(2); + const int idx = item_ct1.get_local_range(2) * bx + tx; + const int numLoops = + item_ct1.get_local_range(2) * 
item_ct1.get_group_range(2); + for (int i = 0; i < 4; i++) + { + int pt = randPts[i * numLoops + idx]; + float x1 = coord[pt + 0 * numPts]; + float y1 = coord[pt + 1 * numPts]; + float x2 = coord[pt + 2 * numPts]; + float y2 = coord[pt + 3 * numPts]; + float *row1 = a[2 * i + 0]; + row1[0] = x1; + row1[1] = y1; + row1[2] = 1.0; + row1[3] = row1[4] = row1[5] = 0.0; + row1[6] = -x2 * x1; + row1[7] = -x2 * y1; + float *row2 = a[2 * i + 1]; + row2[0] = row2[1] = row2[2] = 0.0; + row2[3] = x1; + row2[4] = y1; + row2[5] = 1.0; + row2[6] = -y2 * x1; + row2[7] = -y2 * y1; + b[2 * i + 0] = x2; + b[2 * i + 1] = y2; + } + InvertMatrix<8>(a, ia); + item_ct1.barrier(sycl::access::fence_space::local_space); + for (int j = 0; j < 8; j++) + { + float sum = 0.0f; + for (int i = 0; i < 8; i++) + sum += ia[j][i] * b[i]; + homo[j * numLoops + idx] = sum; + } + item_ct1.barrier(sycl::access::fence_space::local_space); +} + +#define TESTHOMO_TESTS 16 // number of tests per block, alt. 32, 32 +#define TESTHOMO_LOOPS 16 // number of loops per block, alt. 
8, 16 + +void TestHomographies(float *d_coord, float *d_homo, + int *d_counts, int numPts, float thresh2, + const sycl::nd_item<3> &item_ct1, float *homo, + int *cnts) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = item_ct1.get_group(1) * item_ct1.get_local_range(1) + tx; + const int numLoops = + item_ct1.get_local_range(1) * item_ct1.get_group_range(1); + if (ty < 8 && tx < TESTHOMO_LOOPS) + homo[tx * 8 + ty] = d_homo[idx + ty * numLoops]; + item_ct1.barrier(sycl::access::fence_space::local_space); + float a[8]; + for (int i = 0; i < 8; i++) + a[i] = homo[ty * 8 + i]; + int cnt = 0; + for (int i = tx; i < numPts; i += TESTHOMO_TESTS) + { + float x1 = d_coord[i + 0 * numPts]; + float y1 = d_coord[i + 1 * numPts]; + float x2 = d_coord[i + 2 * numPts]; + float y2 = d_coord[i + 3 * numPts]; + /* + DPCT1013:198: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float nomx = a[0] * x1 + a[1] * y1 + a[2]; + /* + DPCT1013:199: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float nomy = a[3] * x1 + a[4] * y1 + a[5]; + /* + DPCT1013:200: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float deno = a[6] * x1 + a[7] * y1 + 1.0f; + /* + DPCT1013:201: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. 
SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float errx = x2 * deno - nomx; + /* + DPCT1013:202: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float erry = y2 * deno - nomy; + /* + DPCT1013:203: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + float err2 = errx * errx + erry * erry; + /* + DPCT1013:204: The rounding mode could not be specified and the generated + code may have different accuracy than the original code. Verify the + correctness. SYCL math built-in function rounding mode is aligned with + OpenCL C 1.2 standard. + */ + if (err2 < thresh2 * deno * deno) + cnt++; + } + int kty = TESTHOMO_TESTS * ty; + cnts[kty + tx] = cnt; + item_ct1.barrier(sycl::access::fence_space::local_space); + int len = TESTHOMO_TESTS / 2; + while (len > 0) + { + if (tx < len) + cnts[kty + tx] += cnts[kty + tx + len]; + len /= 2; + /* + DPCT1118:81: SYCL group functions and algorithms must be encountered in + converged control flow. You may need to adjust the code. 
+ */ + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (tx < TESTHOMO_LOOPS && ty == 0) + d_counts[idx] = cnts[TESTHOMO_TESTS * tx]; + item_ct1.barrier(sycl::access::fence_space::local_space); +} + +//================= Host matching functions =====================// + +double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh) +{ + *numMatches = 0; + homography[0] = homography[4] = homography[8] = 1.0f; + homography[1] = homography[2] = homography[3] = 0.0f; + homography[5] = homography[6] = homography[7] = 0.0f; + if (data.d_data == NULL) + return 0.0f; + SiftPoint *d_sift = data.d_data; + numLoops = iDivUp(numLoops, 16) * 16; + int numPts = data.numPts; + if (numPts < 8) + return 0.0f; + int numPtsUp = iDivUp(numPts, 16) * 16; + float *d_coord, *d_homo; + int *d_randPts, *h_randPts; + int randSize = 4 * sizeof(int) * numLoops; + int szFl = sizeof(float); + int szPt = sizeof(SiftPoint); +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR( + d_coord = (float *)sycl::malloc_device(4 * sizeof(float) * numPtsUp, + dpct::get_in_order_queue()))); + safeCall(DPCT_CHECK_ERROR(d_randPts = (int *)sycl::malloc_device( + randSize, dpct::get_in_order_queue()))); + safeCall(DPCT_CHECK_ERROR( + d_homo = (float *)sycl::malloc_device(8 * sizeof(float) * numLoops, + dpct::get_in_order_queue()))); + +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + h_randPts = (int *)malloc(randSize); + float *h_scores = (float *)malloc(sizeof(float) * numPtsUp); + float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp); + + // temp variables are for host memory allocation, device data is transferred to temp + float *temp1 = (float *)malloc(szPt * numPtsUp); + float *temp2 = (float *)malloc(szPt * numPtsUp); 
+ +#ifdef DEVICE_TIMER + auto start_memcpy_1 = std::chrono::steady_clock::now(); +#endif + + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(temp1, &d_sift[0].score, szPt * numPts) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(temp2, &d_sift[0].ambiguity, szPt * numPts) + .wait())); + +#ifdef DEVICE_TIMER + auto stop_memcpy_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_1 - start_memcpy_1).count(); +#endif + + char *src_score = (char *)temp1; + char *src_ambiguity = (char *)temp2; + char *dst_score = (char *)h_scores; + char *dst_ambiguity = (char *)h_ambiguities; + + for (int i = 0; i < numPts; ++i) + { + memcpy(dst_score, src_score, szFl); + memcpy(dst_ambiguity, src_ambiguity, szFl); + src_score += szPt; + src_ambiguity += szPt; + dst_score += szFl; + dst_ambiguity += szFl; + } + + int *validPts = (int *)malloc(sizeof(int) * numPts); + int numValid = 0; + for (int i = 0; i < numPts; i++) + { + if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity) + validPts[numValid++] = i; + } + free(h_scores); + free(h_ambiguities); + if (numValid >= 8) + { + std::random_device rd; + uint32_t seed = rd(); + std::mt19937 rnd(seed); // mersenne_twister_engine + std::uniform_int_distribution dis(0, UINT32_MAX); + for (int i = 0; i < numLoops; i++) + { + int p1 = dis(rnd) % numValid; + int p2 = dis(rnd) % numValid; + int p3 = dis(rnd) % numValid; + int p4 = dis(rnd) % numValid; + while (p2 == p1) + p2 = dis(rnd) % numValid; + while (p3 == p1 || p3 == p2) + p3 = dis(rnd) % numValid; + while (p4 == p1 || p4 == p2 || p4 == p3) + p4 = dis(rnd) % numValid; + h_randPts[i + 0 * numLoops] = validPts[p1]; + h_randPts[i + 1 * numLoops] = validPts[p2]; + h_randPts[i + 2 * numLoops] = validPts[p3]; + h_randPts[i + 3 * numLoops] = validPts[p4]; + } + + float *temp3, *temp4, *temp5, *temp6; +#ifdef DEVICE_TIMER + auto start_malloc_2 = std::chrono::steady_clock::now(); +#endif + safeCall( + 
DPCT_CHECK_ERROR(temp3 = (float *)sycl::malloc_device( + szPt * numPtsUp, dpct::get_in_order_queue()))); + safeCall( + DPCT_CHECK_ERROR(temp4 = (float *)sycl::malloc_device( + szPt * numPtsUp, dpct::get_in_order_queue()))); + safeCall( + DPCT_CHECK_ERROR(temp5 = (float *)sycl::malloc_device( + szPt * numPtsUp, dpct::get_in_order_queue()))); + safeCall( + DPCT_CHECK_ERROR(temp6 = (float *)sycl::malloc_device( + szPt * numPtsUp, dpct::get_in_order_queue()))); +#ifdef DEVICE_TIMER + auto stop_malloc_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc_2 - start_malloc_2).count(); +#endif +#ifdef DEVICE_TIMER + auto start_memcpy_2 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue() + .memcpy(d_randPts, h_randPts, randSize) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy( + temp3, &d_sift[0].xpos, szPt * numPts))); + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy( + temp4, &d_sift[0].ypos, szPt * numPts))); + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy( + temp5, &d_sift[0].match_xpos, szPt * numPts))); + safeCall(DPCT_CHECK_ERROR(dpct::get_in_order_queue().memcpy( + temp6, &d_sift[0].match_ypos, szPt * numPts))); + + // kernel calto transfer memory from device to device + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + float *d_coord_numPtsUp_ct1 = &d_coord[0 * numPtsUp]; + + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + memcopyKernel(temp3, d_coord_numPtsUp_ct1, szPt, szFl, numPts, + szFl); + }); + }); + /* + DPCT1010:205: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this code. 
+ */ + safeCall(0); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + float *d_coord_numPtsUp_ct1 = &d_coord[1 * numPtsUp]; + + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + memcopyKernel(temp4, d_coord_numPtsUp_ct1, szPt, szFl, numPts, + szFl); + }); + }); + /* + DPCT1010:206: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this code. + */ + safeCall(0); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + float *d_coord_numPtsUp_ct1 = &d_coord[2 * numPtsUp]; + + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + memcopyKernel(temp5, d_coord_numPtsUp_ct1, szPt, szFl, numPts, + szFl); + }); + }); + /* + DPCT1010:207: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this code. + */ + safeCall(0); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + float *d_coord_numPtsUp_ct1 = &d_coord[3 * numPtsUp]; + + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + memcopyKernel(temp6, d_coord_numPtsUp_ct1, szPt, szFl, numPts, + szFl); + }); + }); + /* + DPCT1010:208: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this code. 
+ */ + safeCall(0); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); + +#ifdef DEVICE_TIMER + auto stop_memcpy_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_2 - start_memcpy_2).count(); +#endif +#ifdef DEVICE_TIMER + auto start_kernel_1 = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, numLoops / 16) * + sycl::range<3>(1, 1, 16), + sycl::range<3>(1, 1, 16)), + [=](sycl::nd_item<3> item_ct1) { + ComputeHomographies(d_coord, d_randPts, d_homo, numPtsUp, item_ct1); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_1 - start_kernel_1).count(); + // printf("ComputeHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_1 - start_kernel_1).count()); +#endif + checkMsg("ComputeHomographies() execution failed\n"); + + sycl::range<3> blocks(1, numLoops / TESTHOMO_LOOPS, 1); + sycl::range<3> threads(1, TESTHOMO_LOOPS, TESTHOMO_TESTS); +#ifdef DEVICE_TIMER + auto start_kernel_2 = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + /* + DPCT1101:210: '8 * TESTHOMO_LOOPS' expression was replaced with a value. + Modify the code to use the original expression, provided in comments, if + it is correct. + */ + sycl::local_accessor homo_acc_ct1( + sycl::range<1>(128 /*8 * TESTHOMO_LOOPS*/), cgh); + /* + DPCT1101:211: 'TESTHOMO_TESTS * TESTHOMO_LOOPS' expression was replaced + with a value. Modify the code to use the original expression, provided + in comments, if it is correct. 
+ */ + sycl::local_accessor cnts_acc_ct1( + sycl::range<1>(256 /*TESTHOMO_TESTS * TESTHOMO_LOOPS*/), cgh); + + cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) { + TestHomographies(d_coord, d_homo, d_randPts, numPtsUp, + thresh * thresh, item_ct1, + homo_acc_ct1.get_pointer(), + cnts_acc_ct1.get_pointer()); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_2 - start_kernel_2).count(); + // printf("TestHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_2 - start_kernel_2).count()); +#endif + checkMsg("TestHomographies() execution failed\n"); +#ifdef DEVICE_TIMER + auto start_memcpy_3 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR( + dpct::get_in_order_queue() + .memcpy(h_randPts, d_randPts, sizeof(int) * numLoops) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy_3 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_3 - start_memcpy_3).count(); +#endif + int maxIndex = -1, maxCount = -1; + for (int i = 0; i < numLoops; i++) + if (h_randPts[i] > maxCount) + { + maxCount = h_randPts[i]; + maxIndex = i; + } + + *numMatches = maxCount; +#ifdef DEVICE_TIMER + auto start_memcpy_4 = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR(dpct::dpct_memcpy( + homography, szFl, &d_homo[maxIndex], sizeof(float) * numLoops, szFl, 8, + dpct::device_to_host))); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy_4 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_4 - start_memcpy_4).count(); +#endif + + safeCall(DPCT_CHECK_ERROR(sycl::free(temp3, dpct::get_in_order_queue()))); + 
safeCall(DPCT_CHECK_ERROR(sycl::free(temp4, dpct::get_in_order_queue()))); + safeCall(DPCT_CHECK_ERROR(sycl::free(temp5, dpct::get_in_order_queue()))); + safeCall(DPCT_CHECK_ERROR(sycl::free(temp6, dpct::get_in_order_queue()))); + } + + free(validPts); + free(h_randPts); + free(temp1); + free(temp2); + + safeCall(DPCT_CHECK_ERROR(sycl::free(d_homo, dpct::get_in_order_queue()))); + safeCall(DPCT_CHECK_ERROR(sycl::free(d_randPts, dpct::get_in_order_queue()))); + safeCall(DPCT_CHECK_ERROR(sycl::free(d_coord, dpct::get_in_order_queue()))); + return matchTime; +} + +double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime) +{ + int numPts1 = data1.numPts; + int numPts2 = data2.numPts; + if (!numPts1 || !numPts2) + return 0.0; +#ifdef MANAGEDMEM + SiftPoint *sift1 = data1.m_data; + SiftPoint *sift2 = data2.m_data; +#else + if (data1.d_data == NULL || data2.d_data == NULL) + return 0.0f; + SiftPoint *sift1 = data1.d_data; + SiftPoint *sift2 = data2.d_data; +#endif + +// Original version with correlation and maximization in two different kernels +// Global memory reguirement: O(N^2) +#if 0 + float *d_corrData; + int corrWidth = iDivUp(numPts2, 16)*16; + int corrSize = sizeof(float)*numPts1*corrWidth; + safeCall(cudaMalloc((void **)&d_corrData, corrSize)); +#if 0 + dim3 blocks1(numPts1, iDivUp(numPts2, 16)); + dim3 threads1(16, 16); // each block: 1 points x 16 points + MatchSiftPoints<<>>(sift1, sift2, d_corrData, numPts1, numPts2); +#else + dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16)); + dim3 threads(16, 16); // each block: 16 points x 16 points + MatchSiftPoints2<<>>(sift1, sift2, d_corrData, numPts1, numPts2); +#endif + safeCall(cudaDeviceSynchronize()); + dim3 blocksMax(iDivUp(numPts1, 16)); + dim3 threadsMax(16, 16); + FindMaxCorr<<>>(d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint)); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr() execution failed\n"); + safeCall(cudaFree(d_corrData)); +#endif + +// Version 
suggested by Nicholas Lin with combined correlation and maximization +// Global memory reguirement: O(N) +#if 0 + int block_dim = 16; + float *d_corrData; + int corrSize = numPts1 * block_dim * 2; + safeCall(cudaMalloc((void **)&d_corrData, sizeof(float) * corrSize)); + dim3 blocks(iDivUp(numPts1, block_dim)); + dim3 threads(block_dim, block_dim); + FindMaxCorr3<<>>(d_corrData, sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr3() execution failed\n"); + safeCall(cudaFree(d_corrData)); +#endif + +#if 0 + dim3 blocksMax(numPts1); + dim3 threadsMax(FMC2W, FMC2H); + FindMaxCorr2<<>>(sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr2() execution failed\n"); +#endif + +// Combined version with no global memory requirement using one FMC2H points per block +#if 0 + dim3 blocksMax2(iDivUp(numPts1, FMC2H)); + dim3 threadsMax2(FMC2W, FMC2H); + FindMaxCorr4<<>>(sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr4() execution failed\n"); +#endif + +// Combined version with no global memory requirement using global locks +#if 1 + sycl::range<3> blocksMax3(1, iDivUp(numPts2, 512), iDivUp(numPts1, 16)); + sycl::range<3> threadsMax3(1, 16, 16); +#ifdef DEVICE_TIMER + auto start_kernel1 = std::chrono::steady_clock::now(); +#endif + dpct::get_in_order_queue().parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, iDivUp(numPts1, 64)) * + sycl::range<3>(1, 1, 64), + sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item_ct1) { + CleanMatches(sift1, numPts1, item_ct1); + }); + /* + DPCT1010:209: SYCL uses exceptions to report errors and does not use the error + codes. The call was replaced with 0. You need to rewrite this code. 
+ */ + safeCall(0); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel1 = std::chrono::steady_clock::now(); + // printf("CleanMatches time = %.2f us\n", std::chrono::duration(stop_kernel1 - start_kernel1).count()); + matchTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); + auto matchSiftDataTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); +#endif + int mode = 10; + // if (mode == 5) // K40c 5.0ms, 1080 Ti 1.2ms, 2080 Ti 0.83ms + // FindMaxCorr5<<>>(sift1, sift2, numPts1, numPts2); + // else if (mode == 6) + // { // 2080 Ti 0.89ms + // threadsMax3 = dim3(32, 16); + // FindMaxCorr6<<>>(sift1, sift2, numPts1, numPts2); + // } + // else if (mode == 7) // 2080 Ti 0.50ms + // FindMaxCorr7<<>>(sift1, sift2, numPts1, numPts2); + // else if (mode == 8) + // { // 2080 Ti 0.45ms + // blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH)); + // threadsMax3 = dim3(FMC_NW, FMC_NH); + // FindMaxCorr8<<>>(sift1, sift2, numPts1, numPts2); + // } + // else if (mode == 9) + // { // 2080 Ti 0.46ms + // blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH)); + // threadsMax3 = dim3(FMC_NW, FMC_NH); + // FindMaxCorr9<<>>(sift1, sift2, numPts1, numPts2); + // } + // else + if (mode == 10) + { + blocksMax3 = sycl::range<3>(1, 1, iDivUp(numPts1, M7W)); + threadsMax3 = sycl::range<3>(1, M7H / M7R, M7W); +#ifdef DEVICE_TIMER + auto start_kernel2 = std::chrono::steady_clock::now(); +#endif + /* + DPCT1049:82: The work-group size passed to the SYCL kernel may exceed the + limit. To get the device limit, query info::device::max_work_group_size. + Adjust the work-group size if needed. + */ + dpct::get_in_order_queue().submit([&](sycl::handler &cgh) { + /* + DPCT1101:212: 'M7W * NDIM / 4' expression was replaced with a value. + Modify the code to use the original expression, provided in comments, if + it is correct. 
+ */ + sycl::local_accessor buffer1_acc_ct1( + sycl::range<1>(1024 /*M7W * NDIM / 4*/), cgh); + /* + DPCT1101:213: 'M7H * NDIM / 4' expression was replaced with a value. + Modify the code to use the original expression, provided in comments, if + it is correct. + */ + sycl::local_accessor buffer2_acc_ct1( + sycl::range<1>(1024 /*M7H * NDIM / 4*/), cgh); + + cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), + [=](sycl::nd_item<3> item_ct1) { + FindMaxCorr10(sift1, sift2, numPts1, numPts2, item_ct1, + buffer1_acc_ct1.get_pointer(), + buffer2_acc_ct1.get_pointer()); + }); + }); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_kernel2 = std::chrono::steady_clock::now(); + // printf("FindMaxCorr10 time = %.2f us\n", std::chrono::duration(stop_kernel2 - start_kernel2).count()); + matchTime += std::chrono::duration(stop_kernel2 - start_kernel2).count(); + matchSiftDataTime += std::chrono::duration(stop_kernel2 - start_kernel2).count(); +#endif + } + checkMsg("FindMaxCorr10() execution failed\n"); +#endif + + if (data1.h_data != NULL) + { + float *h_ptr = &data1.h_data[0].score; + float *d_ptr = &data1.d_data[0].score; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(DPCT_CHECK_ERROR( + dpct::get_in_order_queue() + .memcpy(h_ptr, d_ptr, sizeof(SiftPoint) * data1.numPts) + .wait())); + safeCall( + DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); + matchSiftDataTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + } + return matchTime; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.o b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.o new file mode 100644 index 000000000..cd453ccc0 Binary 
files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/CUDA/out/matching.dp.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/CMakeLists.txt b/third-party-programs/Velocity-Bench/cudaSift/HIP/CMakeLists.txt new file mode 100644 index 000000000..378b04c35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/CMakeLists.txt @@ -0,0 +1,108 @@ +# Modifications Copyright (C) 2023 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. 
+ +# SPDX-License-Identifier: MIT + +cmake_minimum_required(VERSION 3.10) +project(cudasift LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(DEF_WL_CXX_FLAGS " -D__HIP_PLATFORM_AMD__ ") +set(DEF_GENERAL_CXX_FLAGS " -Wall -O3 -Wextra ") +set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}") + +if(NOT DEFINED ROCM_PATH) + if(NOT DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which HIP has been installed") + else() + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which HIP has been installed") + endif() +endif() + +set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH}) +set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS}) +set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES}) + +option(DEVICE_TIMER "Build using Device Timer" OFF) + +find_package(HIP REQUIRED) + +if(HIP_FOUND) + message(STATUS "Found HIP: " ${HIP_VERSION}) +else() + message(FATAL_ERROR "Could not find HIP!") +endif() + +find_package(OpenCV REQUIRED) +include_directories(${OpenCV_INCLUDE_DIRS}) + +set(SOURCES + ${CMAKE_SOURCE_DIR}/../common/Utility.cpp + cudaImage.cpp + cudaImage.h + cudaSiftH.cpp + cudaSiftH.h + matching.cpp + cudaSiftD.h + cudaSift.h + cudautils.h + geomFuncs.cpp + mainSift.cpp +) + +include_directories( + ${CMAKE_SOURCE_DIR}/../common/ + ${CMAKE_CURRENT_SOURCE_DIR} +) + +# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific) +# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags) +# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity +if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together") +elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND 
"${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "Using DEFAULT compilation flags") + set(CMAKE_CXX_FLAGS "${DEF_COMBINED_CXX_FLAGS}") +elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL compilation flags") + set(CMAKE_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS ${DEF_WL_CXX_FLAGS}) +elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags") +endif() + +message(STATUS "CXX Compilation flags to: ${CMAKE_CXX_FLAGS}") + +if(DEVICE_TIMER) + message(STATUS "Enabling Device Timer") + add_compile_options(-DDEVICE_TIMER) +endif() + +set(HIP_SEPARABLE_COMPILATION ON) +set(MY_TARGET_NAME ${PROJECT_NAME}) +set(MY_HIPCC_OPTIONS) +set(MY_NVCC_OPTIONS) +set(CMAKE_HIP_ARCHITECTURES OFF) + +set_source_files_properties(${cuda_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) +hip_add_executable(${MY_TARGET_NAME} ${SOURCES} ${MY_HIPCC_OPTIONS} NVCC_OPTIONS ${MY_NVCC_OPTIONS}) +target_link_libraries(cudasift stdc++ stdc++fs ${OpenCV_LIBS}) diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.cpp new file mode 100644 index 000000000..5c4cb0ccf --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.cpp @@ -0,0 +1,111 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to 
whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include "cudautils.h" +#include "cudaImage.h" + +int iDivUp(int a, int b) { return (a % b != 0) ? (a / b + 1) : (a / b); } +int iDivDown(int a, int b) { return a / b; } +int iAlignUp(int a, int b) { return (a % b != 0) ? 
(a - a % b + b) : a; } +int iAlignDown(int a, int b) { return a - a % b; } + +void CudaImage::Allocate(int w, int h, int p, bool host, float &time, float *devmem, float *hostmem) +{ + width = w; + height = h; + pitch = p; + d_data = devmem; + h_data = hostmem; + t_data = NULL; + if (devmem == NULL) + { + +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(hipMallocPitch((void **)&d_data, (size_t *)&pitch, (size_t)(sizeof(float) * width), (size_t)height)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + std::cout << "Allocate time is " << std::chrono::duration(stop_malloc - start_malloc).count() << " us" << std::endl; + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + pitch /= sizeof(float); + if (d_data == NULL) + printf("Failed to allocate device data\n"); + d_internalAlloc = true; + } + if (host && hostmem == NULL) + { + h_data = (float *)malloc(sizeof(float) * pitch * height); + h_internalAlloc = true; + } +} + +CudaImage::CudaImage() : width(0), height(0), pitch(0), d_data(NULL), h_data(NULL), t_data(NULL), d_internalAlloc(false), h_internalAlloc(false) +{ +} + +CudaImage::~CudaImage() +{ + if (d_internalAlloc && d_data != NULL) + safeCall(hipFree(d_data)); + d_data = NULL; + if (h_internalAlloc && h_data != NULL) + free(h_data); + h_data = NULL; + if (t_data != NULL) + safeCall(hipFreeArray((hipArray *)t_data)); + t_data = NULL; +} + +double CudaImage::Download(float &time) +{ + double downloadTime = 0.0; + int p = sizeof(float) * pitch; + if (d_data != NULL && h_data != NULL) + { + +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpy(d_data, h_data, sizeof(float) * width * height, hipMemcpyHostToDevice)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + downloadTime = std::chrono::duration(stop_memcpy - 
start_memcpy).count(); + time += downloadTime; + std::cout << "Download Time is " << downloadTime << " us" << std::endl; +#endif + } + return downloadTime; +} \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.h new file mode 100644 index 000000000..8f15f8098 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaImage.h @@ -0,0 +1,63 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#ifndef CUDAIMAGE_H +#define CUDAIMAGE_H + +#include +#define OCML_BASIC_ROUNDED_OPERATIONS + +class CudaImage +{ +public: + int width, height; + int pitch; + float *h_data; + float *d_data; + float *t_data; + bool d_internalAlloc; + bool h_internalAlloc; + +public: + CudaImage(); + CudaImage(const CudaImage&) = delete; + CudaImage& operator=(const CudaImage&) = delete; + ~CudaImage(); + void Allocate(int width, int height, int pitch, bool withHost, float &time, float *devMem = NULL, float *hostMem = NULL); + double Download(float &time); + double Readback(); + double InitTexture(); + double CopyToTexture(CudaImage &dst, bool host); +}; + +int iDivUp(int a, int b); +int iDivDown(int a, int b); +int iAlignUp(int a, int b); +int iAlignDown(int a, int b); +void StartTimer(unsigned int *hTimer); +double StopTimer(unsigned int hTimer); + +#endif // CUDAIMAGE_H diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSift.h new file mode 100644 index 000000000..b49f6c503 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSift.h @@ -0,0 +1,48 @@ +#ifndef CUDASIFT_H +#define CUDASIFT_H + +#include "cudaImage.h" + +typedef struct +{ + float xpos; + float ypos; + float scale; + float sharpness; + float edgeness; + float orientation; + float score; + float ambiguity; + int match; + float match_xpos; + float match_ypos; + float match_error; + float subsampling; + float empty[3]; + float data[128]; +} SiftPoint; + +typedef struct +{ + int numPts; // Number of available Sift points + int maxPts; // Number of allocated Sift points +#ifdef MANAGEDMEM + SiftPoint *m_data; // Managed data +#else + SiftPoint *h_data; // Host (CPU) data + SiftPoint *d_data; // Device (GPU) data +#endif +} SiftData; + +void InitCuda(int devNum = 0); +float *AllocSiftTempMemory(int width, int height, int numOctaves, float &totTime, bool scaleUp = false); +void 
FreeSiftTempMemory(float *memoryTmp); +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, + float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0); +void InitSiftData(SiftData &data, float &totTime, int num = 1024, bool host = false, bool dev = true); +void FreeSiftData(SiftData &data); +void PrintSiftData(SiftData &data); +double MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime); +double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f); + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.cpp new file mode 100644 index 000000000..33dd561b8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.cpp @@ -0,0 +1,2252 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. 
+ +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include "hip/hip_runtime.h" +#include "cudautils.h" +#include "cudaSiftD.h" +#include "cudaSift.h" + +/////////////////////////////////////////////////////////////////////////////// +// Kernel configuration +/////////////////////////////////////////////////////////////////////////////// + +__constant__ int d_MaxNumPoints; +__device__ unsigned int d_PointCounter[8 * 2 + 1]; +__constant__ float d_ScaleDownKernel[5]; +__constant__ float d_LowPassKernel[2 * LOWPASS_R + 1]; +__constant__ float d_LaplaceKernel[8 * 12 * 16]; + +/////////////////////////////////////////////////////////////////////////////// +// Lowpass filter and subsample image +/////////////////////////////////////////////////////////////////////////////// +__global__ void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + __shared__ float brows[BH * BW]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * SCALEDOWN_W + tx; + const int yp = blockIdx.y * SCALEDOWN_H + ty; + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + const int xl = min(width - 1, max(0, xp - 2)); + const int yl = min(height - 1, max(0, yp - 2)); + if (xp < (width + 4) && yp < (height + 4)) + { + float v = d_Data[yl * pitch + xl]; + 
brows[BW * ty + tx] = k0 * (v + ShiftDown(v, 4)) + k1 * (ShiftDown(v, 1) + ShiftDown(v, 3)) + k2 * ShiftDown(v, 2); + } + __syncthreads(); + const int xs = blockIdx.x * W2 + tx; + const int ys = blockIdx.y * H2 + ty; + if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW]; + } +} + +__global__ void ScaleDownDense(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + __shared__ float irows[BH * BW]; + __shared__ float brows[BH * W2]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * SCALEDOWN_W + tx; + const int yp = blockIdx.y * SCALEDOWN_H + ty; + const int xl = min(width - 1, max(0, xp - 2)); + const int yl = min(height - 1, max(0, yp - 2)); + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + if (xp < (width + 4) && yp < (height + 4)) + irows[BW * ty + tx] = d_Data[yl * pitch + xl]; + __syncthreads(); + if (yp < (height + 4) && tx < W2) + { + float *ptr = &irows[BW * ty + 2 * tx]; + brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2]; + } + __syncthreads(); + const int xs = blockIdx.x * W2 + tx; + const int ys = blockIdx.y * H2 + ty; + if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[W2 * (ty * 2) + tx]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2]; + } +} + +__global__ void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) +{ + __shared__ float inrow[SCALEDOWN_W + 4]; + __shared__ float brow[5 * (SCALEDOWN_W / 2)]; + __shared__ int 
yRead[SCALEDOWN_H + 4]; + __shared__ int yWrite[SCALEDOWN_H + 4]; +#define dx2 (SCALEDOWN_W / 2) + const int tx = threadIdx.x; + const int tx0 = tx + 0 * dx2; + const int tx1 = tx + 1 * dx2; + const int tx2 = tx + 2 * dx2; + const int tx3 = tx + 3 * dx2; + const int tx4 = tx + 4 * dx2; + const int xStart = blockIdx.x * SCALEDOWN_W; + const int yStart = blockIdx.y * SCALEDOWN_H; + const int xWrite = xStart / 2 + tx; + float k0 = d_ScaleDownKernel[0]; + float k1 = d_ScaleDownKernel[1]; + float k2 = d_ScaleDownKernel[2]; + if (tx < SCALEDOWN_H + 4) + { + int y = yStart + tx - 2; + y = (y < 0 ? 0 : y); + y = (y >= height ? height - 1 : y); + yRead[tx] = y * pitch; + yWrite[tx] = (yStart + tx - 4) / 2 * newpitch; + } + __syncthreads(); + int xRead = xStart + tx - 2; + xRead = (xRead < 0 ? 0 : xRead); + xRead = (xRead >= width ? width - 1 : xRead); + + int maxtx = min(dx2, width / 2 - xStart / 2); + for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5) + { + { + inrow[tx] = d_Data[yRead[dy + 0] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 4 && !(dy & 1)) + d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]); + } + __syncthreads(); + } + if (dy < (SCALEDOWN_H + 3)) + { + inrow[tx] = d_Data[yRead[dy + 1] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 3 && (dy & 1)) + d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]); + } + __syncthreads(); + } + if (dy < (SCALEDOWN_H + 2)) + { + inrow[tx] = d_Data[yRead[dy + 2] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * 
inrow[2 * tx + 2]; + if (dy >= 2 && !(dy & 1)) + d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]); + } + __syncthreads(); + } + if (dy < (SCALEDOWN_H + 1)) + { + inrow[tx] = d_Data[yRead[dy + 3] + xRead]; + __syncthreads(); + if (tx < maxtx) + { + brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 1 && (dy & 1)) + d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + brow[tx2]) + k1 * (brow[tx4] + brow[tx1]); + } + __syncthreads(); + } + if (dy < SCALEDOWN_H) + { + inrow[tx] = d_Data[yRead[dy + 4] + xRead]; + __syncthreads(); + if (tx < dx2 && xWrite < width / 2) + { + brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (!(dy & 1)) + d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]); + } + __syncthreads(); + } + } +} + +__global__ void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch) +{ + const int tx = threadIdx.x; + const int ty = threadIdx.y; + int x = blockIdx.x * SCALEUP_W + 2 * tx; + int y = blockIdx.y * SCALEUP_H + 2 * ty; + if (x < 2 * width && y < 2 * height) + { + int xl = blockIdx.x * (SCALEUP_W / 2) + tx; + int yu = blockIdx.y * (SCALEUP_H / 2) + ty; + int xr = min(xl + 1, width - 1); + int yd = min(yu + 1, height - 1); + float vul = d_Data[yu * pitch + xl]; + float vur = d_Data[yu * pitch + xr]; + float vdl = d_Data[yd * pitch + xl]; + float vdr = d_Data[yd * pitch + xr]; + d_Result[(y + 0) * newpitch + x + 0] = vul; + d_Result[(y + 0) * newpitch + x + 1] = 0.50f * (vul + vur); + d_Result[(y + 1) * newpitch + x + 0] = 0.50f * (vul + vdl); + d_Result[(y + 1) * newpitch + x + 1] = 0.25f * (vul + vur + vdl + vdr); + } +} + +__global__ void ExtractSiftDescriptors(hipTextureObject_t texObj, SiftPoint *d_sift, int 
fstPts, float subsampling) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int tx = threadIdx.x; // 0 -> 16 + const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + const int bx = blockIdx.x + fstPts; // 0 -> numPts + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa -sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } +} + +__device__ float FastAtan2(float y, float x) +{ + float absx = abs(x); + float absy = abs(y); + float a = __fdiv_rn(min(absx, absy), max(absx, absy)); + float s = a * a; + float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a; + r = (absy > absx ? 
1.57079637f - r : r); + r = (x < 0 ? 3.14159274f - r : r); + r = (y < 0 ? -r : r); + return r; +} + +__global__ void ExtractSiftDescriptorsCONSTNew(float *texObj, int pitch, SiftPoint *d_sift, float subsampling, int octave) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int tx = threadIdx.x; // 0 -> 16 + const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + if (ty == 0) + gauss[tx] = __expf(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = __sinf(theta); // cosa -sina + float cosa = __cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + + int xi1 = xpos + cosa; + int yi1 = ypos + sina; + + int xi2 = xpos - cosa; + int yi2 = ypos - sina; + + float dx = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + xi2); + + xi1 = xpos - sina; + yi1 = ypos + cosa; + + xi2 = xpos + sina; + yi2 = ypos - cosa; + + float dy = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + xi2); + + float grad = gauss[y] * gauss[tx] * __fsqrt_rn(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = 
angf; + int angp = (angi < 7 ? angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + __syncthreads(); + } +} + +__global__ void ExtractSiftDescriptorsCONST(hipTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int 
tx = threadIdx.x; // 0 -> 16 + const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 1], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa -sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + __syncthreads(); + } +} + +__global__ void ExtractSiftDescriptorsOld(hipTextureObject_t texObj, SiftPoint *d_sift, int fstPts, float subsampling) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[128]; + + const int tx = threadIdx.x; // 0 -> 16 + 
const int ty = threadIdx.y; // 0 -> 8 + const int idx = ty * 16 + tx; + const int bx = blockIdx.x + fstPts; // 0 -> numPts + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa -sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + if (idx < 64) + sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64]; + __syncthreads(); + if (idx < 32) + sums[idx] = sums[idx] + sums[idx + 32]; + __syncthreads(); + if (idx < 16) + sums[idx] = sums[idx] + sums[idx + 16]; + __syncthreads(); + if (idx < 8) + sums[idx] = sums[idx] + sums[idx + 8]; + __syncthreads(); + if (idx < 4) + sums[idx] = sums[idx] + sums[idx + 4]; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + buffer[idx] = buffer[idx] * rsqrtf(tsum1); + + if (buffer[idx] > 0.2f) + buffer[idx] = 0.2f; + __syncthreads(); + if (idx < 64) + sums[idx] = buffer[idx] * buffer[idx] + buffer[idx + 64] * buffer[idx + 64]; + __syncthreads(); + if (idx < 32) + sums[idx] = sums[idx] + sums[idx + 32]; + __syncthreads(); + if (idx < 16) + sums[idx] = sums[idx] + sums[idx + 16]; + __syncthreads(); + if (idx < 8) + sums[idx] = sums[idx] + sums[idx + 8]; + __syncthreads(); + if (idx < 4) + sums[idx] = sums[idx] 
+ sums[idx + 4]; + __syncthreads(); + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + + float *desc = d_sift[bx].data; + desc[idx] = buffer[idx] * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } +} + +__device__ void ExtractSiftDescriptor(hipTextureObject_t texObj, SiftPoint *d_sift, float subsampling, int octave, int bx) +{ + __shared__ float gauss[16]; + __shared__ float buffer[128]; + __shared__ float sums[4]; + + const int idx = threadIdx.x; + const int tx = idx & 15; // 0 -> 16 + const int ty = idx / 16; // 0 -> 8 + if (ty == 0) + gauss[tx] = exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + __syncthreads(); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sinf(theta); // cosa -sina + float cosa = cosf(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = tex2D(texObj, xpos + cosa, ypos + sina) - + tex2D(texObj, xpos - cosa, ypos - sina); + float dy = tex2D(texObj, xpos - sina, ypos + cosa) - + tex2D(texObj, xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sqrtf(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * atan2f(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + atomicAdd(buffer + p1, iangf * grad2); + atomicAdd(buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 32, iangf * grad2); + atomicAdd(buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + atomicAdd(buffer + p1 + 8, iangf * grad2); + atomicAdd(buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + atomicAdd(buffer + p1 + 40, iangf * grad2); + atomicAdd(buffer + p2 + 40, angf * grad2); + } + } + } + __syncthreads(); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = min(buffer[idx] * rsqrtf(tsum1), 0.2f); + + sum = tsum1 * tsum1; + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + __syncthreads(); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * rsqrtf(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + __syncthreads(); +} + +__global__ void RescalePositions(SiftPoint *d_sift, int numPts, float scale) +{ + int num = blockIdx.x * blockDim.x + threadIdx.x; + if (num < numPts) + { + d_sift[num].xpos *= scale; + d_sift[num].ypos *= scale; + d_sift[num].scale *= scale; + } +} + 
+__global__ void ComputeOrientations(hipTextureObject_t texObj, SiftPoint *d_Sift, int fstPts) +{ + __shared__ float hist[64]; + __shared__ float gauss[11]; + const int tx = threadIdx.x; + const int bx = blockIdx.x + fstPts; + float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + __syncthreads(); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = tex2D(texObj, xf + 1.0, yf) - tex2D(texObj, xf - 1.0, yf); + float dy = tex2D(texObj, xf, yf + 1.0) - tex2D(texObj, xf, yf - 1.0); + int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sqrtf(dx * dx + dy * dy); + atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + __syncthreads(); + if (tx < 32) + { + float v = hist[32 + tx]; + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? 
peak + 32.0f : peak); + if (maxval2 > 0.8f * maxval1) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = atomicInc(d_PointCounter, 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } +} + +// With constant number of blocks +__global__ void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d_Sift, int octave) +{ +#define RAD 9 +#define WID (2 * RAD + 1) +#define LEN 32 //%%%% Note: Lowe suggests 36, not 32 + __shared__ float img[WID][WID], tmp[WID][WID]; + __shared__ float hist[2 * LEN]; + __shared__ float gaussx[WID], gaussy[WID]; + const int tx = threadIdx.x; + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + float sc = d_Sift[bx].scale; + for (int i = tx; i < 2 * LEN; i += blockDim.x) + hist[i] = 0.0f; + float xp = d_Sift[bx].xpos; + float yp = d_Sift[bx].ypos; + int xi = (int)xp; + int yi = (int)yp; + float xf = xp - xi; + float yf = yp - yi; + for (int i = tx; i < WID * WID; i += blockDim.x) + { + int y = i / WID; + int x = i - y * WID; + int xp = max(min(x - RAD + xi, w - 1), 0); + int yp = max(min(y - RAD + yi, h - 1), 0); + img[y][x] = image[yp * p + xp]; + } + float fac[5]; + fac[1] = fac[3] = (sc > 0.5f ? __expf(-1.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f); + fac[0] = fac[4] = (sc > 0.5f ? 
__expf(-4.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f); + fac[2] = 1.0f; + float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0 + if (tx < WID) + { + gaussx[tx] = __expf(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf)); + gaussy[tx] = __expf(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf)); + } + __syncthreads(); + for (int i = tx; i < (WID - 4) * WID; i += blockDim.x) + { + int y = i / WID; + int x = i - y * WID; + y += 2; + tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) + + fac[0] * (img[y - 2][x] + img[y + 2][x]); + } + __syncthreads(); + for (int i = tx; i < (WID - 4) * (WID - 4); i += blockDim.x) + { + int y = i / (WID - 4); + int x = i - y * (WID - 4); + x += 2; + y += 2; + img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) + + fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]); + } + __syncthreads(); + for (int i = tx; i < (WID - 6) * (WID - 6); i += blockDim.x) + { + int y = i / (WID - 6); + int x = i - y * (WID - 6); + x += 3; + y += 3; + float dx = img[y][x + 1] - img[y][x - 1]; + float dy = img[y + 1][x] - img[y - 1][x]; + int bin = (int)((LEN / 2) * atan2f(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) % LEN; + float grad = __fsqrt_rn(dx * dx + dy * dy); + atomicAdd(&hist[LEN + bin], grad * gaussx[x] * gaussy[y]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1); + int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1); + int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2); + int x2p = (tx < (LEN - 2) ? tx + 2 : tx - LEN + 2); + if (tx < LEN) + { + hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) + + 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]); + hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + + 0.0f * (hist[x2m] + hist[x2p]); + float val = hist[tx + LEN]; + hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? 
val : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < LEN; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[LEN + ((i1 + 1) % LEN)]; + float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN; + atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1 && true) + { + float val1 = hist[LEN + ((i2 + 1) % LEN)]; + float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = sc; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? 
peak + LEN : peak) / LEN; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + } +#undef RAD +#undef WID +#undef LEN +} + +// With constant number of blocks +__global__ void ComputeOrientationsCONST(hipTextureObject_t texObj, SiftPoint *d_Sift, int octave) +{ + __shared__ float hist[64]; + __shared__ float gauss[11]; + const int tx = threadIdx.x; + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + __syncthreads(); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = tex2D(texObj, xf + 1.0, yf) - tex2D(texObj, xf - 1.0, yf); + float dy = tex2D(texObj, xf, yf + 1.0) - tex2D(texObj, xf, yf - 1.0); + int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sqrtf(dx * dx + dy * dy); + atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + __syncthreads(); + if (tx < 32) + { + float v = hist[32 + tx]; + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? 
v : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1 && true) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? 
peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + __syncthreads(); + } +} + +// With constant number of blocks +__global__ void OrientAndExtractCONST(hipTextureObject_t texObj, SiftPoint *d_Sift, float subsampling, int octave) +{ + __shared__ float hist[64]; + __shared__ float gauss[11]; + __shared__ unsigned int idx; //%%%% + const int tx = threadIdx.x; + + int fstPts = min(d_PointCounter[2 * octave - 1], d_MaxNumPoints); + int totPts = min(d_PointCounter[2 * octave + 0], d_MaxNumPoints); + for (int bx = blockIdx.x + fstPts; bx < totPts; bx += gridDim.x) + { + + float i2sigma2 = -1.0f / (4.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + __syncthreads(); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = tex2D(texObj, xf + 1.0, yf) - tex2D(texObj, xf - 1.0, yf); + float dy = tex2D(texObj, xf, yf + 1.0) - tex2D(texObj, xf, yf - 1.0); + int bin = 16.0f * atan2f(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sqrtf(dx * dx + dy * dy); + atomicAdd(&hist[bin], grad * gauss[xd] * gauss[yd]); + } + __syncthreads(); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + __syncthreads(); + if (tx < 32) + { + float v = hist[32 + tx]; + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? 
v : 0.0f); + } + __syncthreads(); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + idx = 0xffffffff; //%%%% + atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + idx = atomicInc(&d_PointCounter[2 * octave + 1], 0x7fffffff); //%%%% + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? 
peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + __syncthreads(); + ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, bx); //%%%% + if (idx < d_MaxNumPoints) //%%%% + ExtractSiftDescriptor(texObj, d_Sift, subsampling, octave, idx); //%%%% + } +} + +/////////////////////////////////////////////////////////////////////////////// +// Subtract two images (multi-scale version) +/////////////////////////////////////////////////////////////////////////////// + +// __global__ void FindPointsMultiTest(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +// { +// #define MEMWID (MINMAX_W + 2) +// __shared__ unsigned int cnt; +// __shared__ unsigned short points[3 * MEMWID]; + +// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && threadIdx.y == 0) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); +// } +// int tx = threadIdx.x; +// int ty = threadIdx.y; +// if (tx == 0 && ty == 0) +// cnt = 0; +// __syncthreads(); + +// int ypos = MINMAX_H * blockIdx.y + ty; +// if (ypos >= height) +// return; +// int block = blockIdx.x / NUM_SCALES; +// int scale = blockIdx.x - NUM_SCALES * block; +// int minx = block * MINMAX_W; +// int maxx = min(minx + MINMAX_W, width); +// int xpos = minx + tx; +// int size = pitch * height; +// int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + +// float maxv = fabs(d_Data0[ptr + ypos * pitch + 1 * size]); +// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W)); + +// if (Shuffle(maxv, 0) > thresh) +// { +// int yptr1 = ptr + ypos * pitch; +// int 
yptr0 = ptr + max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// float ymin1 = fminf(fminf(d10, d11), d12); +// float ymax1 = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// float ymin3 = fminf(fminf(d30, d31), d32); +// float ymax3 = fmaxf(fmaxf(d30, d31), d32); +// float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3); +// float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3); + +// float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); +// float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(nmin2, ymin1), ymin3); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// } +// __syncthreads(); +// if (ty == 0 && tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; 
+// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +__global__ void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +{ +#define MEMWID (MINMAX_W + 2) + __shared__ unsigned short points[2 * MEMWID]; + + if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0) + { + atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); + atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); + } + int tx = threadIdx.x; + int block = blockIdx.x / NUM_SCALES; + int scale = blockIdx.x - NUM_SCALES * block; + int minx = block * MINMAX_W; + int maxx = min(minx + MINMAX_W, width); + int xpos = minx + tx; + int size = pitch * height; + int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + + int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); + float maxv = 0.0f; + for (int y = 0; y < yloops; y++) + { + int ypos = MINMAX_H * blockIdx.y + y; + int yptr1 = ptr + ypos * pitch; + float val = d_Data0[yptr1 + 1 * size]; + maxv = fmaxf(maxv, fabs(val)); + } + // if (tx==0) printf("XXX1\n"); + // if (!__any_sync(0xffffffff, maxv > thresh)) + if (!__any(maxv > thresh)) + return; + // if (tx==0) printf("XXX2\n"); + + int ptbits = 0; + for (int y = 0; y < yloops; y++) + { + + int ypos = MINMAX_H * blockIdx.y + y; + int yptr1 = ptr + ypos * pitch; + float d11 = d_Data0[yptr1 + 1 * size]; + // if (__any_sync(0xffffffff, fabs(d11) > thresh)) + if (__any(fabs(d11) > thresh)) + { + + int yptr0 = ptr + max(0, ypos - 1) * pitch; + int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; + float d01 = d_Data0[yptr1]; + float d10 = d_Data0[yptr0 + 1 * size]; + float d12 = d_Data0[yptr2 + 1 * size]; + float d21 = 
d_Data0[yptr1 + 2 * size]; + + float d00 = d_Data0[yptr0]; + float d02 = d_Data0[yptr2]; + float ymin1 = fminf(fminf(d00, d01), d02); + float ymax1 = fmaxf(fmaxf(d00, d01), d02); + float d20 = d_Data0[yptr0 + 2 * size]; + float d22 = d_Data0[yptr2 + 2 * size]; + float ymin3 = fminf(fminf(d20, d21), d22); + float ymax3 = fmaxf(fmaxf(d20, d21), d22); + float ymin2 = fminf(fminf(ymin1, fminf(fminf(d10, d12), d11)), ymin3); + float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d10, d12), d11)), ymax3); + + float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); + float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); + float minv = fminf(fminf(nmin2, ymin1), ymin3); + minv = fminf(fminf(minv, d10), d12); + float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); + maxv = fmaxf(fmaxf(maxv, d10), d12); + + if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) + ptbits |= ((d11 < fminf(-thresh, minv)) | (d11 > fmaxf(thresh, maxv))) << y; + } + } + + unsigned int totbits = __popc(ptbits); + unsigned int numbits = totbits; + for (int d = 1; d < 32; d <<= 1) + { + unsigned int num = ShiftUp(totbits, d); + if (tx >= d) + totbits += num; + } + int pos = totbits - numbits; + for (int y = 0; y < yloops; y++) + { + int ypos = MINMAX_H * blockIdx.y + y; + if (ptbits & (1 << y) && pos < MEMWID) + { + points[2 * pos + 0] = xpos - 1; + points[2 * pos + 1] = ypos; + pos++; + } + } + + totbits = Shuffle(totbits, 31); + if (tx < totbits) + { + int xpos = points[2 * tx + 0]; + int ypos = points[2 * tx + 1]; + int ptr = xpos + (ypos + (scale + 1) * height) * pitch; + float val = d_Data0[ptr]; + float *data1 = &d_Data0[ptr]; + float dxx = 2.0f * val - data1[-1] - data1[1]; + float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; + float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); + float tra = dxx + dyy; + float det = dxx * dyy - dxy * dxy; + if (tra * tra < edgeLimit * det) + { + float edge = __fdividef(tra * tra, det); + float dx = 0.5f * 
(data1[1] - data1[-1]); + float dy = 0.5f * (data1[pitch] - data1[-pitch]); + float *data0 = d_Data0 + ptr - height * pitch; + float *data2 = d_Data0 + ptr + height * pitch; + float ds = 0.5f * (data0[0] - data2[0]); + float dss = 2.0f * val - data2[0] - data0[0]; + float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); + float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); + float idxx = dyy * dss - dys * dys; + float idxy = dys * dxs - dxy * dss; + float idxs = dxy * dys - dyy * dxs; + float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); + float idyy = dxx * dss - dxs * dxs; + float idys = dxy * dxs - dxx * dys; + float idss = dxx * dyy - dxy * dxy; + float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); + float pdy = idet * (idxy * dx + idyy * dy + idys * ds); + float pds = idet * (idxs * dx + idys * dy + idss * ds); + if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) + { + pdx = __fdividef(dx, dxx); + pdy = __fdividef(dy, dyy); + pds = __fdividef(ds, dss); + } + float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); + int maxPts = d_MaxNumPoints; + float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); + if (sc >= lowestScale) + { + atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); + unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); + idx = (idx >= maxPts ? 
maxPts - 1 : idx); + d_Sift[idx].xpos = xpos + pdx; + d_Sift[idx].ypos = ypos + pdy; + d_Sift[idx].scale = sc; + d_Sift[idx].sharpness = val + dval; + d_Sift[idx].edgeness = edge; + d_Sift[idx].subsampling = subsampling; + } + } + } +} + +// __global__ void FindPointsMulti(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +// { +// #define MEMWID (MINMAX_W + 2) +// __shared__ unsigned int cnt; +// __shared__ unsigned short points[3 * MEMWID]; + +// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); +// } +// int tx = threadIdx.x; +// int block = blockIdx.x / NUM_SCALES; +// int scale = blockIdx.x - NUM_SCALES * block; +// int minx = block * MINMAX_W; +// int maxx = min(minx + MINMAX_W, width); +// int xpos = minx + tx; +// int size = pitch * height; +// int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + +// int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); +// float maxv = 0.0f; +// for (int y = 0; y < yloops; y++) +// { +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// float val = d_Data0[yptr1 + 1 * size]; +// maxv = fmaxf(maxv, fabs(val)); +// } +// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W)); +// if (Shuffle(maxv, 0) <= thresh) +// return; + +// if (tx == 0) +// cnt = 0; +// __syncthreads(); + +// for (int y = 0; y < yloops; y++) +// { + +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// int yptr0 = ptr + max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) 
* pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// float ymin1 = fminf(fminf(d10, d11), d12); +// float ymax1 = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// float ymin3 = fminf(fminf(d30, d31), d32); +// float ymax3 = fmaxf(fmaxf(d30, d31), d32); +// float ymin2 = fminf(fminf(ymin1, fminf(fminf(d20, d22), d21)), ymin3); +// float ymax2 = fmaxf(fmaxf(ymax1, fmaxf(fmaxf(d20, d22), d21)), ymax3); + +// float nmin2 = fminf(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); +// float nmax2 = fmaxf(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(nmin2, ymin1), ymin3); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(nmax2, ymax1), ymax3); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// } +// if (tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; +// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = 
dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +// __global__ void FindPointsMultiOld(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave) +// { +// #define MEMWID (MINMAX_W + 2) +// __shared__ float ymin1[MEMWID], ymin2[MEMWID], ymin3[MEMWID]; +// __shared__ float ymax1[MEMWID], ymax2[MEMWID], ymax3[MEMWID]; +// __shared__ unsigned int cnt; +// __shared__ unsigned short points[3 * MEMWID]; + +// if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0) +// { +// atomicMax(&d_PointCounter[2 * octave + 0], d_PointCounter[2 * octave - 1]); +// atomicMax(&d_PointCounter[2 * octave + 1], d_PointCounter[2 * octave - 1]); +// } +// int tx = threadIdx.x; +// int block = blockIdx.x / NUM_SCALES; +// int scale = blockIdx.x - NUM_SCALES * block; +// int minx = block * MINMAX_W; +// int maxx = min(minx + MINMAX_W, width); +// int xpos = minx + tx; +// int size = pitch * height; +// int ptr = size * scale + max(min(xpos - 1, width - 1), 0); + +// int yloops = min(height - MINMAX_H * blockIdx.y, MINMAX_H); +// float maxv = 0.0f; +// for (int y = 0; y < yloops; y++) +// { +// int ypos = MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// float val = d_Data0[yptr1 + 1 * size]; +// maxv = fmaxf(maxv, fabs(val)); +// } +// maxv = fmaxf(maxv, ShiftDown(maxv, 16, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 8, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 4, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 2, MINMAX_W)); +// maxv = fmaxf(maxv, ShiftDown(maxv, 1, MINMAX_W)); +// if (Shuffle(maxv, 0) <= thresh) +// return; + +// if (tx == 0) +// cnt = 0; +// __syncthreads(); + +// for (int y = 0; y < yloops; y++) +// { + +// int ypos = 
MINMAX_H * blockIdx.y + y; +// int yptr1 = ptr + ypos * pitch; +// int yptr0 = ptr + max(0, ypos - 1) * pitch; +// int yptr2 = ptr + min(height - 1, ypos + 1) * pitch; +// float d20 = d_Data0[yptr0 + 1 * size]; +// float d21 = d_Data0[yptr1 + 1 * size]; +// float d22 = d_Data0[yptr2 + 1 * size]; +// float d31 = d_Data0[yptr1 + 2 * size]; +// float d11 = d_Data0[yptr1]; + +// float d10 = d_Data0[yptr0]; +// float d12 = d_Data0[yptr2]; +// ymin1[tx] = fminf(fminf(d10, d11), d12); +// ymax1[tx] = fmaxf(fmaxf(d10, d11), d12); +// float d30 = d_Data0[yptr0 + 2 * size]; +// float d32 = d_Data0[yptr2 + 2 * size]; +// ymin3[tx] = fminf(fminf(d30, d31), d32); +// ymax3[tx] = fmaxf(fmaxf(d30, d31), d32); +// ymin2[tx] = fminf(fminf(ymin1[tx], fminf(fminf(d20, d22), d21)), ymin3[tx]); +// ymax2[tx] = fmaxf(fmaxf(ymax1[tx], fmaxf(fmaxf(d20, d22), d21)), ymax3[tx]); + +// __syncthreads(); + +// if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) +// { +// if (d21 < -thresh) +// { +// float minv = fminf(fminf(fminf(ymin2[tx - 1], ymin2[tx + 1]), ymin1[tx]), ymin3[tx]); +// minv = fminf(fminf(minv, d20), d22); +// if (d21 < minv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// if (d21 > thresh) +// { +// float maxv = fmaxf(fmaxf(fmaxf(ymax2[tx - 1], ymax2[tx + 1]), ymax1[tx]), ymax3[tx]); +// maxv = fmaxf(fmaxf(maxv, d20), d22); +// if (d21 > maxv) +// { +// int pos = atomicInc(&cnt, MEMWID - 1); +// points[3 * pos + 0] = xpos - 1; +// points[3 * pos + 1] = ypos; +// points[3 * pos + 2] = scale; +// } +// } +// } +// __syncthreads(); +// } +// if (tx < cnt) +// { +// int xpos = points[3 * tx + 0]; +// int ypos = points[3 * tx + 1]; +// int scale = points[3 * tx + 2]; +// int ptr = xpos + (ypos + (scale + 1) * height) * pitch; +// float val = d_Data0[ptr]; +// float *data1 = &d_Data0[ptr]; +// float dxx = 2.0f * val - data1[-1] - data1[1]; +// float dyy = 2.0f * val - 
data1[-pitch] - data1[pitch]; +// float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); +// float tra = dxx + dyy; +// float det = dxx * dyy - dxy * dxy; +// if (tra * tra < edgeLimit * det) +// { +// float edge = __fdividef(tra * tra, det); +// float dx = 0.5f * (data1[1] - data1[-1]); +// float dy = 0.5f * (data1[pitch] - data1[-pitch]); +// float *data0 = d_Data0 + ptr - height * pitch; +// float *data2 = d_Data0 + ptr + height * pitch; +// float ds = 0.5f * (data0[0] - data2[0]); +// float dss = 2.0f * val - data2[0] - data0[0]; +// float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); +// float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); +// float idxx = dyy * dss - dys * dys; +// float idxy = dys * dxs - dxy * dss; +// float idxs = dxy * dys - dyy * dxs; +// float idet = __fdividef(1.0f, idxx * dxx + idxy * dxy + idxs * dxs); +// float idyy = dxx * dss - dxs * dxs; +// float idys = dxy * dxs - dxx * dys; +// float idss = dxx * dyy - dxy * dxy; +// float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); +// float pdy = idet * (idxy * dx + idyy * dy + idys * ds); +// float pds = idet * (idxs * dx + idys * dy + idss * ds); +// if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) +// { +// pdx = __fdividef(dx, dxx); +// pdy = __fdividef(dy, dyy); +// pds = __fdividef(ds, dss); +// } +// float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); +// int maxPts = d_MaxNumPoints; +// float sc = powf(2.0f, (float)scale / NUM_SCALES) * exp2f(pds * factor); +// if (sc >= lowestScale) +// { +// unsigned int idx = atomicInc(&d_PointCounter[2 * octave + 0], 0x7fffffff); +// idx = (idx >= maxPts ? 
maxPts - 1 : idx); +// d_Sift[idx].xpos = xpos + pdx; +// d_Sift[idx].ypos = ypos + pdy; +// d_Sift[idx].scale = sc; +// d_Sift[idx].sharpness = val + dval; +// d_Sift[idx].edgeness = edge; +// d_Sift[idx].subsampling = subsampling; +// } +// } +// } +// } + +__global__ void LaplaceMultiTex(hipTextureObject_t texObj, float *d_Result, int width, int pitch, int height, int octave) +{ + __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; + __shared__ float data2[LAPLACE_W * LAPLACE_S]; + const int tx = threadIdx.x; + const int xp = blockIdx.x * LAPLACE_W + tx; + const int yp = blockIdx.y; + const int scale = threadIdx.y; + float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; + float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float x = xp - 3.5; + float y = yp + 0.5; + sdata1[tx] = kernel[0] * tex2D(texObj, x, y) + + kernel[1] * (tex2D(texObj, x, y - 1.0) + tex2D(texObj, x, y + 1.0)) + + kernel[2] * (tex2D(texObj, x, y - 2.0) + tex2D(texObj, x, y + 2.0)) + + kernel[3] * (tex2D(texObj, x, y - 3.0) + tex2D(texObj, x, y + 3.0)) + + kernel[4] * (tex2D(texObj, x, y - 4.0) + tex2D(texObj, x, y + 4.0)); + __syncthreads(); + float *sdata2 = data2 + LAPLACE_W * scale; + if (tx < LAPLACE_W) + { + sdata2[tx] = kernel[0] * sdata1[tx + 4] + + kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) + + kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + + kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) + + kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]); + } + __syncthreads(); + if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) + d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; +} + +__global__ void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +{ + __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; + const int tx = threadIdx.x; + const int xp = blockIdx.x * LAPLACE_W + tx; + const int yp = blockIdx.y; + float *data = d_Image + max(min(xp - LAPLACE_R, 
width - 1), 0); // multiply with 4 for max func + float temp[2 * LAPLACE_R + 1]; + + float kern[LAPLACE_S][LAPLACE_R + 1]; + if (xp < (width + 2 * LAPLACE_R)) + { + for (int i = 0; i <= 2 * LAPLACE_R; i++) + temp[i] = data[max(0, min(yp + i - LAPLACE_R, height - 1)) * pitch]; + for (int scale = 0; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; + for (int i = 0; i <= LAPLACE_R; i++) + { + kern[scale][i] = kernel[i]; + } + float sum = kern[scale][0] * temp[LAPLACE_R]; +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + sum += kern[scale][j] * (temp[LAPLACE_R - j] + temp[LAPLACE_R + j]); + buf[tx] = sum; + } + } + __syncthreads(); + if (tx < LAPLACE_W && xp < width) + { + int scale = 0; + float oldRes = kern[scale][0] * buff[tx + LAPLACE_R]; + +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + oldRes += kern[scale][j] * (buff[tx + LAPLACE_R - j] + buff[tx + LAPLACE_R + j]); + + for (int scale = 1; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + + float res = kern[scale][0] * buf[tx + LAPLACE_R]; + +#pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + res += kern[scale][j] * (buf[tx + LAPLACE_R - j] + buf[tx + LAPLACE_R + j]); + + d_Result[(scale - 1) * height * pitch + yp * pitch + xp] = res - oldRes; + oldRes = res; + } + } +} + +// __global__ void LaplaceMultiMemWide(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float buff[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int xp4 = blockIdx.x * LAPLACE_W + 4 * tx; +// const int yp = blockIdx.y; +// float kern[LAPLACE_S][LAPLACE_R + 1]; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// float temp[9]; +// if (xp < (width + 2 * LAPLACE_R)) +// { +// for (int i = 0; i < 4; i++) +// temp[i] = data[max(0, 
min(yp + i - 4, height - 1)) * pitch]; +// for (int i = 4; i < 8 + 1; i++) +// temp[i] = data[min(yp + i - 4, height - 1) * pitch]; +// for (int scale = 0; scale < LAPLACE_S; scale++) +// { +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// for (int i = 0; i <= LAPLACE_R; i++) +// kern[scale][i] = kernel[LAPLACE_R - i]; +// float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// buf[tx] = kern[scale][4] * temp[4] + +// kern[scale][3] * (temp[3] + temp[5]) + kern[scale][2] * (temp[2] + temp[6]) + +// kern[scale][1] * (temp[1] + temp[7]) + kern[scale][0] * (temp[0] + temp[8]); +// } +// } +// __syncthreads(); +// if (tx < LAPLACE_W / 4 && xp4 < width) +// { +// float4 b0 = reinterpret_cast(buff)[tx + 0]; +// float4 b1 = reinterpret_cast(buff)[tx + 1]; +// float4 b2 = reinterpret_cast(buff)[tx + 2]; +// float4 old4, new4, dif4; +// old4.x = kern[0][4] * b1.x + kern[0][3] * (b0.w + b1.y) + kern[0][2] * (b0.z + b1.z) + +// kern[0][1] * (b0.y + b1.w) + kern[0][0] * (b0.x + b2.x); +// old4.y = kern[0][4] * b1.y + kern[0][3] * (b1.x + b1.z) + kern[0][2] * (b0.w + b1.w) + +// kern[0][1] * (b0.z + b2.x) + kern[0][0] * (b0.y + b2.y); +// old4.z = kern[0][4] * b1.z + kern[0][3] * (b1.y + b1.w) + kern[0][2] * (b1.x + b2.x) + +// kern[0][1] * (b0.w + b2.y) + kern[0][0] * (b0.z + b2.z); +// old4.w = kern[0][4] * b1.w + kern[0][3] * (b1.z + b2.x) + kern[0][2] * (b1.y + b2.y) + +// kern[0][1] * (b1.x + b2.z) + kern[0][0] * (b0.w + b2.w); +// for (int scale = 1; scale < LAPLACE_S; scale++) +// { +// float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float4 b0 = reinterpret_cast(buf)[tx + 0]; +// float4 b1 = reinterpret_cast(buf)[tx + 1]; +// float4 b2 = reinterpret_cast(buf)[tx + 2]; +// new4.x = kern[scale][4] * b1.x + kern[scale][3] * (b0.w + b1.y) + +// kern[scale][2] * (b0.z + b1.z) + kern[scale][1] * (b0.y + b1.w) + +// kern[scale][0] * (b0.x + b2.x); +// new4.y = kern[scale][4] * b1.y + kern[scale][3] * (b1.x + b1.z) + +// kern[scale][2] * 
(b0.w + b1.w) + kern[scale][1] * (b0.z + b2.x) + +// kern[scale][0] * (b0.y + b2.y); +// new4.z = kern[scale][4] * b1.z + kern[scale][3] * (b1.y + b1.w) + +// kern[scale][2] * (b1.x + b2.x) + kern[scale][1] * (b0.w + b2.y) + +// kern[scale][0] * (b0.z + b2.z); +// new4.w = kern[scale][4] * b1.w + kern[scale][3] * (b1.z + b2.x) + +// kern[scale][2] * (b1.y + b2.y) + kern[scale][1] * (b1.x + b2.z) + +// kern[scale][0] * (b0.w + b2.w); +// dif4.x = new4.x - old4.x; +// dif4.y = new4.y - old4.y; +// dif4.z = new4.z - old4.z; +// dif4.w = new4.w - old4.w; +// reinterpret_cast(&d_Result[(scale - 1) * height * pitch + yp * pitch + xp4])[0] = dif4; +// old4 = new4; +// } +// } +// } + +// __global__ void LaplaceMultiMemTest(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// __shared__ float data2[LAPLACE_W * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int yp = LAPLACE_H * blockIdx.y; +// const int scale = threadIdx.y; +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// int h = height - 1; +// float temp[8 + LAPLACE_H], kern[LAPLACE_R + 1]; +// for (int i = 0; i < 4; i++) +// temp[i] = data[max(0, min(yp + i - 4, h)) * pitch]; +// for (int i = 4; i < 8 + LAPLACE_H; i++) +// temp[i] = data[min(yp + i - 4, h) * pitch]; +// for (int i = 0; i <= LAPLACE_R; i++) +// kern[i] = kernel[LAPLACE_R - i]; +// for (int j = 0; j < LAPLACE_H; j++) +// { +// sdata1[tx] = kern[4] * temp[4 + j] + +// kern[3] * (temp[3 + j] + temp[5 + j]) + kern[2] * (temp[2 + j] + temp[6 + j]) + +// kern[1] * (temp[1 + j] + temp[7 + j]) + kern[0] * (temp[0 + j] + temp[8 + j]); +// __syncthreads(); +// float *sdata2 = data2 + LAPLACE_W * scale; +// if (tx < LAPLACE_W) +// { +// sdata2[tx] = kern[4] * 
sdata1[tx + 4] + +// kern[3] * (sdata1[tx + 3] + sdata1[tx + 5]) + kern[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + +// kern[1] * (sdata1[tx + 1] + sdata1[tx + 7]) + kern[0] * (sdata1[tx + 0] + sdata1[tx + 8]); +// } +// __syncthreads(); +// if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width && (yp + j) < height) +// d_Result[scale * height * pitch + (yp + j) * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; +// } +// } + +// __global__ void LaplaceMultiMemOld(float *d_Image, float *d_Result, int width, int pitch, int height, int octave) +// { +// __shared__ float data1[(LAPLACE_W + 2 * LAPLACE_R) * LAPLACE_S]; +// __shared__ float data2[LAPLACE_W * LAPLACE_S]; +// const int tx = threadIdx.x; +// const int xp = blockIdx.x * LAPLACE_W + tx; +// const int yp = blockIdx.y; +// const int scale = threadIdx.y; +// float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; +// float *sdata1 = data1 + (LAPLACE_W + 2 * LAPLACE_R) * scale; +// float *data = d_Image + max(min(xp - 4, width - 1), 0); +// int h = height - 1; +// sdata1[tx] = kernel[0] * data[min(yp, h) * pitch] + +// kernel[1] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) + +// kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) + +// kernel[3] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) + +// kernel[4] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]); +// __syncthreads(); +// float *sdata2 = data2 + LAPLACE_W * scale; +// if (tx < LAPLACE_W) +// { +// sdata2[tx] = kernel[0] * sdata1[tx + 4] + +// kernel[1] * (sdata1[tx + 3] + sdata1[tx + 5]) + +// kernel[2] * (sdata1[tx + 2] + sdata1[tx + 6]) + +// kernel[3] * (sdata1[tx + 1] + sdata1[tx + 7]) + +// kernel[4] * (sdata1[tx + 0] + sdata1[tx + 8]); +// } +// __syncthreads(); +// if (tx < LAPLACE_W && scale < LAPLACE_S - 1 && xp < width) +// d_Result[scale * height * pitch + yp * pitch + xp] = sdata2[tx] - sdata2[tx + LAPLACE_W]; +// } + 
+__global__ void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height) +{ + __shared__ float buffer[(LOWPASS_W + 2 * LOWPASS_R) * LOWPASS_H]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * LOWPASS_W + tx; + const int yp = blockIdx.y * LOWPASS_H + ty; + float *kernel = d_LowPassKernel; + float *data = d_Image + max(min(xp - 4, width - 1), 0); + float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R); + int h = height - 1; + if (yp < height) + buff[tx] = kernel[4] * data[min(yp, h) * pitch] + + kernel[3] * (data[max(0, min(yp - 1, h)) * pitch] + data[min(yp + 1, h) * pitch]) + + kernel[2] * (data[max(0, min(yp - 2, h)) * pitch] + data[min(yp + 2, h) * pitch]) + + kernel[1] * (data[max(0, min(yp - 3, h)) * pitch] + data[min(yp + 3, h) * pitch]) + + kernel[0] * (data[max(0, min(yp - 4, h)) * pitch] + data[min(yp + 4, h) * pitch]); + __syncthreads(); + if (tx < LOWPASS_W && xp < width && yp < height) + d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] + + kernel[3] * (buff[tx + 3] + buff[tx + 5]) + kernel[2] * (buff[tx + 2] + buff[tx + 6]) + + kernel[1] * (buff[tx + 1] + buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]); +} + +__global__ void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height) +{ + __shared__ float xrows[16][32]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * LOWPASS_W + tx; + const int yp = blockIdx.y * LOWPASS_H + ty; + const int N = 16; + float *k = d_LowPassKernel; + int xl = max(min(xp - 4, width - 1), 0); + for (int l = -8; l <= LOWPASS_H; l += 4) + { + if (l < LOWPASS_H) + { + int yl = max(min(yp + l + 4, height - 1), 0); + float val = d_Image[yl * pitch + xl]; + xrows[(l + 8 + ty) % N][tx] = k[4] * ShiftDown(val, 4) + + k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) + + k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) + + k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) + + k[0] * (ShiftDown(val, 8) + 
val); + } + if (l >= 4) + { + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] + + k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) + + k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) + + k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) + + k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]); + } + if (l >= 0) + __syncthreads(); + } +} + +__global__ void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height) +{ + __shared__ float xrows[16][32]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int xp = blockIdx.x * LOWPASS_W + tx; + const int yp = blockIdx.y * LOWPASS_H + ty; + const int N = 16; + float *k = d_LowPassKernel; + int xl = max(min(xp - 4, width - 1), 0); +#pragma unroll + for (int l = -8; l < 4; l += 4) + { + int ly = l + ty; + int yl = max(min(yp + l + 4, height - 1), 0); + float val = d_Image[yl * pitch + xl]; // d_Image[yl*pitch + xl].x + val = k[4] * ShiftDown(val, 4) + + k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) + + k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) + + k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) + + k[0] * (ShiftDown(val, 8) + val); + xrows[ly + 8][tx] = val; + } + __syncthreads(); +#pragma unroll + for (int l = 4; l < LOWPASS_H; l += 4) + { + int ly = l + ty; + int yl = min(yp + l + 4, height - 1); + float val = d_Image[yl * pitch + xl]; + val = k[4] * ShiftDown(val, 4) + + k[3] * (ShiftDown(val, 5) + ShiftDown(val, 3)) + + k[2] * (ShiftDown(val, 6) + ShiftDown(val, 2)) + + k[1] * (ShiftDown(val, 7) + ShiftDown(val, 1)) + + k[0] * (ShiftDown(val, 8) + val); + xrows[(ly + 8) % N][tx] = val; + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * (xrows[(ly - 2) % N][tx] + 
xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); + __syncthreads(); + } + int ly = LOWPASS_H + ty; + int ys = yp + LOWPASS_H - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.h new file mode 100644 index 000000000..0d38fe57e --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftD.h @@ -0,0 +1,58 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +#ifndef CUDASIFTD_H +#define CUDASIFTD_H + +#define NUM_SCALES 5 + +// Scale down thread block width +#define SCALEDOWN_W 64 // 60 + +// Scale down thread block height +#define SCALEDOWN_H 16 // 8 + +// Scale up thread block width +#define SCALEUP_W 64 + +// Scale up thread block height +#define SCALEUP_H 8 + +// Find point thread block width +#define MINMAX_W 30 //32 + +// Find point thread block height +#define MINMAX_H 8 //16 + +// Laplace thread block width +#define LAPLACE_W 128 // 56 + +// Laplace rows per thread +#define LAPLACE_H 4 + +// Number of laplace scales +#define LAPLACE_S (NUM_SCALES+3) + +// Laplace filter kernel radius +#define LAPLACE_R 4 + +#define LOWPASS_W 24 //56 +#define LOWPASS_H 32 //16 +#define LOWPASS_R 4 + +//====================== Number of threads ====================// +// ScaleDown: SCALEDOWN_W + 4 +// LaplaceMulti: (LAPLACE_W+2*LAPLACE_R)*LAPLACE_S +// FindPointsMulti: MINMAX_W 
+ 2 +// ComputeOrientations: 128 +// ExtractSiftDescriptors: 256 + +//====================== Number of blocks ====================// +// ScaleDown: (width/SCALEDOWN_W) * (height/SCALEDOWN_H) +// LaplceMulti: (width+2*LAPLACE_R)/LAPLACE_W * height +// FindPointsMulti: (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H) +// ComputeOrientations: numpts +// ExtractSiftDescriptors: numpts + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.cpp new file mode 100644 index 000000000..f9c0d9676 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.cpp @@ -0,0 +1,651 @@ +//********************************************************// +// CUDA SIFT extractor by Mårten Björkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include "hip/hip_runtime.h" +#include +#include +#include +#include +#include +#include +#include + +#include "cudautils.h" +#include "cudaImage.h" +#include "cudaSift.h" +#include "cudaSiftD.h" +#include "cudaSiftH.h" + +#include "cudaSiftD.cpp" + +void InitCuda(int devNum) +{ + int nDevices; + hipGetDeviceCount(&nDevices); + if (!nDevices) + { + std::cerr << "No CUDA devices available" << std::endl; + return; + } + devNum = std::min(nDevices - 1, devNum); + deviceInit(devNum); + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, devNum); + printf("Device Number: %d\n", devNum); + printf(" Device name: %s\n", prop.name); + printf(" Memory Clock Rate (MHz): %d\n", prop.memoryClockRate / 1000); + printf(" Clock Freq (MHz): %d\n", prop.clockRate / 1000); + printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth); + printf(" Peak Memory Bandwidth (GB/s): %.1f\n\n", + 2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6); +} + +float *AllocSiftTempMemory(int width, int height, int numOctaves, float &time, bool scaleUp) +{ + const int nd = NUM_SCALES + 3; + int w = width * (scaleUp ? 2 : 1); + int h = height * (scaleUp ? 
2 : 1); + int p = iAlignUp(w, 128); + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = NULL; + size_t pitch; + size += sizeTmp; + +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + hipMallocPitch((void **)&memoryTmp, &pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + // printf("Malloc time for memoryTmp = %.2f us\n", std::chrono::duration(stop_malloc - start_malloc).count()); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + return memoryTmp; +} + +void FreeSiftTempMemory(float *memoryTmp) +{ + if (memoryTmp) + hipFree(memoryTmp); +} + +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, + float thresh, float &totTime, float lowestScale, bool scaleUp, float *tempMemory) +{ + unsigned int *d_PointCounterAddr; + +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + hipGetSymbolAddress((void **)&d_PointCounterAddr, HIP_SYMBOL(d_PointCounter)); + hipMemset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int)); + hipMemcpyToSymbol(HIP_SYMBOL(d_MaxNumPoints), &siftData.maxPts, sizeof(int)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + + const int nd = NUM_SCALES + 3; + int w = img.width * (scaleUp ? 2 : 1); + int h = img.height * (scaleUp ? 
2 : 1); + int p = iAlignUp(w, 128); + int width = w, height = h; + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = tempMemory; + size += sizeTmp; + if (!tempMemory) + { + size_t pitch; + +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(hipMallocPitch((void **)&memoryTmp, &pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float))); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + } + float *memorySub = memoryTmp + sizeTmp; + + CudaImage lowImg; + lowImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memorySub); + if (!scaleUp) + { + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); + +#ifdef DEVICE_TIMER + auto start_memcpy1 = std::chrono::steady_clock::now(); +#endif + hipMemcpyToSymbol(HIP_SYMBOL(d_LaplaceKernel), kernel, 8 * 12 * 16 * sizeof(float), 0, hipMemcpyHostToDevice); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy1 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy1 - start_memcpy1).count(); +#endif + LowPass(lowImg, img, fmax(initBlur, 0.001f), totTime); + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), totTime); + +#ifdef DEVICE_TIMER + auto start_memcpy2 = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy2 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy2 - start_memcpy2).count(); +#endif + 
siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); + } + else + { + CudaImage upImg; + upImg.Allocate(width, height, iAlignUp(width, 128), false, totTime, memoryTmp); + ScaleUp(upImg, img, totTime); + LowPass(lowImg, upImg, max(initBlur, 0.001f), totTime); + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); + +#ifdef DEVICE_TIMER + auto start_memcpy3 = std::chrono::steady_clock::now(); +#endif + hipMemcpyToSymbol(HIP_SYMBOL(d_LaplaceKernel), kernel, 8 * 12 * 16 * sizeof(float), 0, hipMemcpyHostToDevice); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy3 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy3 - start_memcpy3).count(); +#endif + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), totTime); + +#ifdef DEVICE_TIMER + auto start_memcpy4 = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int), hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy4 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy4 - start_memcpy4).count(); +#endif + siftData.numPts = (siftData.numPts < siftData.maxPts ? 
siftData.numPts : siftData.maxPts); + RescalePositions(siftData, 0.5f, totTime); + } + + if (!tempMemory) + safeCall(hipFree(memoryTmp)); +#ifdef MANAGEDMEM + safeCall(hipDeviceSynchronize()); +#else + if (siftData.h_data) + { +#ifdef DEVICE_TIMER + auto start_memcpy5 = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpy(siftData.h_data, siftData.d_data, sizeof(SiftPoint) * siftData.numPts, hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy5 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy5 - start_memcpy5).count(); + printf("Total time for sift extraction = %0.2f us %d \n\n", totTime, siftData.numPts); +#endif + } +#endif + printf("Number of Points after sift extraction = %d\n\n", siftData.numPts); +} + +int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale, + float subsampling, float *memoryTmp, float *memorySub, float &totTime) +{ + int w = img.width; + int h = img.height; + if (numOctaves > 1) + { + CudaImage subImg; + int p = iAlignUp(w / 2, 128); + subImg.Allocate(w / 2, h / 2, p, false, totTime, memorySub); + ScaleDown(subImg, img, 0.5f, totTime); + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + ExtractSiftLoop(siftData, subImg, numOctaves - 1, totInitBlur, thresh, lowestScale, subsampling * 2.0f, + memoryTmp, memorySub + (h / 2) * p, totTime); + } + + ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, totTime); + return 0; +} + +void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, + float lowestScale, float subsampling, float *memoryTmp, float &totTime) +{ + const int nd = NUM_SCALES + 3; + CudaImage diffImg[nd]; + int w = img.width; + int h = img.height; + int p = iAlignUp(w, 128); + for (int i = 0; i < nd - 1; i++) + diffImg[i].Allocate(w, h, p, false, totTime, memoryTmp + i * p * h); + + float baseBlur = 
pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); + LaplaceMulti(img, diffImg, octave, totTime); + FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES, lowestScale / subsampling, subsampling, octave, totTime); + ComputeOrientations(img, siftData, octave, totTime); + ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, totTime); +} + +void InitSiftData(SiftData &data, float &time, int num, bool host, bool dev) +{ + data.numPts = 0; + data.maxPts = num; + int sz = sizeof(SiftPoint) * num; + data.h_data = NULL; + if (host) + data.h_data = (SiftPoint *)malloc(sz); + data.d_data = NULL; + if (dev) + { +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + safeCall(hipMalloc((void **)&data.d_data, sz)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + } +} + +void FreeSiftData(SiftData &data) +{ +#ifdef MANAGEDMEM + safeCall(hipFree(data.m_data)); +#else + if (data.d_data != NULL) + safeCall(hipFree(data.d_data)); + data.d_data = NULL; + if (data.h_data != NULL) + free(data.h_data); +#endif + data.numPts = 0; + data.maxPts = 0; +} + +void PrintSiftData(SiftData &data) +{ +#ifdef MANAGEDMEM + SiftPoint *h_data = data.m_data; +#else + SiftPoint *h_data = data.h_data; + if (data.h_data == NULL) + { + h_data = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts); + safeCall(hipMemcpy(h_data, data.d_data, sizeof(SiftPoint) * data.numPts, hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + data.h_data = h_data; + } +#endif + for (int i = 0; i < data.numPts; i++) + { + printf("xpos = %.2f\n", h_data[i].xpos); + printf("ypos = %.2f\n", h_data[i].ypos); + printf("scale = %.2f\n", h_data[i].scale); + printf("sharpness = %.2f\n", h_data[i].sharpness); + printf("edgeness = %.2f\n", h_data[i].edgeness); + printf("orientation = %.2f\n", 
h_data[i].orientation); + printf("score = %.2f\n", h_data[i].score); + float *siftData = (float *)&h_data[i].data; + for (int j = 0; j < 8; j++) + { + if (j == 0) + printf("data = "); + else + printf(" "); + for (int k = 0; k < 16; k++) + if (siftData[j + 8 * k] < 0.05) + printf(" . "); + else + printf("%.2f ", siftData[j + 8 * k]); + printf("\n"); + } + } + printf("Number of available points: %d\n", data.numPts); + printf("Number of allocated points: %d\n", data.maxPts); +} + +/////////////////////////////////////////////////////////////////////////////// +// Host side master functions +/////////////////////////////////////////////////////////////////////////////// + +double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime) +{ + static float oldVariance = -1.0f; + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleDown: missing data\n"); + return 0.0; + } + if (oldVariance != variance) + { + float h_Kernel[5]; + float kernelSum = 0.0f; + for (int j = 0; j < 5; j++) + { + h_Kernel[j] = (float)expf(-(double)(j - 2) * (j - 2) / 2.0 / variance); + kernelSum += h_Kernel[j]; + } + for (int j = 0; j < 5; j++) + h_Kernel[j] /= kernelSum; + +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpyToSymbol(HIP_SYMBOL(d_ScaleDownKernel), h_Kernel, 5 * sizeof(float))); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + oldVariance = variance; + } +#if 0 + dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H)); + dim3 threads(SCALEDOWN_W + 4, SCALEDOWN_H + 4); + hipLaunchKernelGGL(ScaleDownDenseShift, blocks, threads, 0, 0, res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch); +#else + dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H)); + dim3 threads(SCALEDOWN_W + 4); + +#ifdef DEVICE_TIMER + 
auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(ScaleDown, blocks, threads, 0, 0, res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("ScaleDown() execution failed\n"); + return 0.0; +} + +double ScaleUp(CudaImage &res, CudaImage &src, float &totTime) +{ + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleUp: missing data\n"); + return 0.0; + } + dim3 blocks(iDivUp(res.width, SCALEUP_W), iDivUp(res.height, SCALEUP_H)); + dim3 threads(SCALEUP_W / 2, SCALEUP_H / 2); + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(ScaleUp, blocks, threads, 0, 0, res.d_data, src.d_data, src.width, src.pitch, src.height, res.pitch); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ScaleUp time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + + checkMsg("ScaleUp() execution failed\n"); + return 0.0; +} + +double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime) +{ + dim3 blocks(512); + dim3 threads(256); + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(ComputeOrientationsCONSTNew, blocks, threads, 0, 0, src.d_data, src.width, src.pitch, src.height, siftData.d_data, octave); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ComputeOrientationsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + 
checkMsg("ComputeOrientations() execution failed\n"); + return 0.0; +} + +double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime) +{ + dim3 blocks(512); + dim3 threads(16, 8); + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(ExtractSiftDescriptorsCONSTNew, blocks, threads, 0, 0, texObj, pitch, siftData.d_data, subsampling, octave); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ExtractSiftDescriptorsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ExtractSiftDescriptors() execution failed\n"); + return 0.0; +} + +double RescalePositions(SiftData &siftData, float scale, float &totTime) +{ + dim3 blocks(iDivUp(siftData.numPts, 64)); + dim3 threads(64); + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(RescalePositions, blocks, threads, 0, 0, siftData.d_data, siftData.numPts, scale); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("RescalePositions time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + + checkMsg("RescapePositions() execution failed\n"); + return 0.0; +} + +double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime) +{ + float kernel[2 * LOWPASS_R + 1]; + static float oldScale = -1.0f; + if (scale != oldScale) + { + float kernelSum = 0.0f; + float ivar2 = 1.0f / (2.0f * scale * scale); + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) + { + kernel[j + LOWPASS_R] = (float)expf(-(double)j * j * ivar2); + kernelSum += kernel[j + LOWPASS_R]; + } + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) 
+ kernel[j + LOWPASS_R] /= kernelSum; + +#ifdef DEVICE_TIMER + auto start_memcpy_1 = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpyToSymbol(HIP_SYMBOL(d_LowPassKernel), kernel, (2 * LOWPASS_R + 1) * sizeof(float))); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy_1 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy_1 - start_memcpy_1).count(); +#endif + + oldScale = scale; + } + int width = res.width; + int pitch = res.pitch; + int height = res.height; + dim3 blocks(iDivUp(width, LOWPASS_W), iDivUp(height, LOWPASS_H)); //[80,34,1] +#if 1 + dim3 threads(LOWPASS_W + 2 * LOWPASS_R, 4); //[32,4,1] + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(LowPassBlockOld, blocks, threads, 0, 0, src.d_data, res.d_data, width, pitch, height); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("LowPassBlock time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#else + dim3 threads(LOWPASS_W + 2 * LOWPASS_R, LOWPASS_H); + hipLaunchKernelGGL(LowPass, blocks, threads, 0, 0, src.d_data, res.d_data, width, pitch, height); +#endif + checkMsg("LowPass() execution failed\n"); + return 0.0; +} + +//==================== Multi-scale functions ===================// + +void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel) +{ + if (numOctaves > 1) + { + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + PrepareLaplaceKernels(numOctaves - 1, totInitBlur, kernel); + } + float scale = pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); + for (int i = 0; i < NUM_SCALES + 3; i++) + { + float kernelSum = 0.0f; + float var = scale * scale - initBlur * initBlur; + for (int j = 0; j <= LAPLACE_R; j++) + { + kernel[numOctaves * 12 * 16 + 
16 * i + j] = (float)expf(-(double)j * j / 2.0 / var); + kernelSum += (j == 0 ? 1 : 2) * kernel[numOctaves * 12 * 16 + 16 * i + j]; + } + for (int j = 0; j <= LAPLACE_R; j++) + kernel[numOctaves * 12 * 16 + 16 * i + j] /= kernelSum; + scale *= diffScale; + } +} + +double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime) +{ + int width = results[0].width; + int pitch = results[0].pitch; + int height = results[0].height; +#if 1 + dim3 threads(LAPLACE_W + 2 * LAPLACE_R); //(136) + dim3 blocks(iDivUp(width, LAPLACE_W), height); //(15) + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(LaplaceMultiMem, blocks, threads, 0, 0, baseImage.d_data, results[0].d_data, width, pitch, height, octave); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("LaplaceMultiMem time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("LaplaceMulti() execution failed\n"); + return 0.0; +} + +double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, + float lowestScale, float subsampling, int octave, float &totTime) +{ + if (sources->d_data == NULL) + { + printf("FindPointsMulti: missing data\n"); + return 0.0; + } + int w = sources->width; + int p = sources->pitch; + int h = sources->height; +#if 1 + dim3 blocks(iDivUp(w, MINMAX_W) * NUM_SCALES, iDivUp(h, MINMAX_H)); + dim3 threads(MINMAX_W + 2); + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(FindPointsMultiNew, blocks, threads, 0, 0, sources->d_data, siftData.d_data, w, p, h, subsampling, + lowestScale, thresh, factor, edgeLimit, octave); + hipDeviceSynchronize(); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // 
printf("FindPointsMultiNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()) + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("FindPointsMulti() execution failed\n"); + return 0.0; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.h new file mode 100644 index 000000000..6fdbafda9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudaSiftH.h @@ -0,0 +1,49 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#ifndef CUDASIFTH_H +#define CUDASIFTH_H + +#include "cudautils.h" +#include "cudaImage.h" +#include "cudaSift.h" + +int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, + float lowestScale, float subsampling, float *memoryTmp, float *memorySub, float &totTime); +void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale, + float subsampling, float *memoryTmp, float &totTime); +double ScaleDown(CudaImage &res, CudaImage &src, float variance, float &totTime); +double ScaleUp(CudaImage &res, CudaImage &src, float &totTime); +double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, float &totTime); +double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, float &totTime); +double RescalePositions(SiftData &siftData, float scale, float &totTime); +double LowPass(CudaImage &res, CudaImage &src, float scale, float &totTime); +void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel); +double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, float &totTime); +double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, + float lowestScale, float subsampling, int octave, float &totTime); + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudautils.h new file mode 100644 index 000000000..f56f135a1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/cudautils.h @@ -0,0 +1,151 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, 
merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#ifndef CUDAUTILS_H +#define CUDAUTILS_H + +#include +#include + +#ifdef WIN32 +#include +#endif + +#define safeCall(err) __safeCall(err, __FILE__, __LINE__) +#define safeThreadSync() __safeThreadSync(__FILE__, __LINE__) +#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__) + +inline void __safeCall(hipError_t err, const char *file, const int line) +{ + if (hipSuccess != err) + { + fprintf(stderr, "safeCall() Runtime API error in file <%s>, line %i : %s.\n", file, line, hipGetErrorString(err)); + exit(-1); + } +} + +inline void __safeThreadSync(const char *file, const int line) +{ + hipError_t err = hipDeviceSynchronize(); + if (hipSuccess != err) + { + fprintf(stderr, "threadSynchronize() Driver API error in file '%s' in line %i : %s.\n", file, line, hipGetErrorString(err)); + exit(-1); + } +} + +inline void __checkMsg(const char *errorMessage, const char *file, const int line) +{ + hipError_t err = hipGetLastError(); + if (hipSuccess != err) + { + fprintf(stderr, "checkMsg() CUDA error: %s in file <%s>, line %i : %s.\n", errorMessage, file, line, hipGetErrorString(err)); + exit(-1); + } +} + +inline bool deviceInit(int 
dev) +{ + int deviceCount; + safeCall(hipGetDeviceCount(&deviceCount)); + if (deviceCount == 0) + { + fprintf(stderr, "CUDA error: no devices supporting CUDA.\n"); + return false; + } + if (dev < 0) + dev = 0; + if (dev > deviceCount - 1) + dev = deviceCount - 1; + hipDeviceProp_t deviceProp; + safeCall(hipGetDeviceProperties(&deviceProp, dev)); + if (deviceProp.major < 1) + { + fprintf(stderr, "error: device does not support CUDA.\n"); + return false; + } + safeCall(hipSetDevice(dev)); + return true; +} + +class TimerCPU +{ + static const int bits = 10; + +public: + long long beg_clock; + float freq; + TimerCPU(float freq_) : freq(freq_) + { // freq = clock frequency in MHz + beg_clock = getTSC(bits); + } + long long getTSC(int bits) + { +#ifdef WIN32 + return __rdtsc() / (1LL << bits); +#else + unsigned int low, high; + __asm__(".byte 0x0f, 0x31" + : "=a"(low), "=d"(high)); + return ((long long)high << (32 - bits)) | ((long long)low >> bits); +#endif + } + float read() + { + long long end_clock = getTSC(bits); + long long Kcycles = end_clock - beg_clock; + float time = (float)(1 << bits) * Kcycles / freq / 1e3f; + return time; + } +}; + +template +__device__ __inline__ T ShiftDown(T var, unsigned int delta, int width = 32) +{ +#if (CUDART_VERSION >= 9000) + return __shfl_down_sync(0xffffffff, var, delta, width); +#else + return __shfl_down(var, delta, width); +#endif +} + +template +__device__ __inline__ T ShiftUp(T var, unsigned int delta, int width = 32) +{ +#if (CUDART_VERSION >= 9000) + return __shfl_up_sync(0xffffffff, var, delta, width); +#else + return __shfl_up(var, delta, width); +#endif +} + +template +__device__ __inline__ T Shuffle(T var, unsigned int lane, int width = 32) +{ +#if (CUDART_VERSION >= 9000) + return __shfl_sync(0xffffffff, var, lane, width); +#else + return __shfl(var, lane, width); +#endif +} + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/geomFuncs.cpp 
b/third-party-programs/Velocity-Bench/cudaSift/HIP/geomFuncs.cpp new file mode 100644 index 000000000..c01e6e7d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/geomFuncs.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include "cudaSift.h" + +int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh) +{ +#ifdef MANAGEDMEM + SiftPoint *mpts = data.m_data; +#else + if (data.h_data==NULL) + return 0; + SiftPoint *mpts = data.h_data; +#endif + float limit = thresh*thresh; + int numPts = data.numPts; + cv::Mat M(8, 8, CV_64FC1); + cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1); + double Y[8]; + for (int i=0;i<8;i++) + A.at(i, 0) = homography[i] / homography[8]; + for (int loop=0;loopmaxAmbiguity) + continue; + float den = A.at(6)*pt.xpos + A.at(7)*pt.ypos + 1.0f; + float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos; + float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos; + float err = dx*dx + dy*dy; + float wei = (err(r,c) += (Y[c] * Y[r] * wei); + X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei); + Y[0] = Y[1] = Y[2] = 0.0; + Y[3] = pt.xpos; + Y[4] = pt.ypos; + Y[5] = 1.0; + Y[6] = - pt.xpos * pt.match_ypos; + Y[7] = - pt.ypos * pt.match_ypos; + for (int c=0;c<8;c++) + for (int r=0;r<8;r++) + M.at(r,c) += (Y[c] * Y[r] * wei); + X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei); + } + cv::solve(M, X, A, cv::DECOMP_CHOLESKY); + } + int numfit = 0; + for (int i=0;i(6)*pt.xpos + A.at(7)*pt.ypos + 1.0; + float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos; + float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos; + float err = dx*dx + dy*dy; + if (err(i); + homography[8] = 1.0f; + return numfit; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/mainSift.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/mainSift.cpp new file mode 100644 index 
000000000..28c04a3f7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/mainSift.cpp @@ -0,0 +1,280 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Björkman aka Celebrandil // +// celle @ csc.kth.se // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Utility.h" +#include "cudaImage.h" +#include "cudaSift.h" + +int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh); +void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img); +void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography); + +double ScaleUp(CudaImage &res, CudaImage &src); + +/////////////////////////////////////////////////////////////////////////////// +// Main program +/////////////////////////////////////////////////////////////////////////////// +int main(int argc, char **argv) +{ + auto totalProgTimer_start = std::chrono::steady_clock::now(); + int devNum = 0, imgSet = 0; + if (argc > 1) + devNum = std::atoi(argv[1]); + if (argc > 2) + imgSet = std::atoi(argv[2]); + + float totTime = 0.0; + float imageInitTime = 0.0; + float extractSiftTime = 0.0; + float matchingTime = 0.0; + float ioReadTime = 0.0; + float dataVerificationTime = 0.0; + + // Read images using OpenCV + cv::Mat limg, rimg; + auto ioRead_start = std::chrono::steady_clock::now(); + if (imgSet) + { + cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1); + cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1); + } + else + { + cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1); + cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1); + } + auto ioRead_stop = std::chrono::steady_clock::now(); + ioReadTime = std::chrono::duration(ioRead_stop - ioRead_start).count(); + + unsigned int w = limg.cols; + unsigned int h = limg.rows; + std::cout << "Image size = (" << w << "," << h << ")" << std::endl; + + // Initial Cuda images and download images to device + std::cout << "Initializing data..." 
<< std::endl; + auto start_deviceSet = std::chrono::steady_clock::now(); + hipSetDevice(0); + auto stop_deviceSet = std::chrono::steady_clock::now(); + + CudaImage img1, img2; + img1.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)limg.data); + img2.Allocate(w, h, iAlignUp(w, 128), false, imageInitTime, NULL, (float *)rimg.data); + img1.Download(imageInitTime); + img2.Download(imageInitTime); + + // Extract Sift features from images + SiftData siftData1, siftData2; + float initBlur = 1.0f; + float thresh = (imgSet ? 4.5f : 2.0f); + + InitSiftData(siftData1, imageInitTime, 32768, true, true); + InitSiftData(siftData2, imageInitTime, 32768, true, true); + + // A bit of benchmarking + // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) { + float *memoryTmp = AllocSiftTempMemory(w, h, 5, imageInitTime, false); + + for (int i = 0; i < 50; i++) + { + float time = 0.0f; + ExtractSift(siftData1, img1, 5, initBlur, thresh, time, 0.0f, false, memoryTmp); + extractSiftTime += time; + time = 0.0f; + ExtractSift(siftData2, img2, 5, initBlur, thresh, time, 0.0f, false, memoryTmp); + extractSiftTime += time; + } + FreeSiftTempMemory(memoryTmp); + + // Match Sift features and find a homography + for (int i = 0; i < 1; i++) + MatchSiftData(siftData1, siftData2, matchingTime); + float homography[9]; + int numMatches; + FindHomography(siftData1, homography, &numMatches, matchingTime, 10000, 0.00f, 0.80f, 5.0); + int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0); + float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts); + + std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl; + std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts) << "% " << initBlur << " " << thresh << "\n" + << std::endl; + + totTime = imageInitTime + extractSiftTime + matchingTime; + 
+#ifdef DEVICE_TIMER + std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl; + std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl; + std::cout << "Matching time = " << matchingTime / 1000 << " ms" + << "\n" + << std::endl; + std::cout << "Total Time = " << totTime / 1000 << " ms" + << "\n" + << std::endl; +#endif + // data validation + auto dataVerficationTimer_start = std::chrono::steady_clock::now(); + int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage); + auto dataVerficationTimer_stop = std::chrono::steady_clock::now(); + dataVerificationTime = std::chrono::duration(dataVerficationTimer_stop - dataVerficationTimer_start).count(); + // // Print out and store summary data + // // PrintMatchData(siftData1, siftData2, img1); + // cv::imwrite("data/limg_pts.pgm", limg); + + // MatchAll(siftData1, siftData2, homography); + + // Free Sift data from device + FreeSiftData(siftData1); + FreeSiftData(siftData2); + + auto totalProgTimer_end = std::chrono::steady_clock::now(); + float totalProgramTime = std::chrono::duration(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime; + std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms" + << "\n" + << std::endl; + return data_verification_flag; +} + +void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography) +{ +#ifdef MANAGEDMEM + SiftPoint *sift1 = siftData1.m_data; + SiftPoint *sift2 = siftData2.m_data; +#else + SiftPoint *sift1 = siftData1.h_data; + SiftPoint *sift2 = siftData2.h_data; +#endif + int numPts1 = siftData1.numPts; + int numPts2 = siftData2.numPts; + int numFound = 0; +#if 1 + homography[0] = homography[4] = -1.0f; + homography[1] = homography[3] = homography[6] = homography[7] = 0.0f; + homography[2] = 1279.0f; + homography[5] = 959.0f; +#endif + for (int i = 0; i < numPts1; i++) + { + float *data1 = sift1[i].data; + std::cout << i << 
":" << sift1[i].scale << ":" << (int)sift1[i].orientation << " " << sift1[i].xpos << " " << sift1[i].ypos << std::endl; + bool found = false; + for (int j = 0; j < numPts2; j++) + { + float *data2 = sift2[j].data; + float sum = 0.0f; + for (int k = 0; k < 128; k++) + sum += data1[k] * data2[k]; + float den = homography[6] * sift1[i].xpos + homography[7] * sift1[i].ypos + homography[8]; + float dx = (homography[0] * sift1[i].xpos + homography[1] * sift1[i].ypos + homography[2]) / den - sift2[j].xpos; + float dy = (homography[3] * sift1[i].xpos + homography[4] * sift1[i].ypos + homography[5]) / den - sift2[j].ypos; + float err = dx * dx + dy * dy; + if (err < 100.0f) // 100.0 + found = true; + if (err < 100.0f || j == sift1[i].match) + { // 100.0 + if (j == sift1[i].match && err < 100.0f) + std::cout << " *"; + else if (j == sift1[i].match) + std::cout << " -"; + else if (err < 100.0f) + std::cout << " +"; + else + std::cout << " "; + std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << sift2[j].scale << ":" << (int)sift2[j].orientation << " " << sift2[j].xpos << " " << sift2[j].ypos << " " << (int)dx << " " << (int)dy << std::endl; + } + } + std::cout << std::endl; + if (found) + numFound++; + } + std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl; + std::cout << homography[0] << " " << homography[1] << " " << homography[2] << std::endl; //%%% + std::cout << homography[3] << " " << homography[4] << " " << homography[5] << std::endl; //%%% + std::cout << homography[6] << " " << homography[7] << " " << homography[8] << std::endl; //%%% +} + +void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img) +{ + int numPts = siftData1.numPts; +#ifdef MANAGEDMEM + SiftPoint *sift1 = siftData1.m_data; + SiftPoint *sift2 = siftData2.m_data; +#else + SiftPoint *sift1 = siftData1.h_data; + SiftPoint *sift2 = siftData2.h_data; +#endif + float *h_img = img.h_data; + int w = img.width; + int h = img.height; + std::cout << 
std::setprecision(3); + for (int j = 0; j < numPts; j++) + { + int k = sift1[j].match; + if (sift1[j].match_error < 5) + { + float dx = sift2[k].xpos - sift1[j].xpos; + float dy = sift2[k].ypos - sift1[j].ypos; +#if 0 + if (false && sift1[j].xpos>550 && sift1[j].xpos<600) { + std::cout << "pos1=(" << (int)sift1[j].xpos << "," << (int)sift1[j].ypos << ") "; + std::cout << j << ": " << "score=" << sift1[j].score << " ambiguity=" << sift1[j].ambiguity << " match=" << k << " "; + std::cout << "scale=" << sift1[j].scale << " "; + std::cout << "error=" << (int)sift1[j].match_error << " "; + std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << " "; + std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl; + } +#endif +#if 1 + int len = (int)(fabs(dx) > fabs(dy) ? fabs(dx) : fabs(dy)); + for (int l = 0; l < len; l++) + { + int x = (int)(sift1[j].xpos + dx * l / len); + int y = (int)(sift1[j].ypos + dy * l / len); + h_img[y * w + x] = 255.0f; + } +#endif + } + int x = (int)(sift1[j].xpos + 0.5); + int y = (int)(sift1[j].ypos + 0.5); + int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale))))); + int p = y * w + x; + p += (w + 1); + for (int k = 0; k < s; k++) + h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 0.0f; + p -= (w + 1); + for (int k = 0; k < s; k++) + h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 255.0f; + } + std::cout << std::setprecision(6); +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/HIP/matching.cpp b/third-party-programs/Velocity-Bench/cudaSift/HIP/matching.cpp new file mode 100644 index 000000000..bdfad49ea --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/HIP/matching.cpp @@ -0,0 +1,1540 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files 
(the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include "hip/hip_runtime.h" +#include +#include +#include "cudaSift.h" +#include "cudautils.h" + +#define OCML_BASIC_ROUNDED_OPERATIONS + +//================= Device matching functions =====================// + +__global__ void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2) +{ + __shared__ float siftPoint[128]; + __shared__ float sums[16 * 16]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int p1 = blockIdx.x; + const int p2 = blockIdx.y * 16 + ty; + const float *ptr1 = sift1[p1].data; + const float *ptr2 = sift2[p2].data; + const int i = 16 * ty + tx; + if (ty < 8) + siftPoint[i] = ptr1[i]; + __syncthreads(); + float sum = 0.0f; + if (p2 < numPts2) + for (int j = 0; j < 8; j++) + sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx]; + sums[i] = sum; + __syncthreads(); + if (tx < 8) + sums[i] += sums[i + 8]; + __syncthreads(); + if (tx < 4) + sums[i] += sums[i + 4]; + __syncthreads(); + if (ty == 0) + { + sum = sums[16 * tx + 0] + 
sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3]; + corrData[p1 * gridDim.y * 16 + blockIdx.y * 16 + tx] = sum; + } + __syncthreads(); +} + +__global__ void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize) +{ + __shared__ float maxScore[16 * 16]; + __shared__ float maxScor2[16 * 16]; + __shared__ int maxIndex[16 * 16]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int idx = ty * 16 + tx; + int p1 = blockIdx.x * 16 + threadIdx.y; + p1 = (p1 >= numPts1 ? numPts1 - 1 : p1); + maxScore[idx] = -1.0f; + maxScor2[idx] = -1.0f; + maxIndex[idx] = -1; + __syncthreads(); + float *corrs = &corrData[p1 * corrWidth]; + for (int i = tx; i < corrWidth; i += 16) + { + float val = corrs[i]; + if (val > maxScore[idx]) + { + maxScor2[idx] = maxScore[idx]; + maxScore[idx] = val; + maxIndex[idx] = i; + } + else if (val > maxScor2[idx]) + maxScor2[idx] = val; + } + __syncthreads(); + for (int len = 8; len > 0; len /= 2) + { + if (tx < 8) + { + float val = maxScore[idx + len]; + int i = maxIndex[idx + len]; + if (val > maxScore[idx]) + { + maxScor2[idx] = maxScore[idx]; + maxScore[idx] = val; + maxIndex[idx] = i; + } + else if (val > maxScor2[idx]) + maxScor2[idx] = val; + float va2 = maxScor2[idx + len]; + if (va2 > maxScor2[idx]) + maxScor2[idx] = va2; + } + __syncthreads(); + } + if (tx == 0) + { + sift1[p1].score = maxScore[ty * 16]; + sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6); + sift1[p1].match = maxIndex[ty * 16]; + sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos; + } +} + +// Version based on suggestion by Nicholas Lin +__global__ void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + int block_dim = blockDim.x; // blockDim.x == 16 + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int p1 = blockIdx.x * block_dim + ty; + const int idx = ty * 16 
+ tx; + + __shared__ int maxIndex[16 * 16]; + maxIndex[idx] = 0; + __syncthreads(); + + float *corrs = NULL; + if (p1 < numPts1) + { + corrs = &corrData[p1 * block_dim * 2]; + corrs[tx] = 0.0f; + corrs[tx + 16] = 0.0f; + const float *pt1 = sift1[p1].data; + for (int p2 = tx; p2 < numPts2; p2 += 16) + { + float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int i = 0; i < 128; i++) + sum += pt1[i] * pt2[i]; + if (sum > corrs[tx]) + { + corrs[tx + 16] = corrs[tx]; + corrs[tx] = sum; + maxIndex[idx] = p2; + } + else if (sum > corrs[tx + 16]) + corrs[tx + 16] = sum; + } + } + __syncthreads(); + if (p1 < numPts1) + { + for (int len = 8; len > 0; len /= 2) + { + if (tx < len) + { + float val = corrs[tx + len]; + int i = maxIndex[idx + len]; + if (val > corrs[tx]) + { + corrs[tx + 16] = corrs[tx]; + corrs[tx] = val; + maxIndex[idx] = i; + } + else if (val > corrs[tx + 16]) + corrs[tx + 16] = val; + float va2 = corrs[tx + 16 + len]; + if (va2 > corrs[tx + 16]) + corrs[tx + 16] = va2; + } + __syncthreads(); + } + if (tx == 0) + { + sift1[p1].score = corrs[0]; + sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6); + sift1[p1].match = maxIndex[ty << 4]; + sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos; + } + } +} + +#define FMC2W 16 +#define FMC2H 4 + +__global__ void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float siftPoint[128]; + __shared__ float maxScore[FMC2H]; + __shared__ float maxScor2[FMC2H]; + __shared__ int maxIndex[FMC2H]; + const int p1 = blockIdx.x; + if (p1 >= numPts1) + return; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int idx = ty * FMC2W + tx; + if (idx < FMC2H) + { + maxScore[idx] = -1.0f; + maxScor2[idx] = -1.0f; + maxIndex[idx] = 0; + } + __syncthreads(); + const float *pt1 = sift1[p1].data; + for (int i = idx; i < 128; i += FMC2W * FMC2H) + siftPoint[i] = pt1[i]; + __syncthreads(); + for (int p2 = ty; p2 < numPts2; 
p2 += FMC2H) + { + const float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int j = tx; j < 128; j += FMC2W) + sum += siftPoint[j] * pt2[j]; + for (int j = FMC2W / 2; j > 0; j /= 2) + sum += ShiftDown(sum, j); + if (tx == 0) + { + if (sum > maxScore[ty]) + { + maxScor2[ty] = maxScore[ty]; + maxScore[ty] = sum; + maxIndex[ty] = p2; + } + else if (sum > maxScor2[ty]) + maxScor2[ty] = sum; + } + } + __syncthreads(); + for (int len = FMC2H / 2; len > 0; len /= 2) + { + if (ty == 0 && tx < len) + { + float val = maxScore[tx + len]; + int p2 = maxIndex[tx + len]; + if (val > maxScore[tx]) + { + maxScor2[tx] = maxScore[tx]; + maxScore[tx] = val; + maxIndex[tx] = p2; + } + else if (val > maxScor2[tx]) + maxScor2[tx] = val; + float va2 = maxScor2[tx + len]; + if (va2 > maxScor2[tx]) + maxScor2[tx] = va2; + } + __syncthreads(); + } + if (ty == 0 && tx == 0) + { + sift1[p1].score = maxScore[0]; + sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6); + sift1[p1].match = maxIndex[0]; + sift1[p1].match_xpos = sift2[maxIndex[0]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[0]].ypos; + } +} + +__global__ void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float siftPoint[128 * FMC2H]; + __shared__ float maxScore[FMC2H]; + __shared__ float maxScor2[FMC2H]; + __shared__ int maxIndex[FMC2H]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + if (tx == 0) + { + maxScore[ty] = -1.0f; + maxScor2[ty] = -1.0f; + maxIndex[ty] = 0; + } + const int p1 = blockIdx.x * FMC2H + ty; + const float *pt1 = sift1[p1].data; + for (int j = tx; j < 128; j += FMC2W) + siftPoint[128 * ty + j] = pt1[j]; + __syncthreads(); + for (int p2 = 0; p2 < numPts2; p2++) + { + const float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int j = tx; j < 128; j += FMC2W) + sum += siftPoint[128 * ty + j] * pt2[j]; + for (int j = FMC2W / 2; j > 0; j /= 2) + sum += ShiftDown(sum, j); + if (tx == 0) + { + if (sum > maxScore[ty]) + { + maxScor2[ty] = 
maxScore[ty]; + maxScore[ty] = sum; + maxIndex[ty] = p2; + } + else if (sum > maxScor2[ty]) + maxScor2[ty] = sum; + } + } + __syncthreads(); + if (tx == 0) + { + sift1[p1].score = maxScore[ty]; + sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6); + sift1[p1].match = maxIndex[ty]; + sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos; + } +} + +__global__ void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width) +{ + char *d_src = (char *)src; + char *d_dst = (char *)dst; + + for (int i = 0; i < numPts; ++i) + { + for (int j = 0; j < width; ++j) + { + d_dst[j] = d_src[j]; + } + d_src = d_src + src_pitch; + d_dst = d_dst + dst_pitch; + } +} + +__global__ void +CleanMatches(SiftPoint *sift1, int numPts1) +{ + const int p1 = min(blockIdx.x * 64 + threadIdx.x, numPts1 - 1); + sift1[p1].score = 0.0f; +} + +#define M7W 32 +#define M7H 32 +#define M7R 4 +#define NRX 2 +#define NDIM 128 + +__global__ void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float4 buffer1[M7W * NDIM / 4]; + __shared__ float4 buffer2[M7H * NDIM / 4]; + int tx = threadIdx.x; + int ty = threadIdx.y; + int bp1 = M7W * blockIdx.x; + for (int j = ty; j < M7W; j += M7H / M7R) + { + int p1 = min(bp1 + j, numPts1 - 1); + for (int d = tx; d < NDIM / 4; d += M7W) + buffer1[j * NDIM / 4 + (d + j) % (NDIM / 4)] = ((float4 *)&sift1[p1].data)[d]; + } + + float max_score[NRX]; + float sec_score[NRX]; + int index[NRX]; + for (int i = 0; i < NRX; i++) + { + max_score[i] = 0.0f; + sec_score[i] = 0.0f; + index[i] = -1; + } + + int idx = ty * M7W + tx; + int ix = idx % (M7W / NRX); + int iy = idx / (M7W / NRX); + for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H) + { + for (int j = ty; j < M7H; j += M7H / M7R) + { + int p2 = min(bp2 + j, numPts2 - 1); + for (int d = tx; d < NDIM / 4; d += M7W) + buffer2[j * NDIM / 4 + d] = ((float4 *)&sift2[p2].data)[d]; + } + 
__syncthreads(); + + if (idx < M7W * M7H / M7R / NRX) + { + float score[M7R][NRX]; + for (int dy = 0; dy < M7R; dy++) + for (int i = 0; i < NRX; i++) + score[dy][i] = 0.0f; + for (int d = 0; d < NDIM / 4; d++) + { + float4 v1[NRX]; + for (int i = 0; i < NRX; i++) + v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)]; + for (int dy = 0; dy < M7R; dy++) + { + float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d]; + for (int i = 0; i < NRX; i++) + { + score[dy][i] += v1[i].x * v2.x; + score[dy][i] += v1[i].y * v2.y; + score[dy][i] += v1[i].z * v2.z; + score[dy][i] += v1[i].w * v2.w; + } + } + } + for (int dy = 0; dy < M7R; dy++) + { + for (int i = 0; i < NRX; i++) + { + if (score[dy][i] > max_score[i]) + { + sec_score[i] = max_score[i]; + max_score[i] = score[dy][i]; + index[i] = min(bp2 + M7R * iy + dy, numPts2 - 1); + } + else if (score[dy][i] > sec_score[i]) + sec_score[i] = score[dy][i]; + } + } + } + __syncthreads(); + } + float *scores1 = (float *)buffer1; + float *scores2 = &scores1[M7W * M7H / M7R]; + int *indices = (int *)&scores2[M7W * M7H / M7R]; + if (idx < M7W * M7H / M7R / NRX) + { + for (int i = 0; i < NRX; i++) + { + scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i]; + scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i]; + indices[iy * M7W + (M7W / NRX) * i + ix] = index[i]; + } + } + __syncthreads(); + + if (ty == 0) + { + float max_score = scores1[tx]; + float sec_score = scores2[tx]; + int index = indices[tx]; + for (int y = 0; y < M7H / M7R; y++) + if (index != indices[y * M7W + tx]) + { + if (scores1[y * M7W + tx] > max_score) + { + sec_score = max(max_score, sec_score); + max_score = scores1[y * M7W + tx]; + index = indices[y * M7W + tx]; + } + else if (scores1[y * M7W + tx] > sec_score) + sec_score = scores1[y * M7W + tx]; + } + sift1[bp1 + tx].score = max_score; + sift1[bp1 + tx].match = index; + sift1[bp1 + tx].match_xpos = sift2[index].xpos; + sift1[bp1 + tx].match_ypos = sift2[index].ypos; + 
sift1[bp1 + tx].ambiguity = sec_score / (max_score + 1e-6f); + } +} + +#define FMC_GH 512 +#define FMC_BW 32 +#define FMC_BH 32 +#define FMC_BD 16 +#define FMC_TW 1 +#define FMC_TH 4 +#define FMC_NW (FMC_BW / FMC_TW) // 32 +#define FMC_NH (FMC_BH / FMC_TH) // 8 +#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps + +__device__ volatile int lock = 0; + +__global__ void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 4*32*8 = 1024 + __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 4*32*8 = 1024 + //__shared__ float blksums[FMC_BW*FMC_BH]; // 32*32 = 1024 + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int idx = ty * FMC_NW + tx; + float4 *pts1 = 0, *pts2 = 0; + if (idx < FMC_BW) + { + const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1); + pts1 = (float4 *)sift1[p1l].data; + } + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH) + { + if (idx < FMC_BH) + { + const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1); + pts2 = (float4 *)sift2[p2l].data; + } + float sums[FMC_TW * FMC_TH]; + for (int i = 0; i < FMC_TW * FMC_TH; i++) + sums[i] = 0.0f; + + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i]; + __syncthreads(); + + int b = FMC_BD / 2; + for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2) + { + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i]; + + b ^= FMC_BD / 2; + for (int i = 0; i < FMC_BD / 2; i++) + { + float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)]; + for 
(int iy = 0; iy < FMC_TH; iy++) + { + float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x * v2.x; + sums[iy * FMC_TW + ix] += v1[ix].y * v2.y; + sums[iy * FMC_TW + ix] += v1[ix].z * v2.z; + sums[iy * FMC_TW + ix] += v1[ix].w * v2.w; + } + } + } + __syncthreads(); + } + + b ^= FMC_BD / 2; + for (int i = 0; i < FMC_BD / 2; i++) + { + float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x * v2.x; + sums[iy * FMC_TW + ix] += v1[ix].y * v2.y; + sums[iy * FMC_TW + ix] += v1[ix].z * v2.z; + sums[iy * FMC_TW + ix] += v1[ix].w * v2.w; + } + } + } + __syncthreads(); + + float *blksums = (float *)siftParts1; + for (int iy = 0; iy < FMC_TH; iy++) + for (int ix = 0; ix < FMC_TW; ix++) + blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix]; + __syncthreads(); + if (idx < FMC_BW) + { + for (int j = 0; j < FMC_BH; j++) + { + float sum = blksums[j * FMC_BW + idx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + __syncthreads(); + } + const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1); + if (idx == 0) + while (atomicCAS((int *)&lock, 0, 1) != 0) + ; + __syncthreads(); + if (idx < FMC_BW) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > 
maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + __syncthreads(); + if (idx == 0) + atomicExch((int *)&lock, 0); +} + +__global__ void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float4 siftParts1[FMC_BW * FMC_BD]; // 4*32*8 = 1024 + __shared__ float4 siftParts2[FMC_BH * FMC_BD]; // 4*32*8 = 1024 + __shared__ float blksums[FMC_BW * FMC_BH]; // 32*32 = 1024 + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int idx = ty * FMC_NW + tx; + float4 *pts1 = 0, *pts2 = 0; + if (idx < FMC_BW) + { + const int p1l = min(blockIdx.x * FMC_BW + idx, numPts1 - 1); + pts1 = (float4 *)sift1[p1l].data; + } + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < min(FMC_GH, numPts2 - FMC_BH + 1); k += FMC_BH) + { + if (idx < FMC_BH) + { + const int p2l = min(blockIdx.y * FMC_GH + k + idx, numPts2 - 1); + pts2 = (float4 *)sift2[p2l].data; + } + float sums[FMC_TW * FMC_TH]; + for (int i = 0; i < FMC_TW * FMC_TH; i++) + sums[i] = 0.0f; + for (int d = 0; d < 32; d += FMC_BD) + { + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD; i++) + siftParts1[i * FMC_BW + idx] = pts1[d + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD; i++) + siftParts2[i * FMC_BH + idx] = pts2[d + i]; + __syncthreads(); + + for (int i = 0; i < FMC_BD; i++) + { + float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x * v2.x; + sums[iy * FMC_TW + ix] += v1[ix].y * v2.y; + sums[iy * FMC_TW + ix] += v1[ix].z * v2.z; + sums[iy * FMC_TW + ix] += v1[ix].w * v2.w; + } + } + } + __syncthreads(); + } + // float *blksums = (float*)siftParts1; + for (int iy = 0; iy < FMC_TH; iy++) + for (int ix = 0; ix < FMC_TW; ix++) + blksums[(ty * FMC_TH + iy) * FMC_BW + 
(tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix]; + __syncthreads(); + if (idx < FMC_BW) + { + for (int j = 0; j < FMC_BH; j++) + { + float sum = blksums[j * FMC_BW + idx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = min(blockIdx.y * FMC_GH + k + j, numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + __syncthreads(); + } + const int p1 = min(blockIdx.x * FMC_BW + idx, numPts1 - 1); + if (idx == 0) + while (atomicCAS((int *)&lock, 0, 1) != 0) + ; + __syncthreads(); + if (idx < FMC_BW) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + __syncthreads(); + if (idx == 0) + atomicExch((int *)&lock, 0); +} + +__global__ void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float siftParts1[17 * 64]; // features in columns + __shared__ float siftParts2[16 * 64]; // one extra to avoid shared conflicts + float4 *pts1 = (float4 *)siftParts1; + float4 *pts2 = (float4 *)siftParts2; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); + const float4 *p1l4 = (float4 *)sift1[p1l].data; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512 / 16; k++) + { + const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1); + const float4 *p2l4 = (float4 *)sift2[p2l].data; +#define NUM 4 + float sum[NUM]; + if (ty < (16 / NUM)) + for (int l = 0; l < NUM; l++) + sum[l] = 0.0f; + __syncthreads(); + for (int i = 0; i < 2; i++) + { + pts1[17 * tx + ty] = p1l4[i * 16 + tx]; 
+ pts2[16 * ty + tx] = p2l4[i * 16 + tx]; + __syncthreads(); + if (ty < (16 / NUM)) + { +#pragma unroll + for (int j = 0; j < 16; j++) + { + float4 p1v = pts1[17 * j + tx]; +#pragma unroll + for (int l = 0; l < NUM; l++) + { + float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j]; + sum[l] += p1v.x * p2v.x; + sum[l] += p1v.y * p2v.y; + sum[l] += p1v.z * p2v.z; + sum[l] += p1v.w * p2v.w; + } + } + } + __syncthreads(); + } + float *sums = siftParts1; + if (ty < (16 / NUM)) + for (int l = 0; l < NUM; l++) + sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l]; + __syncthreads(); + if (ty == 0) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + __syncthreads(); + } + const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1); + if (tx == 0 && ty == 0) + while (atomicCAS((int *)&lock, 0, 1) != 0) + ; + __syncthreads(); + if (ty == 0) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + __syncthreads(); + if (tx == 0 && ty == 0) + atomicExch((int *)&lock, 0); +} + +__global__ void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + //__shared__ float siftParts1[128*16]; // features in columns + __shared__ float siftParts2[128 * 16]; // one extra to avoid shared conflicts + __shared__ float sums[16 * 16]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); + float *pt1l = 
sift1[p1l].data; + float4 part1 = reinterpret_cast(pt1l)[tx]; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512; k += 16) + { + const int p2l = min(blockIdx.y * 512 + k + ty, numPts2 - 1); + float *pt2l = sift2[p2l].data; + reinterpret_cast(siftParts2)[32 * ty + tx] = reinterpret_cast(pt2l)[tx]; + __syncthreads(); + for (int i = 0; i < 16; i++) + { + float4 part2 = reinterpret_cast(siftParts2)[32 * i + tx]; + float sum = part1.x * part2.x + part1.y * part2.y + part1.z * part2.z + part1.w * part2.w; + sum += ShiftDown(sum, 16); + sum += ShiftDown(sum, 8); + sum += ShiftDown(sum, 4); + sum += ShiftDown(sum, 2); + sum += ShiftDown(sum, 1); + if (tx == 0) + sums[16 * i + ty] = sum; + } + __syncthreads(); + if (ty == 0 && tx < 16) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = min(blockIdx.y * 512 + k + j, numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + __syncthreads(); + } + if (tx == 0 && ty == 0) + while (atomicCAS((int *)&lock, 0, 1) != 0) + ; + __syncthreads(); + if (ty == 0 && tx < 16) + { + const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1); + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + __syncthreads(); + if (tx == 0 && ty == 0) + atomicExch((int *)&lock, 0); +} + +__global__ void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2) +{ + __shared__ float siftParts1[17 * 16]; // features in columns + __shared__ float siftParts2[17 * 16]; // one 
extra to avoid shared conflicts + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int p1l = min(blockIdx.x * 16 + ty, numPts1 - 1); + const float *pt1l = sift1[p1l].data; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512 / 16; k++) + { + const int p2l = min(blockIdx.y * 512 + k * 16 + ty, numPts2 - 1); + const float *pt2l = sift2[p2l].data; + float sum = 0.0f; + for (int i = 0; i < 8; i++) + { + siftParts1[17 * tx + ty] = pt1l[i * 16 + tx]; // load and transpose + siftParts2[17 * tx + ty] = pt2l[i * 16 + tx]; + __syncthreads(); + for (int j = 0; j < 16; j++) + sum += siftParts1[17 * j + tx] * siftParts2[17 * j + ty]; + __syncthreads(); + } + float *sums = siftParts1; + sums[16 * ty + tx] = sum; + __syncthreads(); + if (ty == 0) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = min(blockIdx.y * 512 + k * 16 + j, numPts2 - 1); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + __syncthreads(); + } + const int p1 = min(blockIdx.x * 16 + tx, numPts1 - 1); + if (tx == 0 && ty == 0) + while (atomicCAS((int *)&lock, 0, 1) != 0) + ; + __syncthreads(); + if (ty == 0) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + __syncthreads(); + if (tx == 0 && ty == 0) + atomicExch((int *)&lock, 0); +} + +template +__device__ void InvertMatrix(float elem[size][size], float res[size][size]) +{ + int indx[size]; + float b[size]; + float vv[size]; + for (int i = 0; i < size; i++) + indx[i] = 0; + 
int imax = 0; + float d = 1.0; + for (int i = 0; i < size; i++) + { // find biggest element for each row + float big = 0.0; + for (int j = 0; j < size; j++) + { + float temp = fabs(elem[i][j]); + if (temp > big) + big = temp; + } + if (big > 0.0) + vv[i] = 1.0 / big; + else + vv[i] = 1e16; + } + for (int j = 0; j < size; j++) + { + for (int i = 0; i < j; i++) + { // ik (upper right), k=j + float sum = elem[i][j]; // i>=j (upper right) + for (int k = 0; k < j; k++) // kk (upper right), k=j (upper right) + float dum = vv[i] * fabs(sum); + if (dum >= big) + { + big = dum; + imax = i; + } + } + if (j != imax) + { // imax>j + for (int k = 0; k < size; k++) + { + float dum = elem[imax][k]; // upper right and lower left + elem[imax][k] = elem[j][k]; + elem[j][k] = dum; + } + d = -d; + vv[imax] = vv[j]; + } + indx[j] = imax; + if (elem[j][j] == 0.0) // j==j (upper right) + elem[j][j] = 1e-16; + if (j != (size - 1)) + { + float dum = 1.0 / elem[j][j]; + for (int i = j + 1; i < size; i++) // i>j + elem[i][j] *= dum; // i>j (upper right) + } + } + for (int j = 0; j < size; j++) + { + for (int k = 0; k < size; k++) + b[k] = 0.0; + b[j] = 1.0; + int ii = -1; + for (int i = 0; i < size; i++) + { + int ip = indx[i]; + float sum = b[ip]; + b[ip] = b[i]; + if (ii != -1) + for (int j = ii; j < i; j++) + sum -= elem[i][j] * b[j]; // i>j (upper right) + else if (sum != 0.0) + ii = i; + b[i] = sum; + } + for (int i = size - 1; i >= 0; i--) + { + float sum = b[i]; + for (int j = i + 1; j < size; j++) + sum -= elem[i][j] * b[j]; // i(a, ia); + __syncthreads(); + for (int j = 0; j < 8; j++) + { + float sum = 0.0f; + for (int i = 0; i < 8; i++) + sum += ia[j][i] * b[i]; + homo[j * numLoops + idx] = sum; + } + __syncthreads(); +} + +#define TESTHOMO_TESTS 16 // number of tests per block, alt. 32, 32 +#define TESTHOMO_LOOPS 16 // number of loops per block, alt. 
8, 16 + +__global__ void TestHomographies(float *d_coord, float *d_homo, + int *d_counts, int numPts, float thresh2) +{ + __shared__ float homo[8 * TESTHOMO_LOOPS]; + __shared__ int cnts[TESTHOMO_TESTS * TESTHOMO_LOOPS]; + const int tx = threadIdx.x; + const int ty = threadIdx.y; + const int idx = blockIdx.y * blockDim.y + tx; + const int numLoops = blockDim.y * gridDim.y; + if (ty < 8 && tx < TESTHOMO_LOOPS) + homo[tx * 8 + ty] = d_homo[idx + ty * numLoops]; + __syncthreads(); + float a[8]; + for (int i = 0; i < 8; i++) + a[i] = homo[ty * 8 + i]; + int cnt = 0; + // for (int i = tx; i < numPts; i += TESTHOMO_TESTS) + // { + // float x1 = d_coord[i + 0 * numPts]; + // float y1 = d_coord[i + 1 * numPts]; + // float x2 = d_coord[i + 2 * numPts]; + // float y2 = d_coord[i + 3 * numPts]; + // float nomx = __fmul_rz(a[0], x1) + __fmul_rz(a[1], y1) + a[2]; + // float nomy = __fmul_rz(a[3], x1) + __fmul_rz(a[4], y1) + a[5]; + // float deno = __fmul_rz(a[6], x1) + __fmul_rz(a[7], y1) + 1.0f; + // float errx = __fmul_rz(x2, deno) - nomx; + // float erry = __fmul_rz(y2, deno) - nomy; + // float err2 = __fmul_rz(errx, errx) + __fmul_rz(erry, erry); + // if (err2 < __fmul_rz(thresh2, __fmul_rz(deno, deno))) + // cnt++; + // } + + for (int i = tx; i < numPts; i += TESTHOMO_TESTS) + { + float x1 = d_coord[i + 0 * numPts]; + float y1 = d_coord[i + 1 * numPts]; + float x2 = d_coord[i + 2 * numPts]; + float y2 = d_coord[i + 3 * numPts]; + float nomx = __fmul_rn(a[0], x1) + __fmul_rn(a[1], y1) + a[2]; + float nomy = __fmul_rn(a[3], x1) + __fmul_rn(a[4], y1) + a[5]; + float deno = __fmul_rn(a[6], x1) + __fmul_rn(a[7], y1) + 1.0f; + float errx = __fmul_rn(x2, deno) - nomx; + float erry = __fmul_rn(y2, deno) - nomy; + float err2 = __fmul_rn(errx, errx) + __fmul_rn(erry, erry); + if (err2 < __fmul_rn(thresh2, __fmul_rn(deno, deno))) + cnt++; + } + + int kty = TESTHOMO_TESTS * ty; + cnts[kty + tx] = cnt; + __syncthreads(); + int len = TESTHOMO_TESTS / 2; + while (len > 0) + { + if (tx < 
len) + cnts[kty + tx] += cnts[kty + tx + len]; + len /= 2; + __syncthreads(); + } + if (tx < TESTHOMO_LOOPS && ty == 0) + d_counts[idx] = cnts[TESTHOMO_TESTS * tx]; + __syncthreads(); +} + +//================= Host matching functions =====================// + +double FindHomography(SiftData &data, float *homography, int *numMatches, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh) +{ + *numMatches = 0; + homography[0] = homography[4] = homography[8] = 1.0f; + homography[1] = homography[2] = homography[3] = 0.0f; + homography[5] = homography[6] = homography[7] = 0.0f; +#ifdef MANAGEDMEM + SiftPoint *d_sift = data.m_data; +#else + if (data.d_data == NULL) + return 0.0f; + SiftPoint *d_sift = data.d_data; +#endif + numLoops = iDivUp(numLoops, 16) * 16; + int numPts = data.numPts; + if (numPts < 8) + return 0.0f; + int numPtsUp = iDivUp(numPts, 16) * 16; + float *d_coord, *d_homo; + int *d_randPts, *h_randPts; + int randSize = 4 * sizeof(int) * numLoops; + int szFl = sizeof(float); + int szPt = sizeof(SiftPoint); + +#ifdef DEVICE_TIMER + auto start_malloc_1 = std::chrono::steady_clock::now(); +#endif + + safeCall(hipMalloc((void **)&d_coord, 4 * sizeof(float) * numPtsUp)); + safeCall(hipMalloc((void **)&d_randPts, randSize)); + safeCall(hipMalloc((void **)&d_homo, 8 * sizeof(float) * numLoops)); + +#ifdef DEVICE_TIMER + auto stop_malloc_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc_1 - start_malloc_1).count(); +#endif + + h_randPts = (int *)malloc(randSize); + float *h_scores = (float *)malloc(sizeof(float) * numPtsUp); + float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp); + + // temp variables are for host memory allocation, device data is transferred to temp + float *temp1 = (float *)malloc(szPt * numPtsUp); + float *temp2 = (float *)malloc(szPt * numPtsUp); + +#ifdef DEVICE_TIMER + auto start_memcpy_1 = std::chrono::steady_clock::now(); +#endif + + safeCall(hipMemcpy(temp1, 
&d_sift[0].score, szPt * numPts, hipMemcpyDeviceToHost)); + safeCall(hipMemcpy(temp2, &d_sift[0].ambiguity, szPt * numPts, hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_1 - start_memcpy_1).count(); +#endif + + char *src_score = (char *)temp1; + char *src_ambiguity = (char *)temp2; + char *dst_score = (char *)h_scores; + char *dst_ambiguity = (char *)h_ambiguities; + + for (int i = 0; i < numPts; ++i) + { + memcpy(dst_score, src_score, szFl); + memcpy(dst_ambiguity, src_ambiguity, szFl); + + src_score += szPt; + src_ambiguity += szPt; + dst_score += szFl; + dst_ambiguity += szFl; + } + + // temp1/temp2 were only host staging copies for the unpacking loop above; release them here so they do not leak. + free(temp1); + free(temp2); + + int *validPts = (int *)malloc(sizeof(int) * numPts); + int numValid = 0; + for (int i = 0; i < numPts; i++) + { + if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity) + validPts[numValid++] = i; + } + free(h_scores); + free(h_ambiguities); + if (numValid >= 8) + { + std::random_device rd; + uint32_t seed = rd(); + std::mt19937 rnd(seed); // mersenne_twister_engine + std::uniform_int_distribution dis(0, UINT32_MAX); + for (int i = 0; i < numLoops; i++) + { + int p1 = dis(rnd) % numValid; + int p2 = dis(rnd) % numValid; + int p3 = dis(rnd) % numValid; + int p4 = dis(rnd) % numValid; + while (p2 == p1) + p2 = dis(rnd) % numValid; + while (p3 == p1 || p3 == p2) + p3 = dis(rnd) % numValid; + while (p4 == p1 || p4 == p2 || p4 == p3) + p4 = dis(rnd) % numValid; + h_randPts[i + 0 * numLoops] = validPts[p1]; + h_randPts[i + 1 * numLoops] = validPts[p2]; + h_randPts[i + 2 * numLoops] = validPts[p3]; + h_randPts[i + 3 * numLoops] = validPts[p4]; + } + + float *temp3, *temp4, *temp5, *temp6; + +#ifdef DEVICE_TIMER + auto start_malloc_2 = std::chrono::steady_clock::now(); +#endif + safeCall(hipMalloc((void **)&temp3, szPt * numPtsUp)); + safeCall(hipMalloc((void **)&temp4, szPt * numPtsUp)); + safeCall(hipMalloc((void **)&temp5, szPt * numPtsUp)); +
safeCall(hipMalloc((void **)&temp6, szPt * numPtsUp)); + +#ifdef DEVICE_TIMER + auto stop_malloc_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc_2 - start_malloc_2).count(); +#endif + +#ifdef DEVICE_TIMER + auto start_memcpy_2 = std::chrono::steady_clock::now(); +#endif + + safeCall(hipMemcpy(d_randPts, h_randPts, randSize, hipMemcpyHostToDevice)); + hipDeviceSynchronize(); + + safeCall(hipMemcpy(temp3, &d_sift[0].xpos, szPt * numPts, hipMemcpyDeviceToDevice)); + safeCall(hipMemcpy(temp4, &d_sift[0].ypos, szPt * numPts, hipMemcpyDeviceToDevice)); + safeCall(hipMemcpy(temp5, &d_sift[0].match_xpos, szPt * numPts, hipMemcpyDeviceToDevice)); + safeCall(hipMemcpy(temp6, &d_sift[0].match_ypos, szPt * numPts, hipMemcpyDeviceToDevice)); + hipDeviceSynchronize(); + + // kernel call to transfer memory from device to device + hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp3, &d_coord[0 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(hipDeviceSynchronize()); + hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp4, &d_coord[1 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(hipDeviceSynchronize()); + hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp5, &d_coord[2 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(hipDeviceSynchronize()); + hipLaunchKernelGGL(memcopyKernel, 1, 1, 0, 0, temp6, &d_coord[3 * numPtsUp], szPt, szFl, numPts, szFl); + safeCall(hipDeviceSynchronize()); + +#ifdef DEVICE_TIMER + auto stop_memcpy_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_2 - start_memcpy_2).count(); +#endif + + // temp3..temp6 were only sources for the memcopyKernel launches above (each already synchronized); free them outside the timed region so they do not leak. + safeCall(hipFree(temp3)); + safeCall(hipFree(temp4)); + safeCall(hipFree(temp5)); + safeCall(hipFree(temp6)); + +#ifdef DEVICE_TIMER + auto start_kernel_1 = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(ComputeHomographies, numLoops / 16, 16, 0, 0, d_coord, d_randPts, d_homo, numPtsUp); + safeCall(hipDeviceSynchronize()); + +#ifdef DEVICE_TIMER + auto stop_kernel_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_1 - start_kernel_1).count(); + //
printf("ComputeHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_1 - start_kernel_1).count()); +#endif + checkMsg("ComputeHomographies() execution failed\n"); + dim3 blocks(1, numLoops / TESTHOMO_LOOPS); + dim3 threads(TESTHOMO_TESTS, TESTHOMO_LOOPS); + +#ifdef DEVICE_TIMER + auto start_kernel_2 = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(TestHomographies, blocks, threads, 0, 0, d_coord, d_homo, d_randPts, numPtsUp, thresh * thresh); + safeCall(hipDeviceSynchronize()); + +#ifdef DEVICE_TIMER + auto stop_kernel_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_2 - start_kernel_2).count(); + // printf("TestHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_2 - start_kernel_2).count()); +#endif + checkMsg("TestHomographies() execution failed\n"); + +#ifdef DEVICE_TIMER + auto start_memcpy_3 = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpy(h_randPts, d_randPts, sizeof(int) * numLoops, hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy_3 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_3 - start_memcpy_3).count(); +#endif + + int maxIndex = -1, maxCount = -1; + for (int i = 0; i < numLoops; i++) + if (h_randPts[i] > maxCount) + { + maxCount = h_randPts[i]; + maxIndex = i; + } + + *numMatches = maxCount; + +#ifdef DEVICE_TIMER + auto start_memcpy_4 = std::chrono::steady_clock::now(); +#endif + safeCall(hipMemcpy2D(homography, szFl, &d_homo[maxIndex], sizeof(float) * numLoops, szFl, 8, hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy_4 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_4 - start_memcpy_4).count(); +#endif + } + + free(validPts); + free(h_randPts); + + safeCall(hipFree(d_homo)); + safeCall(hipFree(d_randPts)); + safeCall(hipFree(d_coord)); + double gpuTime = 0.0; + return gpuTime; +} + +double 
MatchSiftData(SiftData &data1, SiftData &data2, float &matchTime) +{ + float matchSiftDataTime = 0.0; + int numPts1 = data1.numPts; + int numPts2 = data2.numPts; + if (!numPts1 || !numPts2) + return 0.0; +#ifdef MANAGEDMEM + SiftPoint *sift1 = data1.m_data; + SiftPoint *sift2 = data2.m_data; +#else + if (data1.d_data == NULL || data2.d_data == NULL) + return 0.0f; + SiftPoint *sift1 = data1.d_data; + SiftPoint *sift2 = data2.d_data; +#endif + +// Original version with correlation and maximization in two different kernels +// Global memory reguirement: O(N^2) +#if 0 + float *d_corrData; + int corrWidth = iDivUp(numPts2, 16)*16; + int corrSize = sizeof(float)*numPts1*corrWidth; + safeCall(hipMalloc((void **)&d_corrData, corrSize)); +#if 0 // K40c 10.9ms, 1080 Ti 3.8ms + dim3 blocks1(numPts1, iDivUp(numPts2, 16)); + dim3 threads1(16, 16); // each block: 1 points x 16 points + hipLaunchKernelGGL(MatchSiftPoints, blocks1, threads1, 0, 0, sift1, sift2, d_corrData, numPts1, numPts2); +#else // K40c 7.6ms, 1080 Ti 1.4ms + dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16)); + dim3 threads(16, 16); // each block: 16 points x 16 points + // hipLaunchKernelGGL(MatchSiftPoints2, blocks, threads, 0, 0, sift1, sift2, d_corrData, numPts1, numPts2); +#endif + safeCall(hipDeviceSynchronize()); + dim3 blocksMax(iDivUp(numPts1, 16)); + dim3 threadsMax(16, 16); + hipLaunchKernelGGL(FindMaxCorr, blocksMax, threadsMax, 0, 0, d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint)); + safeCall(hipDeviceSynchronize()); + checkMsg("FindMaxCorr() execution failed\n"); + safeCall(hipFree(d_corrData)); +#endif + +// Version suggested by Nicholas Lin with combined correlation and maximization +// Global memory reguirement: O(N) +#if 0 // K40c 51.2ms, 1080 Ti 9.6ms + int block_dim = 16; + float *d_corrData; + int corrSize = numPts1 * block_dim * 2; + safeCall(hipMalloc((void **)&d_corrData, sizeof(float) * corrSize)); + dim3 blocks(iDivUp(numPts1, block_dim)); + dim3 threads(block_dim, 
block_dim); + hipLaunchKernelGGL(FindMaxCorr3, blocks, threads , 0, 0, d_corrData, sift1, sift2, numPts1, numPts2); + safeCall(hipDeviceSynchronize()); + checkMsg("FindMaxCorr3() execution failed\n"); + safeCall(hipFree(d_corrData)); +#endif + +// Combined version with no global memory requirement using one 1 point per block +#if 0 // K40c 8.9ms, 1080 Ti 2.1ms, 2080 Ti 1.0ms + dim3 blocksMax(numPts1); + dim3 threadsMax(FMC2W, FMC2H); + hipLaunchKernelGGL(FindMaxCorr2, blocksMax, threadsMax, 0, 0, sift1, sift2, numPts1, numPts2); + safeCall(hipDeviceSynchronize()); + checkMsg("FindMaxCorr2() execution failed\n"); +#endif + +// Combined version with no global memory requirement using one FMC2H points per block +#if 0 // K40c 9.2ms, 1080 Ti 1.3ms, 2080 Ti 1.1ms + dim3 blocksMax2(iDivUp(numPts1, FMC2H)); + dim3 threadsMax2(FMC2W, FMC2H); + hipLaunchKernelGGL(FindMaxCorr4, blocksMax2, threadsMax2, 0, 0, sift1, sift2, numPts1, numPts2); + safeCall(hipDeviceSynchronize()); + checkMsg("FindMaxCorr4() execution failed\n"); +#endif + +// Combined version with no global memory requirement using global locks +#if 1 + dim3 blocksMax3(iDivUp(numPts1, 16), iDivUp(numPts2, 512)); + dim3 threadsMax3(16, 16); + +#ifdef DEVICE_TIMER + auto start_kernel1 = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(CleanMatches, iDivUp(numPts1, 64), 64, 0, 0, sift1, numPts1); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel1 = std::chrono::steady_clock::now(); + // printf("CleanMatches time = %.2f us\n", std::chrono::duration(stop_kernel1 - start_kernel1).count()); + matchTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); + matchSiftDataTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); +#endif + int mode = 10; + if (mode == 5) + hipLaunchKernelGGL(FindMaxCorr5, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2); + else if (mode == 6) + { + threadsMax3 = dim3(32, 16); + hipLaunchKernelGGL(FindMaxCorr6, blocksMax3, 
threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2); + } + else if (mode == 7) + hipLaunchKernelGGL(FindMaxCorr7, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2); + else if (mode == 8) + { + blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH)); + threadsMax3 = dim3(FMC_NW, FMC_NH); + hipLaunchKernelGGL(FindMaxCorr8, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2); + } + else if (mode == 9) + { + blocksMax3 = dim3(iDivUp(numPts1, FMC_BW), iDivUp(numPts2, FMC_GH)); + threadsMax3 = dim3(FMC_NW, FMC_NH); + hipLaunchKernelGGL(FindMaxCorr9, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2); + } + else if (mode == 10) + { + blocksMax3 = dim3(iDivUp(numPts1, M7W)); + threadsMax3 = dim3(M7W, M7H / M7R); + +#ifdef DEVICE_TIMER + auto start_kernel2 = std::chrono::steady_clock::now(); +#endif + hipLaunchKernelGGL(FindMaxCorr10, blocksMax3, threadsMax3, 0, 0, sift1, sift2, numPts1, numPts2); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_kernel2 = std::chrono::steady_clock::now(); + // printf("FindMaxCorr10 time = %.2f us\n", std::chrono::duration(stop_kernel2 - start_kernel2).count()); + matchTime += std::chrono::duration(stop_kernel2 - start_kernel2).count(); + matchSiftDataTime += std::chrono::duration(stop_kernel2 - start_kernel2).count(); +#endif + } + checkMsg("FindMaxCorr10() execution failed\n"); +#endif + + if (data1.h_data != NULL) + { + float *h_ptr = &data1.h_data[0].score; + float *d_ptr = &data1.d_data[0].score; + +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + // Strided copy of the 5 four-byte result fields starting at .score for each point (presumably score, ambiguity, match, match_xpos, match_ypos - confirm against the SiftPoint layout); a flat copy of sizeof(SiftPoint) * numPts bytes from .score can run past the end of both buffers when numPts equals the allocated point count. + safeCall(hipMemcpy2D(h_ptr, sizeof(SiftPoint), d_ptr, sizeof(SiftPoint), 5 * sizeof(float), data1.numPts, hipMemcpyDeviceToHost)); + hipDeviceSynchronize(); + +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); + matchSiftDataTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + } + return matchTime; +} diff --git
a/third-party-programs/Velocity-Bench/cudaSift/LICENSE b/third-party-programs/Velocity-Bench/cudaSift/LICENSE new file mode 100644 index 000000000..bee8393e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/LICENSE @@ -0,0 +1,21 @@ +Modifications Copyright (C) 2023 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. 
+ +SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/cudaSift/README.md b/third-party-programs/Velocity-Bench/cudaSift/README.md new file mode 100755 index 000000000..f0e66b284 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/README.md @@ -0,0 +1,92 @@ +# CudaSift +CudaSift - SIFT features with SYCL, CUDA & HIP + +# Building CudaSift +**To build cuda version** + +mkdir build && cd build + +//For A100 Machine + +cmake ../ -DUSE_SM=80 + +//For H100 Machine + +cmake ../ -DUSE_SM=90 + +make + +**To build SYCL version** + +mkdir build + +cd build + +#update the path for OpenCV_DIR + +CXX=icpx cmake ../ -DGPU_AOT=pvc + +make -sj + +**To build SYCL version on NVIDIA Backend** + +source /path/to/clang/ + +mkdir build && cd build + +//For A100 Machine + +CC=clang CXX=clang++ cmake ../ -DUSE_NVIDIA_BACKEND=YES -DUSE_SM=80 + +//For H100 Machine + +CC=clang CXX=clang++ cmake ../ -DUSE_NVIDIA_BACKEND=YES -DUSE_SM=90 + +make -sj + +**To build SYCL version on AMD Backend** + +source /path/to/clang/ + +mkdir build && cd build + +//For MI-100 Machine + +CC=clang CXX=clang++ cmake ../ -DUSE_AMDHIP_BACKEND=gfx908 + +//For MI-250 Machine + +CC=clang CXX=clang++ cmake ../ -DUSE_AMDHIP_BACKEND=gfx90a + +make -sj + +**To build HIP version** + +mkdir build && cd build + +CXX=hipcc cmake ../ -DROCM_PATH=/path/to/rocm +For example: CXX=hipcc cmake ../ -DROCM_PATH=/opt/rocm-5.4.3 + +make -sj + +# Running CudaSift + +**To run SYCL version** + +./cudaSift + +**To run SYCL on NVIDIA Backend** + +./cudaSift + +**To run SYCL on AMD Backend** + +ONEAPI_DEVICE_SELECTOR=hip:* ./cudaSift + +**To run cuda version** + +./cudasift + +**To run hip version** + +./cudasift diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/CMakeLists.txt b/third-party-programs/Velocity-Bench/cudaSift/SYCL/CMakeLists.txt new file mode 100644 index 000000000..536654df9 --- /dev/null +++
b/third-party-programs/Velocity-Bench/cudaSift/SYCL/CMakeLists.txt @@ -0,0 +1,153 @@ +# Modifications Copyright (C) 2023 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +# OR OTHER DEALINGS IN THE SOFTWARE. 
+ +# SPDX-License-Identifier: MIT + +cmake_minimum_required(VERSION 3.10) +project(cudaSift LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) # SYCL code requires this +set(CMAKE_CXX_STANDARD_REQUIRED ON) # Enable modern C++ standards +set(CMAKE_CXX_EXTENSIONS OFF) # Use -std, not -gnu + +option(GPU_AOT "Build AOT for Intel GPU" OFF) +option(USE_NVIDIA_BACKEND "Build for NVIDIA backend" OFF) +option(USE_AMDHIP_BACKEND "Build for AMD HIP backend" OFF) +option(USE_SM "Specifies which streaming multiprocessor architecture to use") +option(OpenCV_DIR "Path to OpenCV_DIR") +option(DEVICE_TIMER "Build using Device Timer" OFF) + +# Find OpenCV, you may need to set OpenCV_DIR variable +# to the absolute path to the directory containing OpenCVConfig.cmake file +# via the command line or GUI +find_package(OpenCV REQUIRED) + +# If the package has been found, several variables will +# be set, you can find the full list with descriptions +# in the OpenCVConfig.cmake file. +# Print some message showing some of them +message(STATUS "OpenCV library status:") +message(STATUS " version: ${OpenCV_VERSION}") +message(STATUS " libraries: ${OpenCV_LIBS}") +message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") + +if(CMAKE_VERSION VERSION_LESS "2.8.11") + # Add OpenCV headers location to your include paths + include_directories(${OpenCV_INCLUDE_DIRS}) +endif() + +set(SOURCES + ${CMAKE_SOURCE_DIR}/../common/Utility.cpp + cudaImage.dp.cpp + cudaImage.h + cudaSiftH.dp.cpp + cudaSiftH.h + matching.dp.cpp + cudaSiftD.h + cudaSift.h + geomFuncs.cpp + mainSift.cpp +) + +include_directories( + ${CMAKE_SOURCE_DIR}/../common/ + ${CMAKE_SOURCE_DIR} + ${OpenCV_INCLUDE_DIRS} +) + +if(DEVICE_TIMER) + message(STATUS "Enabling Device Timer") + add_compile_options(-DDEVICE_TIMER) +endif() + +if(USE_NVIDIA_BACKEND) + message(STATUS "Nvidia backend") + add_compile_options(-DUSE_NVIDIA_BACKEND) +endif() + +if(USE_AMDHIP_BACKEND) + message(STATUS "AMD backend") + add_compile_options(-DUSE_AMDHIP_BACKEND) 
+endif() + +# Use either default or user defined CXX flags +# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags +set(USE_DEFAULT_FLAGS ON) + +set(DEF_INTEL_WL_CXX_FLAGS " ") +set(DEF_NVIDIA_WL_CXX_FLAGS " ") +set(DEF_AMD_WL_CXX_FLAGS " -D__HIP_PLATFORM_AMD__ ") + +set(DEF_INTEL_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ") +set(DEF_NVIDIA_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ") +set(DEF_AMD_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ") + +# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific) +# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags) +# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity + +if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together") +elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "Using DEFAULT compilation flags") + set(INTEL_GPU_CXX_FLAGS "${DEF_INTEL_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}") + set(NVIDIA_GPU_CXX_FLAGS "${DEF_NVIDIA_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}") + set(AMD_GPU_CXX_FLAGS "${DEF_AMD_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}") +elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL compilation flags") + set(INTEL_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}") + set(NVIDIA_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}") + set(AMD_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}") +elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags") + set(INTEL_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(NVIDIA_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(AMD_GPU_CXX_FLAGS 
"${CMAKE_CXX_FLAGS}") +endif() + +# JIT compilation +if(GPU_AOT) + message(STATUS "Enabling INTEL backend") + set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") + if((${GPU_AOT} STREQUAL "pvc") OR(${GPU_AOT} STREQUAL "PVC")) + message(STATUS "Enabling Intel GPU AOT compilation for ${GPU_AOT}") + string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=spir64_gen -Xs \"-device 0x0bd5 -revision_id 0x2f\" -Xs \"-options -ze-opt-large-register-file\" ") + else() + message(STATUS "Using custom AOT compilation flag ${GPU_AOT}") + string(APPEND CMAKE_CXX_FLAGS " ${GPU_AOT} ") # User should be aware of advanced AOT compilation flags + endif() +elseif(USE_NVIDIA_BACKEND) + message(STATUS "Enabling NVIDIA backend") + set(CMAKE_CXX_FLAGS "${NVIDIA_GPU_CXX_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_${USE_SM}") +elseif(USE_AMDHIP_BACKEND) + message(STATUS "Enabling AMD HIP backend for ${USE_AMDHIP_BACKEND} AMD architecture") + set(CMAKE_CXX_FLAGS "${AMD_GPU_CXX_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${USE_AMDHIP_BACKEND} ") +else() + message(STATUS "Enabling INTEL backend") + set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") +endif() + +# Output the compiler flags that were constructed for visual inspection +message(STATUS "Compilation flags set to: ${CMAKE_CXX_FLAGS}") + +add_executable(${PROJECT_NAME} ${SOURCES}) +target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} stdc++ stdc++fs) diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.dp.cpp new file mode 100644 index 000000000..4c0a97d04 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.dp.cpp @@ -0,0 +1,109 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // 
+//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include +#include + +#include "infra/memory.hpp" +#include "cudautils.h" +#include "cudaImage.h" + +int iDivUp(int a, int b) { return (a % b != 0) ? (a / b + 1) : (a / b); } +int iDivDown(int a, int b) { return a / b; } +int iAlignUp(int a, int b) { return (a % b != 0) ? 
(a - a % b + b) : a; } +int iAlignDown(int a, int b) { return a - a % b; } + +void CudaImage::Allocate(int w, int h, int p, bool host, sycl::queue &q_ct, float &time, float *devmem, float *hostmem) +{ + width = w; + height = h; + pitch = p; + d_data = devmem; + h_data = hostmem; + if (devmem == NULL) + { +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + d_data = (float *)infra::sift_malloc(pitch, (size_t)(sizeof(float) * width), (size_t)height, q_ct); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + std::cout << "Allocate Time is " << std::chrono::duration(stop_malloc - start_malloc).count() << " us" << std::endl; + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + + pitch /= sizeof(float); + if (d_data == NULL) + printf("Failed to allocate device data\n"); + d_internalAlloc = true; + } + if (host && hostmem == NULL) + { + h_data = (float *)malloc(sizeof(float) * pitch * height); + h_internalAlloc = true; + } +} + +CudaImage::CudaImage() : width(0), height(0), pitch(0), d_data(NULL), h_data(NULL), /*t_data(NULL), */ d_internalAlloc(false), h_internalAlloc(false) +{ +} + +CudaImage::~CudaImage() +{ + if (d_internalAlloc && d_data != NULL) + try{ + safeCall((sycl::free(d_data, infra::get_default_queue()), 0)); + } catch (std::exception const &e) { + std::cerr << e.what() << '\n'; + } + d_data = NULL; + if (h_internalAlloc && h_data != NULL) + free(h_data); + h_data = NULL; +} + +double CudaImage::Download(sycl::queue &q_ct, float &time) +{ + double downloadTime = 0.0; + int p = sizeof(float) * pitch; + if (d_data != NULL && h_data != NULL) + { +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + infra::sift_memcpy(d_data, p, h_data, sizeof(float) * width, sizeof(float) * width, height, infra::host_to_device, q_ct); + q_ct.wait(); + +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + downloadTime = 
std::chrono::duration(stop_memcpy - start_memcpy).count(); + time += downloadTime; + std::cout << "Download Time is " << downloadTime << " us" << std::endl; +#endif + } + return downloadTime; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.h new file mode 100644 index 000000000..0ce1a922e --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaImage.h @@ -0,0 +1,59 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#ifndef CUDAIMAGE_H +#define CUDAIMAGE_H + +#include +#include + +class CudaImage +{ +public: + int width, height; + size_t pitch; + float *h_data; + float *d_data; + bool d_internalAlloc; + bool h_internalAlloc; + +public: + CudaImage(); + CudaImage(const CudaImage &) = delete; + CudaImage &operator=(const CudaImage &) = delete; + ~CudaImage(); + void Allocate(int width, int height, int pitch, bool withHost, sycl::queue &q_ct, float &totTime, float *devMem = NULL, float *hostMem = NULL); + double Download(sycl::queue &q_ct, float &totTime); +}; + +int iDivUp(int a, int b); +int iDivDown(int a, int b); +int iAlignUp(int a, int b); +int iAlignDown(int a, int b); +void StartTimer(unsigned int *hTimer); +double StopTimer(unsigned int hTimer); + +#endif // CUDAIMAGE_H diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSift.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSift.h new file mode 100644 index 000000000..8bdada3ec --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSift.h @@ -0,0 +1,87 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#ifndef CUDASIFT_H +#define CUDASIFT_H + +#include "cudaImage.h" + +struct rawImg_data +{ + float *raw_d_data; + int pitch; + + void set_pitch(int pitch) + { + this->pitch = pitch; + } + + float read(float xf, float yf) + { + int xi = xf; + int yi = yf; + return *(raw_d_data + yi * pitch + xi); + } +}; + +typedef struct +{ + float xpos; + float ypos; + float scale; + float sharpness; + float edgeness; + float orientation; + float score; + float ambiguity; + int match; + float match_xpos; + float match_ypos; + float match_error; + float subsampling; + float empty[3]; + float data[128]; +} SiftPoint; + +typedef struct +{ + int numPts; // Number of available Sift points + int maxPts; // Number of allocated Sift points +#ifdef MANAGEDMEM + SiftPoint *m_data; // Managed data +#else + SiftPoint *h_data; // Host (CPU) data + SiftPoint *d_data; // Device (GPU) data +#endif +} SiftData; + +void InitCuda(sycl::queue &q_ct, int devNum = 0); +float *AllocSiftTempMemory(int width, int height, int numOctaves, sycl::queue &q_ct, float &totTime, bool scaleUp = false); +void FreeSiftTempMemory(float *memoryTmp, sycl::queue &q_ct); +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, sycl::queue &q_ct, float &totTime, float lowestScale = 0.0f, bool scaleUp = false, float *tempMemory = 0); +void InitSiftData(SiftData &data, sycl::queue &q_ct, float &totTime, int num = 1024, bool host = false, bool dev = true); +void FreeSiftData(SiftData &data, sycl::queue &q_ct); +void PrintSiftData(SiftData &data, sycl::queue &q_ct); +double MatchSiftData(SiftData &data1, SiftData &data2, sycl::queue &q_ct, float &time); +double 
FindHomography(SiftData &data, float *homography, int *numMatches, sycl::queue &q_ct, float &time, int numLoops = 1000, float minScore = 0.85f, float maxAmbiguity = 0.95f, float thresh = 5.0f); + +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.dp.cpp new file mode 100644 index 000000000..dc4ff5c01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.dp.cpp @@ -0,0 +1,1247 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include + +#include "infra/infra.hpp" +#include "cudautils.h" +#include "cudaSiftD.h" +#include "cudaSift.h" + +/////////////////////////////////////////////////////////////////////////////// +// Kernel configuration +/////////////////////////////////////////////////////////////////////////////// + +infra::constant_memory d_MaxNumPoints; +infra::global_memory d_PointCounter(8 * 2 + 1); +infra::constant_memory d_ScaleDownKernel(5); +infra::constant_memory d_LowPassKernel(2 * LOWPASS_R + 1); +infra::constant_memory d_LaplaceKernel(8 * 12 * 16); + +/////////////////////////////////////////////////////////////////////////////// +// Lowpass filter and subsample image +/////////////////////////////////////////////////////////////////////////////// +void ScaleDownDenseShift(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + sycl::nd_item<3> item_ct1, float *d_ScaleDownKernel, + float *brows) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx; + const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty; + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + const int xl = sycl::min((int)(width - 1), sycl::max(0, (int)(xp - 2))); + const int yl = sycl::min((int)(height - 1), sycl::max(0, (int)(yp - 2))); + if (xp < (width + 4) && yp < (height + 4)) + { + float v = d_Data[yl * pitch + xl]; + brows[BW * ty + tx] = + k0 * (v + ShiftDown(v, 4, item_ct1)) + + k1 * (ShiftDown(v, 1, item_ct1) + ShiftDown(v, 3, item_ct1)) + + k2 * ShiftDown(v, 2, item_ct1); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + const int xs = item_ct1.get_group(2) * W2 + tx; + const int ys = item_ct1.get_group(1) * H2 + ty; + if (tx < 
W2 && ty < H2 && xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[BW * (ty * 2) + (tx * 2)]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * BW]) + k1 * (ptr[1 * BW] + ptr[3 * BW]) + k2 * ptr[2 * BW]; + } +} + +void ScaleDownDense(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + sycl::nd_item<3> item_ct1, float *d_ScaleDownKernel, + float *irows, float *brows) +{ +#define BW (SCALEDOWN_W + 4) +#define BH (SCALEDOWN_H + 4) +#define W2 (SCALEDOWN_W / 2) +#define H2 (SCALEDOWN_H / 2) + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * SCALEDOWN_W + tx; + const int yp = item_ct1.get_group(1) * SCALEDOWN_H + ty; + const int xl = sycl::min((int)(width - 1), sycl::max(0, (int)(xp - 2))); + const int yl = sycl::min((int)(height - 1), sycl::max(0, (int)(yp - 2))); + const float k0 = d_ScaleDownKernel[0]; + const float k1 = d_ScaleDownKernel[1]; + const float k2 = d_ScaleDownKernel[2]; + if (xp < (width + 4) && yp < (height + 4)) + irows[BW * ty + tx] = d_Data[yl * pitch + xl]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (yp < (height + 4) && tx < W2) + { + float *ptr = &irows[BW * ty + 2 * tx]; + brows[W2 * ty + tx] = k0 * (ptr[0] + ptr[4]) + k1 * (ptr[1] + ptr[3]) + k2 * ptr[2]; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + const int xs = item_ct1.get_group(2) * W2 + tx; + const int ys = item_ct1.get_group(1) * H2 + ty; + if (tx < W2 && ty < H2 && xs < (width / 2) && ys < (height / 2)) + { + float *ptr = &brows[W2 * (ty * 2) + tx]; + d_Result[ys * newpitch + xs] = k0 * (ptr[0] + ptr[4 * W2]) + k1 * (ptr[1 * W2] + ptr[3 * W2]) + k2 * ptr[2 * W2]; + } +} + +void ScaleDown(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + sycl::nd_item<3> item_ct1, float *d_ScaleDownKernel, float *inrow, + float *brow, int *yRead, int *yWrite) +{ + +#define dx2 (SCALEDOWN_W / 2) + 
const int tx = item_ct1.get_local_id(2); + const int tx0 = tx + 0 * dx2; + const int tx1 = tx + 1 * dx2; + const int tx2 = tx + 2 * dx2; + const int tx3 = tx + 3 * dx2; + const int tx4 = tx + 4 * dx2; + const int xStart = item_ct1.get_group(2) * SCALEDOWN_W; + const int yStart = item_ct1.get_group(1) * SCALEDOWN_H; + const int xWrite = xStart / 2 + tx; + float k0 = d_ScaleDownKernel[0]; + float k1 = d_ScaleDownKernel[1]; + float k2 = d_ScaleDownKernel[2]; + if (tx < SCALEDOWN_H + 4) + { + int y = yStart + tx - 2; + y = (y < 0 ? 0 : y); + y = (y >= height ? height - 1 : y); + yRead[tx] = y * pitch; + yWrite[tx] = (yStart + tx - 4) / 2 * newpitch; + } + + + item_ct1.barrier(sycl::access::fence_space::local_space); + int xRead = xStart + tx - 2; + xRead = (xRead < 0 ? 0 : xRead); + xRead = (xRead >= width ? width - 1 : xRead); + + int maxtx = sycl::min(dx2, (int)(width / 2 - xStart / 2)); + + #pragma unroll + for (int dy = 0; dy < SCALEDOWN_H + 4; dy += 5) + { + { + inrow[tx] = d_Data[yRead[dy + 0] + xRead]; + + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx4] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 4 && !(dy & 1)) + d_Result[yWrite[dy + 0] + xWrite] = k2 * brow[tx2] + k0 * (brow[tx0] + brow[tx4]) + k1 * (brow[tx1] + brow[tx3]); + } + + + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < (SCALEDOWN_H + 3)) + { + inrow[tx] = d_Data[yRead[dy + 1] + xRead]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx0] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 3 && (dy & 1)) + d_Result[yWrite[dy + 1] + xWrite] = k2 * brow[tx3] + k0 * (brow[tx1] + brow[tx0]) + k1 * (brow[tx2] + brow[tx4]); + } + + + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < (SCALEDOWN_H + 2)) + { + 
inrow[tx] = d_Data[yRead[dy + 2] + xRead]; + + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx1] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 2 && !(dy & 1)) + d_Result[yWrite[dy + 2] + xWrite] = k2 * brow[tx4] + k0 * (brow[tx2] + brow[tx1]) + k1 * (brow[tx3] + brow[tx0]); + } + + + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < (SCALEDOWN_H + 1)) + { + inrow[tx] = d_Data[yRead[dy + 3] + xRead]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < maxtx) + { + brow[tx2] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (dy >= 1 && (dy & 1)) + d_Result[yWrite[dy + 3] + xWrite] = k2 * brow[tx0] + k0 * (brow[tx3] + brow[tx2]) + k1 * (brow[tx4] + brow[tx1]); + } + + + item_ct1.barrier(sycl::access::fence_space::local_space); + } + if (dy < SCALEDOWN_H) + { + inrow[tx] = d_Data[yRead[dy + 4] + xRead]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < dx2 && xWrite < width / 2) + { + brow[tx3] = k0 * (inrow[2 * tx] + inrow[2 * tx + 4]) + k1 * (inrow[2 * tx + 1] + inrow[2 * tx + 3]) + k2 * inrow[2 * tx + 2]; + if (!(dy & 1)) + d_Result[yWrite[dy + 4] + xWrite] = k2 * brow[tx1] + k0 * (brow[tx4] + brow[tx3]) + k1 * (brow[tx0] + brow[tx2]); + } + + + item_ct1.barrier(sycl::access::fence_space::local_space); + } + } +} + +void ScaleUp(float *d_Result, float *d_Data, int width, int pitch, int height, int newpitch, + sycl::nd_item<3> item_ct1) +{ + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + int x = item_ct1.get_group(2) * SCALEUP_W + 2 * tx; + int y = item_ct1.get_group(1) * SCALEUP_H + 2 * ty; + if (x < 2 * width && y < 2 * height) + { + int xl = item_ct1.get_group(2) * (SCALEUP_W / 2) + tx; + int yu = item_ct1.get_group(1) * (SCALEUP_H / 2) + ty; + int xr = 
sycl::min((int)(xl + 1), (int)(width - 1)); + int yd = sycl::min((int)(yu + 1), (int)(height - 1)); + float vul = d_Data[yu * pitch + xl]; + float vur = d_Data[yu * pitch + xr]; + float vdl = d_Data[yd * pitch + xl]; + float vdr = d_Data[yd * pitch + xr]; + d_Result[(y + 0) * newpitch + x + 0] = vul; + d_Result[(y + 0) * newpitch + x + 1] = 0.50f * (vul + vur); + d_Result[(y + 1) * newpitch + x + 0] = 0.50f * (vul + vdl); + d_Result[(y + 1) * newpitch + x + 1] = 0.25f * (vul + vur + vdl + vdr); + } +} + +float FastAtan2(float y, float x) +{ + float absx = sycl::fabs(x); + float absy = sycl::fabs(y); + + float a = sycl::min(absx, absy) / sycl::max(absx, absy); + float s = a * a; + float r = ((-0.0464964749f * s + 0.15931422f) * s - 0.327622764f) * s * a + a; + r = (absy > absx ? 1.57079637f - r : r); + r = (x < 0 ? 3.14159274f - r : r); + r = (y < 0 ? -r : r); + return r; +} + +void ExtractSiftDescriptorsCONSTNew( + + float *texObj, int pitch, SiftPoint *d_sift, + float subsampling, int octave, sycl::nd_item<3> item_ct1, + int d_MaxNumPoints, unsigned int *d_PointCounter, float *gauss, + float *buffer, float *sums) +{ + + const int tx = item_ct1.get_local_id(2); // 0 -> 16 + const int ty = item_ct1.get_local_id(1); // 0 -> 8 + const int idx = ty * 16 + tx; + if (ty == 0) + gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + + int fstPts = + sycl::min(d_PointCounter[2 * octave - 1], (unsigned int)d_MaxNumPoints); + int totPts = + sycl::min(d_PointCounter[2 * octave + 1], (unsigned int)d_MaxNumPoints); + +#pragma unroll + for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; + bx += item_ct1.get_group_range(2)) + { + + buffer[idx] = 0.0f; + + + item_ct1.barrier(sycl::access::fence_space::local_space); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sycl::sin(theta); // cosa -sina + float cosa = sycl::cos(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float 
ssina = scale * sina; + float scosa = scale * cosa; + +#pragma unroll + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + + int xi1 = xpos + cosa; + int yi1 = ypos + sina; + + int xi2 = xpos - cosa; + int yi2 = ypos - sina; + + float dx = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + xi2); + + xi1 = xpos - sina; + yi1 = ypos + cosa; + + xi2 = xpos + sina; + yi2 = ypos - cosa; + + float dy = *(texObj + yi1 * pitch + xi1) - + *(texObj + yi2 * pitch + xi2); + float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * FastAtan2(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + infra::atomic_fetch_add( + buffer + p1, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + infra::atomic_fetch_add( + buffer + p1 + 32, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + infra::atomic_fetch_add( + buffer + p1 + 8, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + infra::atomic_fetch_add( + buffer + p1 + 40, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2 + 40, angf * grad2); + } + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + + #pragma unroll + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + item_ct1.barrier(sycl::access::fence_space::local_space); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = sycl::min((float)(buffer[idx] * sycl::rsqrt(tsum1)), 0.2f); + + sum = tsum1 * tsum1; + + #pragma unroll + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + + item_ct1.barrier(sycl::access::fence_space::local_space); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * sycl::rsqrt(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + 
d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + } +} + +void ExtractSiftDescriptor(rawImg_data texObj, + SiftPoint *d_sift, float subsampling, int octave, + int bx, sycl::nd_item<3> item_ct1, float *gauss, + float *buffer, float *sums) +{ + + const int idx = item_ct1.get_local_id(2); + const int tx = idx & 15; // 0 -> 16 + const int ty = idx / 16; // 0 -> 8 + if (ty == 0) + gauss[tx] = sycl::exp(-(tx - 7.5f) * (tx - 7.5f) / 128.0f); + buffer[idx] = 0.0f; + item_ct1.barrier(sycl::access::fence_space::local_space); + + // Compute angles and gradients + float theta = 2.0f * 3.1415f / 360.0f * d_sift[bx].orientation; + float sina = sycl::sin(theta); // cosa -sina + float cosa = sycl::cos(theta); // sina cosa + float scale = 12.0f / 16.0f * d_sift[bx].scale; + float ssina = scale * sina; + float scosa = scale * cosa; + +#pragma unroll + for (int y = ty; y < 16; y += 8) + { + float xpos = d_sift[bx].xpos + (tx - 7.5f) * scosa - (y - 7.5f) * ssina + 0.5f; + float ypos = d_sift[bx].ypos + (tx - 7.5f) * ssina + (y - 7.5f) * scosa + 0.5f; + float dx = texObj.read(xpos + cosa, ypos + sina) - + texObj.read(xpos - cosa, ypos - sina); + float dy = texObj.read(xpos - sina, ypos + cosa) - + texObj.read(xpos + sina, ypos - cosa); + float grad = gauss[y] * gauss[tx] * sycl::sqrt(dx * dx + dy * dy); + float angf = 4.0f / 3.1415f * sycl::atan2(dy, dx) + 4.0f; + + int hori = (tx + 2) / 4 - 1; // Convert from (tx,y,angle) to bins + float horf = (tx - 1.5f) / 4.0f - hori; + float ihorf = 1.0f - horf; + int veri = (y + 2) / 4 - 1; + float verf = (y - 1.5f) / 4.0f - veri; + float iverf = 1.0f - verf; + int angi = angf; + int angp = (angi < 7 ? 
angi + 1 : 0); + angf -= angi; + float iangf = 1.0f - angf; + + int hist = 8 * (4 * veri + hori); // Each gradient measure is interpolated + int p1 = angi + hist; // in angles, xpos and ypos -> 8 stores + int p2 = angp + hist; + if (tx >= 2) + { + float grad1 = ihorf * grad; + if (y >= 2) + { // Upper left + float grad2 = iverf * grad1; + infra::atomic_fetch_add( + buffer + p1, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2, angf * grad2); + } + if (y <= 13) + { // Lower left + float grad2 = verf * grad1; + infra::atomic_fetch_add( + buffer + p1 + 32, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2 + 32, angf * grad2); + } + } + if (tx <= 13) + { + float grad1 = horf * grad; + if (y >= 2) + { // Upper right + float grad2 = iverf * grad1; + infra::atomic_fetch_add( + buffer + p1 + 8, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2 + 8, angf * grad2); + } + if (y <= 13) + { // Lower right + float grad2 = verf * grad1; + infra::atomic_fetch_add( + buffer + p1 + 40, iangf * grad2); + infra::atomic_fetch_add( + buffer + p2 + 40, angf * grad2); + } + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + + // Normalize twice and suppress peaks first time + float sum = buffer[idx] * buffer[idx]; + + #pragma unroll + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + + item_ct1.barrier(sycl::access::fence_space::local_space); + float tsum1 = sums[0] + sums[1] + sums[2] + sums[3]; + tsum1 = sycl::min((float)(buffer[idx] * sycl::rsqrt(tsum1)), 0.2f); + + sum = tsum1 * tsum1; + + #pragma unroll + for (int i = 16; i > 0; i /= 2) + sum += ShiftDown(sum, i, item_ct1); + if ((idx & 31) == 0) + sums[idx / 32] = sum; + + item_ct1.barrier(sycl::access::fence_space::local_space); + + float tsum2 = sums[0] + sums[1] + sums[2] + sums[3]; + float *desc = d_sift[bx].data; + desc[idx] = tsum1 * sycl::rsqrt(tsum2); + if (idx == 0) + { + d_sift[bx].xpos *= subsampling; + 
d_sift[bx].ypos *= subsampling; + d_sift[bx].scale *= subsampling; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); +} + +void RescalePositions(SiftPoint *d_sift, int numPts, float scale, + sycl::nd_item<3> item_ct1) +{ + int num = item_ct1.get_group(2) * item_ct1.get_local_range().get(2) + + item_ct1.get_local_id(2); + if (num < numPts) + { + d_sift[num].xpos *= scale; + d_sift[num].ypos *= scale; + d_sift[num].scale *= scale; + } +} + +// With constant number of blocks +void ComputeOrientationsCONSTNew(float *image, int w, int p, int h, SiftPoint *d_Sift, int octave, + sycl::nd_item<3> item_ct1, int d_MaxNumPoints, + unsigned int *d_PointCounter, + sycl::accessor img, + sycl::accessor tmp, + float *hist, float *gaussx, float *gaussy) +{ +#define RAD 9 +#define WID (2 * RAD + 1) +#define LEN 32 //%%%% Note: Lowe suggests 36, not 32 + + const int tx = item_ct1.get_local_id(2); + + int fstPts = + sycl::min(d_PointCounter[2 * octave - 1], (unsigned int)d_MaxNumPoints); + int totPts = + sycl::min(d_PointCounter[2 * octave + 0], (unsigned int)d_MaxNumPoints); + + #pragma unroll + for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; + bx += item_ct1.get_group_range(2)) + { + + float sc = d_Sift[bx].scale; + + #pragma unroll + for (int i = tx; i < 2 * LEN; i += item_ct1.get_local_range().get(2)) + hist[i] = 0.0f; + float xp = d_Sift[bx].xpos; + float yp = d_Sift[bx].ypos; + int xi = (int)xp; + int yi = (int)yp; + float xf = xp - xi; + float yf = yp - yi; + + #pragma unroll + for (int i = tx; i < WID * WID; i += item_ct1.get_local_range().get(2)) + { + int y = i / WID; + int x = i - y * WID; + int xp = sycl::max(sycl::min((int)(x - RAD + xi), (int)(w - 1)), 0); + int yp = sycl::max(sycl::min((int)(y - RAD + yi), (int)(h - 1)), 0); + img[y][x] = image[yp * p + xp]; + } + float fac[5]; + fac[1] = fac[3] = + (sc > 0.5f ? sycl::exp(-1.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f); + fac[0] = fac[4] = + (sc > 0.5f ? 
sycl::exp(-4.0f / (2.0f * (sc * sc - 0.25f))) : 0.0f); + fac[2] = 1.0f; + float i2sigma2 = -1.0f / (2.0f * 2.0f * 2.0f * sc * sc); //%%%% Note: Lowe suggests 1.5, not 2.0 + if (tx < WID) + { + gaussx[tx] = sycl::exp(i2sigma2 * (tx - RAD - xf) * (tx - RAD - xf)); + gaussy[tx] = sycl::exp(i2sigma2 * (tx - RAD - yf) * (tx - RAD - yf)); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + + #pragma unroll + for (int i = tx; i < (WID - 4) * WID; + i += item_ct1.get_local_range().get(2)) + { + int y = i / WID; + int x = i - y * WID; + y += 2; + tmp[y][x] = img[y][x] + fac[1] * (img[y - 1][x] + img[y + 1][x]) + + fac[0] * (img[y - 2][x] + img[y + 2][x]); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + + #pragma unroll + for (int i = tx; i < (WID - 4) * (WID - 4); + i += item_ct1.get_local_range().get(2)) + { + int y = i / (WID - 4); + int x = i - y * (WID - 4); + x += 2; + y += 2; + img[y][x] = tmp[y][x] + fac[1] * (tmp[y][x - 1] + tmp[y][x + 1]) + + fac[0] * (tmp[y][x - 2] + tmp[y][x + 2]); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + + #pragma unroll + for (int i = tx; i < (WID - 6) * (WID - 6); + i += item_ct1.get_local_range().get(2)) + { + int y = i / (WID - 6); + int x = i - y * (WID - 6); + x += 3; + y += 3; + float dx = img[y][x + 1] - img[y][x - 1]; + float dy = img[y + 1][x] - img[y - 1][x]; + int bin = + (int)((LEN / 2) * sycl::atan2(dy, dx) / 3.1416f + (LEN / 2) + 0.5f) % + LEN; + float grad = sycl::sqrt(dx * dx + dy * dy); + infra::atomic_fetch_add( + &hist[LEN + bin], grad * gaussx[x] * gaussy[y]); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + int x1m = (tx >= 1 ? tx - 1 : tx + LEN - 1); + int x1p = (tx < (LEN - 1) ? tx + 1 : tx - LEN + 1); + int x2m = (tx >= 2 ? tx - 2 : tx + LEN - 2); + int x2p = (tx < (LEN - 2) ? 
tx + 2 : tx - LEN + 2); + if (tx < LEN) + { + hist[tx] = 6.0f * hist[tx + LEN] + 4.0f * (hist[x1m + LEN] + hist[x1p + LEN]) + + 1.0f * (hist[x2m + LEN] + hist[x2p + LEN]); + hist[tx + LEN] = 8.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + + 0.0f * (hist[x2m] + hist[x2p]); + float val = hist[tx + LEN]; + hist[tx] = (val > hist[x1m + LEN] && val >= hist[x1p + LEN] ? val : 0.0f); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + + #pragma unroll + for (int i = 0; i < LEN; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[LEN + ((i1 + 1) % LEN)]; + float val2 = hist[LEN + ((i1 + LEN - 1) % LEN)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 360.0f * (peak < 0.0f ? peak + LEN : peak) / LEN; + sycl::atomic( + sycl::global_ptr(&d_PointCounter[2 * octave + 1])) + .fetch_max(d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1 && true) + { + float val1 = hist[LEN + ((i2 + 1) % LEN)]; + float val2 = hist[LEN + ((i2 + LEN - 1) % LEN)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = infra::atomic_fetch_compare_inc( + &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = sc; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 360.0f * (peak < 0.0f ? 
peak + LEN : peak) / LEN; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + } +#undef RAD +#undef WID +#undef LEN +} + +// With constant number of blocks +void ComputeOrientationsCONST(rawImg_data texObj, + SiftPoint *d_Sift, int octave, + sycl::nd_item<3> item_ct1, int d_MaxNumPoints, + unsigned int *d_PointCounter, float *hist, + float *gauss) +{ + + const int tx = item_ct1.get_local_id(2); + + int fstPts = + sycl::min(d_PointCounter[2 * octave - 1], (unsigned int)d_MaxNumPoints); + int totPts = + sycl::min(d_PointCounter[2 * octave + 0], (unsigned int)d_MaxNumPoints); + + #pragma unroll + for (int bx = item_ct1.get_group(2) + fstPts; bx < totPts; + bx += item_ct1.get_group_range(2)) + { + + float i2sigma2 = -1.0f / (2.0f * 1.5f * 1.5f * d_Sift[bx].scale * d_Sift[bx].scale); + if (tx < 11) + gauss[tx] = sycl::exp(i2sigma2 * (tx - 5) * (tx - 5)); + if (tx < 64) + hist[tx] = 0.0f; + item_ct1.barrier(sycl::access::fence_space::local_space); + float xp = d_Sift[bx].xpos - 4.5f; + float yp = d_Sift[bx].ypos - 4.5f; + int yd = tx / 11; + int xd = tx - yd * 11; + float xf = xp + xd; + float yf = yp + yd; + if (yd < 11) + { + float dx = texObj.read(xf + 1.0, yf) - texObj.read(xf - 1.0, yf); // src_d_data[yf * pitch + xf] + float dy = texObj.read(xf, yf + 1.0) - texObj.read(xf, yf - 1.0); + int bin = 16.0f * sycl::atan2(dy, dx) / 3.1416f + 16.5f; + if (bin > 31) + bin = 0; + float grad = sycl::sqrt(dx * dx + dy * dy); + infra::atomic_fetch_add( + &hist[bin], grad * gauss[xd] * gauss[yd]); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + int x1m = (tx >= 1 ? tx - 1 : tx + 31); + int x1p = (tx <= 30 ? tx + 1 : tx - 31); + if (tx < 32) + { + int x2m = (tx >= 2 ? tx - 2 : tx + 30); + int x2p = (tx <= 29 ? 
tx + 2 : tx - 30); + hist[tx + 32] = 6.0f * hist[tx] + 4.0f * (hist[x1m] + hist[x1p]) + (hist[x2m] + hist[x2p]); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < 32) + { + float v = hist[32 + tx]; + hist[tx] = (v > hist[32 + x1m] && v >= hist[32 + x1p] ? v : 0.0f); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx == 0) + { + float maxval1 = 0.0; + float maxval2 = 0.0; + int i1 = -1; + int i2 = -1; + + #pragma unroll + for (int i = 0; i < 32; i++) + { + float v = hist[i]; + if (v > maxval1) + { + maxval2 = maxval1; + maxval1 = v; + i2 = i1; + i1 = i; + } + else if (v > maxval2) + { + maxval2 = v; + i2 = i; + } + } + float val1 = hist[32 + ((i1 + 1) & 31)]; + float val2 = hist[32 + ((i1 + 31) & 31)]; + float peak = i1 + 0.5f * (val1 - val2) / (2.0f * maxval1 - val1 - val2); + d_Sift[bx].orientation = 11.25f * (peak < 0.0f ? peak + 32.0f : peak); + sycl::atomic( + sycl::global_ptr(&d_PointCounter[2 * octave + 1])) + .fetch_max(d_PointCounter[2 * octave + 0]); + if (maxval2 > 0.8f * maxval1 && true) + { + float val1 = hist[32 + ((i2 + 1) & 31)]; + float val2 = hist[32 + ((i2 + 31) & 31)]; + float peak = i2 + 0.5f * (val1 - val2) / (2.0f * maxval2 - val1 - val2); + unsigned int idx = infra::atomic_fetch_compare_inc( + &d_PointCounter[2 * octave + 1], (unsigned int)0x7fffffff); + if (idx < d_MaxNumPoints) + { + d_Sift[idx].xpos = d_Sift[bx].xpos; + d_Sift[idx].ypos = d_Sift[bx].ypos; + d_Sift[idx].scale = d_Sift[bx].scale; + d_Sift[idx].sharpness = d_Sift[bx].sharpness; + d_Sift[idx].edgeness = d_Sift[bx].edgeness; + d_Sift[idx].orientation = 11.25f * (peak < 0.0f ? 
peak + 32.0f : peak); + ; + d_Sift[idx].subsampling = d_Sift[bx].subsampling; + } + } + } + item_ct1.barrier(sycl::access::fence_space::local_space); + } +} + +void FindPointsMultiNew(float *d_Data0, SiftPoint *d_Sift, int width, int pitch, int height, float subsampling, float lowestScale, float thresh, float factor, float edgeLimit, int octave, + sycl::nd_item<3> item_ct1, int d_MaxNumPoints, + unsigned int *d_PointCounter, unsigned short *points) +{ +#define MEMWID (MINMAX_W + 2) + + if (item_ct1.get_group(2) == 0 && item_ct1.get_group(1) == 0 && + item_ct1.get_local_id(2) == 0) + { + sycl::atomic( + sycl::global_ptr(&d_PointCounter[2 * octave + 0])) + .fetch_max(d_PointCounter[2 * octave - 1]); + sycl::atomic( + sycl::global_ptr(&d_PointCounter[2 * octave + 1])) + .fetch_max(d_PointCounter[2 * octave - 1]); + } + int tx = item_ct1.get_local_id(2); + int block = item_ct1.get_group(2) / NUM_SCALES; + int scale = item_ct1.get_group(2) - NUM_SCALES * block; + int minx = block * MINMAX_W; + int maxx = sycl::min((int)(minx + MINMAX_W), width); + int xpos = minx + tx; + int size = pitch * height; + int ptr = + size * scale + sycl::max(sycl::min((int)(xpos - 1), (int)(width - 1)), 0); + + int yloops = + sycl::min((unsigned int)(height - MINMAX_H * item_ct1.get_group(1)), + (unsigned int)(MINMAX_H)); + float maxv = 0.0f; + + #pragma unroll + for (int y = 0; y < yloops; y++) + { + int ypos = MINMAX_H * item_ct1.get_group(1) + y; + int yptr1 = ptr + ypos * pitch; + float val = d_Data0[yptr1 + 1 * size]; + maxv = sycl::fmax(maxv, sycl::fabs(val)); + } + // if (tx==0) printf("XXX1\n"); + if (!sycl::any_of_group( + item_ct1.get_sub_group(), + (0xffffffff & + (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && + maxv > thresh)) + return; + // if (tx==0) printf("XXX2\n"); + + int ptbits = 0; + + #pragma unroll + for (int y = 0; y < yloops; y++) + { + + int ypos = MINMAX_H * item_ct1.get_group(1) + y; + int yptr1 = ptr + ypos * pitch; + float d11 = d_Data0[yptr1 + 1 * 
size]; + if (sycl::any_of_group( + item_ct1.get_sub_group(), + (0xffffffff & + (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && + sycl::fabs(d11) > thresh)) + { + + int yptr0 = ptr + sycl::max(0, (int)(ypos - 1)) * pitch; + int yptr2 = ptr + sycl::min((int)(height - 1), (int)(ypos + 1)) * pitch; + float d01 = d_Data0[yptr1]; + float d10 = d_Data0[yptr0 + 1 * size]; + float d12 = d_Data0[yptr2 + 1 * size]; + float d21 = d_Data0[yptr1 + 2 * size]; + + float d00 = d_Data0[yptr0]; + float d02 = d_Data0[yptr2]; + float ymin1 = sycl::fmin(sycl::fmin(d00, d01), d02); + float ymax1 = sycl::fmax(sycl::fmax(d00, d01), d02); + float d20 = d_Data0[yptr0 + 2 * size]; + float d22 = d_Data0[yptr2 + 2 * size]; + float ymin3 = sycl::fmin(sycl::fmin(d20, d21), d22); + float ymax3 = sycl::fmax(sycl::fmax(d20, d21), d22); + float ymin2 = sycl::fmin( + sycl::fmin(ymin1, sycl::fmin(sycl::fmin(d10, d12), d11)), ymin3); + float ymax2 = sycl::fmax( + sycl::fmax(ymax1, sycl::fmax(sycl::fmax(d10, d12), d11)), ymax3); + + // float nmin2 = sycl::fmin(ShiftUp(ymin2, 1), ShiftDown(ymin2, 1)); + // float nmax2 = sycl::fmax(ShiftUp(ymax2, 1), ShiftDown(ymax2, 1)); + + float nmin2 = sycl::fmin(ShiftUp(ymin2, 1, item_ct1), ShiftDown(ymin2, 1, item_ct1)); + float nmax2 = sycl::fmax(ShiftUp(ymax2, 1, item_ct1), ShiftDown(ymax2, 1, item_ct1)); + + float minv = sycl::fmin(sycl::fmin(nmin2, ymin1), ymin3); + minv = sycl::fmin(sycl::fmin(minv, d10), d12); + float maxv = sycl::fmax(sycl::fmax(nmax2, ymax1), ymax3); + maxv = sycl::fmax(sycl::fmax(maxv, d10), d12); + + if (tx > 0 && tx < MINMAX_W + 1 && xpos <= maxx) + ptbits |= ((d11 < sycl::fmin(-thresh, minv)) | + (d11 > sycl::fmax(thresh, maxv))) + << y; + } + } + + unsigned int totbits = sycl::popcount(ptbits); + unsigned int numbits = totbits; + + #pragma unroll + for (int d = 1; d < 32; d <<= 1) + { + unsigned int num = ShiftUp(totbits, d, item_ct1); + if (tx >= d) + totbits += num; + } + int pos = totbits - numbits; + + #pragma unroll + 
for (int y = 0; y < yloops; y++) + { + int ypos = MINMAX_H * item_ct1.get_group(1) + y; + if (ptbits & (1 << y) && pos < MEMWID) + { + points[2 * pos + 0] = xpos - 1; + points[2 * pos + 1] = ypos; + pos++; + } + } + + totbits = Shuffle(totbits, 31, item_ct1); + if (tx < totbits) + { + int xpos = points[2 * tx + 0]; + int ypos = points[2 * tx + 1]; + int ptr = xpos + (ypos + (scale + 1) * height) * pitch; + float val = d_Data0[ptr]; + float *data1 = &d_Data0[ptr]; + float dxx = 2.0f * val - data1[-1] - data1[1]; + float dyy = 2.0f * val - data1[-pitch] - data1[pitch]; + float dxy = 0.25f * (data1[+pitch + 1] + data1[-pitch - 1] - data1[-pitch + 1] - data1[+pitch - 1]); + float tra = dxx + dyy; + float det = dxx * dyy - dxy * dxy; + if (tra * tra < edgeLimit * det) + { + float edge = (tra * tra) / det; + float dx = 0.5f * (data1[1] - data1[-1]); + float dy = 0.5f * (data1[pitch] - data1[-pitch]); + float *data0 = d_Data0 + ptr - height * pitch; + float *data2 = d_Data0 + ptr + height * pitch; + float ds = 0.5f * (data0[0] - data2[0]); + float dss = 2.0f * val - data2[0] - data0[0]; + float dxs = 0.25f * (data2[1] + data0[-1] - data0[1] - data2[-1]); + float dys = 0.25f * (data2[pitch] + data0[-pitch] - data2[-pitch] - data0[pitch]); + float idxx = dyy * dss - dys * dys; + float idxy = dys * dxs - dxy * dss; + float idxs = dxy * dys - dyy * dxs; + float idet = 1.0f / (idxx * dxx + idxy * dxy + idxs * dxs); + float idyy = dxx * dss - dxs * dxs; + float idys = dxy * dxs - dxx * dys; + float idss = dxx * dyy - dxy * dxy; + float pdx = idet * (idxx * dx + idxy * dy + idxs * ds); + float pdy = idet * (idxy * dx + idyy * dy + idys * ds); + float pds = idet * (idxs * dx + idys * dy + idss * ds); + if (pdx < -0.5f || pdx > 0.5f || pdy < -0.5f || pdy > 0.5f || pds < -0.5f || pds > 0.5f) + { + pdx = dx / dxx; + pdy = dy / dyy; + pds = ds / dss; + } + float dval = 0.5f * (dx * pdx + dy * pdy + ds * pds); + int maxPts = d_MaxNumPoints; + float sc = sycl::pow(2.0f, (float)scale / 
NUM_SCALES) * + sycl::exp2(pds * factor); + if (sc >= lowestScale) + { + sycl::atomic( + sycl::global_ptr(&d_PointCounter[2 * octave + 0])) + .fetch_max(d_PointCounter[2 * octave - 1]); + unsigned int idx = infra::atomic_fetch_compare_inc( + &d_PointCounter[2 * octave + 0], (unsigned int)0x7fffffff); + idx = (idx >= maxPts ? maxPts - 1 : idx); + d_Sift[idx].xpos = xpos + pdx; + d_Sift[idx].ypos = ypos + pdy; + d_Sift[idx].scale = sc; + d_Sift[idx].sharpness = val + dval; + d_Sift[idx].edgeness = edge; + d_Sift[idx].subsampling = subsampling; + } + } + } +} + +void LaplaceMultiMem(float *d_Image, float *d_Result, int width, int pitch, int height, int octave, sycl::nd_item<3> item_ct1, float *d_LaplaceKernel, float *buff) +{ + const int tx = item_ct1.get_local_id(2); + const int xp = item_ct1.get_group(2) * LAPLACE_W + tx; + const int yp = item_ct1.get_group(1); + float *data = d_Image + sycl::max(sycl::min((int)(xp - LAPLACE_R), (int)(width - 1)), 0); + float temp[2 * LAPLACE_R + 1]; + + float kern[LAPLACE_S][LAPLACE_R + 1]; + // float kern[LAPLACE_S * (LAPLACE_R + 1)]; // 2d to 1d + + // float kern_temp[LAPLACE_S * (LAPLACE_R + 1)]; + + if (xp < (width + 2 * LAPLACE_R)) + { + #pragma unroll + for (int i = 0; i <= 2 * LAPLACE_R; i++) + temp[i] = data[sycl::max(0, sycl::min((int)(yp + i - LAPLACE_R), + (int)(height - 1))) * + pitch]; + + #pragma unroll + for (int scale = 0; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float *kernel = d_LaplaceKernel + octave * 12 * 16 + scale * 16; + kern[scale][0] = kernel[0]; + kern[scale][1] = kernel[1]; + kern[scale][2] = kernel[2]; + kern[scale][3] = kernel[3]; + kern[scale][4] = kernel[4]; + + float sum = kern[scale][0] * temp[LAPLACE_R]; + + #pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + sum += kern[scale][j] * (temp[LAPLACE_R - j] + temp[LAPLACE_R + j]); + buf[tx] = sum; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < LAPLACE_W && xp < 
(width + 2 * LAPLACE_R)) + { + int scale = 0; + float oldRes = kern[scale][0] * buff[tx + LAPLACE_R]; + + #pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + oldRes += kern[scale][j] * (buff[tx + LAPLACE_R - j] + buff[tx + LAPLACE_R + j]); + + #pragma unroll + for (int scale = 1; scale < LAPLACE_S; scale++) + { + float *buf = buff + (LAPLACE_W + 2 * LAPLACE_R) * scale; + float res = kern[scale][0] * buf[tx + LAPLACE_R]; + + #pragma unroll + for (int j = 1; j <= LAPLACE_R; j++) + res += kern[scale][j] * (buf[tx + LAPLACE_R - j] + buf[tx + LAPLACE_R + j]); + d_Result[(scale - 1) * height * pitch + yp * pitch + xp] = res - oldRes; + oldRes = res; + } + } +} + +void LowPass(float *d_Image, float *d_Result, int width, int pitch, int height, + sycl::nd_item<3> item_ct1, float *d_LowPassKernel, float *buffer) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * LOWPASS_W + tx; + const int yp = item_ct1.get_group(1) * LOWPASS_H + ty; + float *kernel = d_LowPassKernel; + float *data = + d_Image + sycl::max(sycl::min((int)(xp - 4), (int)(width - 1)), 0); + float *buff = buffer + ty * (LOWPASS_W + 2 * LOWPASS_R); + int h = height - 1; + if (yp < height) + buff[tx] = + kernel[4] * data[sycl::min(yp, h) * pitch] + + kernel[3] * (data[sycl::max(0, sycl::min((int)(yp - 1), h)) * pitch] + + data[sycl::min((int)(yp + 1), h) * pitch]) + + kernel[2] * (data[sycl::max(0, sycl::min((int)(yp - 2), h)) * pitch] + + data[sycl::min((int)(yp + 2), h) * pitch]) + + kernel[1] * (data[sycl::max(0, sycl::min((int)(yp - 3), h)) * pitch] + + data[sycl::min((int)(yp + 3), h) * pitch]) + + kernel[0] * (data[sycl::max(0, sycl::min((int)(yp - 4), h)) * pitch] + + data[sycl::min((int)(yp + 4), h) * pitch]); + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < LOWPASS_W && xp < width && yp < height) + d_Result[yp * pitch + xp] = kernel[4] * buff[tx + 4] + + kernel[3] * (buff[tx + 3] + buff[tx + 5]) + 
kernel[2] * (buff[tx + 2] + buff[tx + 6]) + + kernel[1] * (buff[tx + 1] + buff[tx + 7]) + kernel[0] * (buff[tx + 0] + buff[tx + 8]); +} + +void LowPassBlockOld(float *d_Image, float *d_Result, int width, int pitch, int height, + sycl::nd_item<3> item_ct1, float *d_LowPassKernel, + sycl::accessor xrows) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * LOWPASS_W + tx; + const int yp = item_ct1.get_group(1) * LOWPASS_H + ty; + const int N = 16; + float *k = d_LowPassKernel; + int xl = sycl::max(sycl::min((int)(xp - 4), (int)(width - 1)), 0); + + #pragma unroll + for (int l = -8; l <= LOWPASS_H; l += 4) + { + if (l < LOWPASS_H) + { + int yl = sycl::max(sycl::min((int)(yp + l + 4), (int)(height - 1)), 0); + float val = d_Image[yl * pitch + xl]; + xrows[(l + 8 + ty) % N][tx] = + k[4] * ShiftDown(val, 4, item_ct1) + + k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) + + k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) + + k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) + + k[0] * (ShiftDown(val, 8, item_ct1) + val); + } + if (l >= 4) + { + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(l + 0 + ty) % N][tx] + + k[3] * (xrows[(l - 1 + ty) % N][tx] + xrows[(l + 1 + ty) % N][tx]) + + k[2] * (xrows[(l - 2 + ty) % N][tx] + xrows[(l + 2 + ty) % N][tx]) + + k[1] * (xrows[(l - 3 + ty) % N][tx] + xrows[(l + 3 + ty) % N][tx]) + + k[0] * (xrows[(l - 4 + ty) % N][tx] + xrows[(l + 4 + ty) % N][tx]); + } + if (l >= 0) + item_ct1.barrier(sycl::access::fence_space::local_space); + } +} + +void LowPassBlock(float *d_Image, float *d_Result, int width, int pitch, int height, + sycl::nd_item<3> item_ct1, float *d_LowPassKernel, + sycl::accessor xrows) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int xp = item_ct1.get_group(2) * 
LOWPASS_W + tx; + const int yp = item_ct1.get_group(1) * LOWPASS_H + ty; + const int N = 16; + float *k = d_LowPassKernel; + int xl = sycl::max(sycl::min((int)(xp - 4), (int)(width - 1)), 0); + + #pragma unroll + for (int l = -8; l < 4; l += 4) + { + int ly = l + ty; + int yl = sycl::max(sycl::min((int)(yp + l + 4), (int)(height - 1)), 0); + float val = d_Image[yl * pitch + xl]; + val = k[4] * ShiftDown(val, 4, item_ct1) + + k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) + + k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) + + k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) + + k[0] * (ShiftDown(val, 8, item_ct1) + val); + xrows[ly + 8][tx] = val; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + + #pragma unroll + for (int l = 4; l < LOWPASS_H; l += 4) + { + int ly = l + ty; + int yl = sycl::min((int)(yp + l + 4), (int)(height - 1)); + float val = d_Image[yl * pitch + xl]; + val = k[4] * ShiftDown(val, 4, item_ct1) + + k[3] * (ShiftDown(val, 5, item_ct1) + ShiftDown(val, 3, item_ct1)) + + k[2] * (ShiftDown(val, 6, item_ct1) + ShiftDown(val, 2, item_ct1)) + + k[1] * (ShiftDown(val, 7, item_ct1) + ShiftDown(val, 1, item_ct1)) + + k[0] * (ShiftDown(val, 8, item_ct1) + val); + xrows[(ly + 8) % N][tx] = val; + int ys = yp + l - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * (xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); + + item_ct1.barrier(sycl::access::fence_space::local_space); + } + int ly = LOWPASS_H + ty; + int ys = yp + LOWPASS_H - 4; + if (xp < width && ys < height && tx < LOWPASS_W) + d_Result[ys * pitch + xp] = k[4] * xrows[(ly + 0) % N][tx] + + k[3] * (xrows[(ly - 1) % N][tx] + xrows[(ly + 1) % N][tx]) + + k[2] * 
(xrows[(ly - 2) % N][tx] + xrows[(ly + 2) % N][tx]) + + k[1] * (xrows[(ly - 3) % N][tx] + xrows[(ly + 3) % N][tx]) + + k[0] * (xrows[(ly - 4) % N][tx] + xrows[(ly + 4) % N][tx]); +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.h new file mode 100644 index 000000000..0d38fe57e --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftD.h @@ -0,0 +1,58 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +#ifndef CUDASIFTD_H +#define CUDASIFTD_H + +#define NUM_SCALES 5 + +// Scale down thread block width +#define SCALEDOWN_W 64 // 60 + +// Scale down thread block height +#define SCALEDOWN_H 16 // 8 + +// Scale up thread block width +#define SCALEUP_W 64 + +// Scale up thread block height +#define SCALEUP_H 8 + +// Find point thread block width +#define MINMAX_W 30 //32 + +// Find point thread block height +#define MINMAX_H 8 //16 + +// Laplace thread block width +#define LAPLACE_W 128 // 56 + +// Laplace rows per thread +#define LAPLACE_H 4 + +// Number of laplace scales +#define LAPLACE_S (NUM_SCALES+3) + +// Laplace filter kernel radius +#define LAPLACE_R 4 + +#define LOWPASS_W 24 //56 +#define LOWPASS_H 32 //16 +#define LOWPASS_R 4 + +//====================== Number of threads ====================// +// ScaleDown: SCALEDOWN_W + 4 +// LaplaceMulti: (LAPLACE_W+2*LAPLACE_R)*LAPLACE_S +// FindPointsMulti: MINMAX_W + 2 +// ComputeOrientations: 128 +// ExtractSiftDescriptors: 256 + +//====================== Number of blocks ====================// +// ScaleDown: (width/SCALEDOWN_W) * (height/SCALEDOWN_H) +// LaplceMulti: (width+2*LAPLACE_R)/LAPLACE_W * height +// FindPointsMulti: (width/MINMAX_W)*NUM_SCALES * (height/MINMAX_H) +// ComputeOrientations: numpts +// ExtractSiftDescriptors: numpts + +#endif diff --git 
a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.dp.cpp new file mode 100644 index 000000000..bae0c62e4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.dp.cpp @@ -0,0 +1,826 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include + +#include "cudautils.h" +#include "cudaImage.h" +#include "cudaSift.h" +#include "cudaSiftD.h" +#include "cudaSiftH.h" +#include "cudaSiftD.dp.cpp" + +#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F)) + +template <> +struct sycl::is_device_copyable : std::true_type +{ +}; + +void InitCuda(sycl::queue &q_ct, int devNum) +{ + auto device = q_ct.get_device(); + std::cout << "Device Name: " << device.get_info() << std::endl; + std::cout << "Max workgroup size: " << device.get_info() << std::endl; + std::cout << "Max clock freq: " << device.get_info() << std::endl; +} + +float *AllocSiftTempMemory(int width, int height, int numOctaves, sycl::queue &q_ct, float &time, bool scaleUp) +{ + const int nd = NUM_SCALES + 3; + int w = width * (scaleUp ? 2 : 1); + int h = height * (scaleUp ? 2 : 1); + int p = iAlignUp(w, 128); + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = NULL; + size_t pitch; + size += sizeTmp; + +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + memoryTmp = (float *)infra::sift_malloc(pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float), q_ct); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + // printf("Malloc time for memoryTmp = %.2f us\n", std::chrono::duration(stop_malloc - start_malloc).count()); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + return memoryTmp; +} + +void FreeSiftTempMemory(float *memoryTmp, sycl::queue &q_ct) +{ + if (memoryTmp) + + safeCall((sycl::free(memoryTmp, q_ct), 0)); +} + +void ExtractSift(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, sycl::queue &q_ct, + float &totTime, float 
lowestScale, bool scaleUp, float *tempMemory) +{ + unsigned int *d_PointCounterAddr; + +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + *((void **)&d_PointCounterAddr) = d_PointCounter.get_ptr(); + q_ct.memset(d_PointCounterAddr, 0, (8 * 2 + 1) * sizeof(int)); + q_ct.memcpy(d_MaxNumPoints.get_ptr(), &siftData.maxPts, sizeof(int)); + q_ct.wait(); + +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + + const int nd = NUM_SCALES + 3; + int w = img.width * (scaleUp ? 2 : 1); + int h = img.height * (scaleUp ? 2 : 1); + int p = iAlignUp(w, 128); + int width = w, height = h; + int size = h * p; // image sizes + int sizeTmp = nd * h * p; // laplace buffer sizes + for (int i = 0; i < numOctaves; i++) + { + w /= 2; + h /= 2; + int p = iAlignUp(w, 128); + size += h * p; + sizeTmp += nd * h * p; + } + float *memoryTmp = tempMemory; + size += sizeTmp; + if (!tempMemory) + { + size_t pitch; +#ifdef DEVICE_TIMER + auto start_malloc2 = std::chrono::steady_clock::now(); +#endif + memoryTmp = (float *)infra::sift_malloc(pitch, (size_t)4096, (size + 4095) / 4096 * sizeof(float), q_ct); + q_ct.wait(); + +#ifdef DEVICE_TIMER + auto stop_malloc2 = std::chrono::steady_clock::now(); + // printf("Malloc time for memoryTmp = %.2f us\n", std::chrono::duration(stop_malloc - start_malloc).count()); + totTime += std::chrono::duration(stop_malloc2 - start_malloc2).count(); +#endif + } + float *memorySub = memoryTmp + sizeTmp; + + CudaImage lowImg; + lowImg.Allocate(width, height, iAlignUp(width, 128), false, q_ct, totTime, memorySub); + if (!scaleUp) + { + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); +#ifdef DEVICE_TIMER + auto start_memcpy1 = std::chrono::steady_clock::now(); +#endif + q_ct.memcpy(d_LaplaceKernel.get_ptr(), kernel, 8 * 12 * 16 * sizeof(float)); + q_ct.wait(); + +#ifdef DEVICE_TIMER + auto stop_memcpy1 = 
std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy1 - start_memcpy1).count(); +#endif + + LowPass(lowImg, img, fmax(initBlur, 0.001f), q_ct, totTime); + + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), q_ct, totTime); + +#ifdef DEVICE_TIMER + auto start_memcpy2 = std::chrono::steady_clock::now(); +#endif + q_ct.memcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], sizeof(int)); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy2 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy2 - start_memcpy2).count(); +#endif + siftData.numPts = (siftData.numPts < siftData.maxPts ? siftData.numPts : siftData.maxPts); + } + else + { + CudaImage upImg; + upImg.Allocate(width, height, iAlignUp(width, 128), false, q_ct, totTime, memoryTmp); + ScaleUp(upImg, img, q_ct, totTime); + LowPass(lowImg, upImg, fmax(initBlur, 0.001f), q_ct, totTime); + float kernel[8 * 12 * 16]; + PrepareLaplaceKernels(numOctaves, 0.0f, kernel); +#ifdef DEVICE_TIMER + auto start_memcpy3 = std::chrono::steady_clock::now(); +#endif + safeCall( + (q_ct.memcpy(d_LaplaceKernel.get_ptr(), kernel, + 8 * 12 * 16 * sizeof(float)), + 0)); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy3 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy3 - start_memcpy3).count(); +#endif + ExtractSiftLoop(siftData, lowImg, numOctaves, 0.0f, thresh, lowestScale * 2.0f, 1.0f, memoryTmp, + memorySub + height * iAlignUp(width, 128), q_ct, totTime); +#ifdef DEVICE_TIMER + auto start_memcpy4 = std::chrono::steady_clock::now(); +#endif + safeCall((q_ct.memcpy(&siftData.numPts, &d_PointCounterAddr[2 * numOctaves], + sizeof(int)), + 0)); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy4 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy4 - start_memcpy4).count(); +#endif + siftData.numPts = (siftData.numPts 
< siftData.maxPts ? siftData.numPts : siftData.maxPts); + RescalePositions(siftData, 0.5f, q_ct, totTime); + } + + if (!tempMemory) + safeCall((sycl::free(memoryTmp, q_ct), 0)); + if (siftData.h_data) + { +#ifdef DEVICE_TIMER + auto start_memcpy5 = std::chrono::steady_clock::now(); +#endif + q_ct.memcpy(siftData.h_data, siftData.d_data, sizeof(SiftPoint) * siftData.numPts); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy5 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy5 - start_memcpy5).count(); + printf("Total time for sift extraction = %.2f us\n\n", totTime); +#endif + printf("Number of Points after sift extraction = %d\n\n", siftData.numPts); + } +} + +int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, float lowestScale, + float subsampling, float *memoryTmp, float *memorySub, sycl::queue &q_ct, float &totTime) +{ + int w = img.width; + int h = img.height; + if (numOctaves > 1) + { + CudaImage subImg; + int p = iAlignUp(w / 2, 128); + subImg.Allocate(w / 2, h / 2, p, false, q_ct, totTime, memorySub); + ScaleDown(subImg, img, 0.5f, q_ct, totTime); + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + ExtractSiftLoop(siftData, subImg, numOctaves - 1, totInitBlur, thresh, lowestScale, + subsampling * 2.0f, memoryTmp, memorySub + (h / 2) * p, q_ct, totTime); + } + ExtractSiftOctave(siftData, img, numOctaves, thresh, lowestScale, subsampling, memoryTmp, q_ct, totTime); + return 0; +} + +void c1toc4(float *f_ptr, sycl::float4 *f4_ptr, int width, int height, + int f_pitch, int f4_pitch, sycl::id<2> idx) +{ + const int workItm_row = idx[0]; + const int workItm_col = idx[1]; + float *f_row_begin = f_ptr + f_pitch * workItm_row; + sycl::float4 *f4_row_begin = f4_ptr + f4_pitch * workItm_row; + + f4_row_begin[workItm_col].x() = f_row_begin[workItm_col]; +} + +void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float 
lowestScale, + float subsampling, float *memoryTmp, sycl::queue &q_ct, float &totTime) +{ + const int nd = NUM_SCALES + 3; + CudaImage diffImg[nd]; + int w = img.width; + int h = img.height; + int p = iAlignUp(w, 128); + for (int i = 0; i < nd - 1; i++) + diffImg[i].Allocate(w, h, p, false, q_ct, totTime, memoryTmp + i * p * h); + float baseBlur = pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); + LaplaceMulti(img, diffImg, octave, q_ct, totTime); + FindPointsMulti(diffImg, siftData, thresh, 10.0f, 1.0f / NUM_SCALES, lowestScale / subsampling, subsampling, octave, q_ct, totTime); + ComputeOrientations(img, siftData, octave, q_ct, totTime); + ExtractSiftDescriptors(img.d_data, img.pitch, siftData, subsampling, octave, q_ct, totTime); +} + +void InitSiftData(SiftData &data, sycl::queue &q_ct, float &time, int num, bool host, bool dev) +{ + data.numPts = 0; + data.maxPts = num; + int sz = sizeof(SiftPoint) * num; + data.h_data = NULL; + if (host) + data.h_data = (SiftPoint *)malloc(sz); + data.d_data = NULL; + if (dev) + { +#ifdef DEVICE_TIMER + auto start_malloc = std::chrono::steady_clock::now(); +#endif + data.d_data = (SiftPoint *)sycl::malloc_device(sz, q_ct); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_malloc = std::chrono::steady_clock::now(); + time += std::chrono::duration(stop_malloc - start_malloc).count(); +#endif + } +} + +void FreeSiftData(SiftData &data, sycl::queue &q_ct) +{ + if (data.d_data != NULL) + sycl::free(data.d_data, q_ct.get_context()); + data.d_data = NULL; + if (data.h_data != NULL) + free(data.h_data); + data.numPts = 0; + data.maxPts = 0; +} + +void PrintSiftData(SiftData &data, sycl::queue &q_ct) +{ + SiftPoint *h_data = data.h_data; + if (data.h_data == NULL) + { + h_data = (SiftPoint *)malloc(sizeof(SiftPoint) * data.maxPts); + q_ct.memcpy(h_data, data.d_data, sizeof(SiftPoint) * data.numPts) + .wait(); + data.h_data = h_data; + } + for (int i = 0; i < data.numPts; i++) + { + printf("xpos = %.2f\n", 
h_data[i].xpos); + printf("ypos = %.2f\n", h_data[i].ypos); + printf("scale = %.2f\n", h_data[i].scale); + printf("sharpness = %.2f\n", h_data[i].sharpness); + printf("edgeness = %.2f\n", h_data[i].edgeness); + printf("orientation = %.2f\n", h_data[i].orientation); + printf("score = %.2f\n", h_data[i].score); + float *siftData = (float *)&h_data[i].data; + for (int j = 0; j < 8; j++) + { + if (j == 0) + printf("data = "); + else + printf(" "); + for (int k = 0; k < 16; k++) + if (siftData[j + 8 * k] < 0.05) + printf(" . "); + else + printf("%.2f ", siftData[j + 8 * k]); + printf("\n"); + } + } + printf("Number of available points: %d\n", data.numPts); + printf("Number of allocated points: %d\n", data.maxPts); +} + +/////////////////////////////////////////////////////////////////////////////// +// Host side master functions +/////////////////////////////////////////////////////////////////////////////// + +double ScaleDown(CudaImage &res, CudaImage &src, float variance, sycl::queue &q_ct, float &totTime) +{ + static float oldVariance = -1.0f; + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleDown: missing data\n"); + return 0.0; + } + if (oldVariance != variance) + { + float h_Kernel[5]; + float kernelSum = 0.0f; + for (int j = 0; j < 5; j++) + { + h_Kernel[j] = (float)expf(-(double)(j - 2) * (j - 2) / 2.0 / variance); + kernelSum += h_Kernel[j]; + } + for (int j = 0; j < 5; j++) + h_Kernel[j] /= kernelSum; + +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + q_ct.memcpy(d_ScaleDownKernel.get_ptr(), h_Kernel, 5 * sizeof(float)).wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + oldVariance = variance; + } +#if 0 + dim3 blocks(iDivUp(src.width, SCALEDOWN_W), iDivUp(src.height, SCALEDOWN_H)); + dim3 threads(SCALEDOWN_W + 4, SCALEDOWN_H + 4); +#else + sycl::range<3> blocks(1, iDivUp(src.height, 
SCALEDOWN_H), + iDivUp(src.width, SCALEDOWN_W)); + sycl::range<3> threads(1, 1, SCALEDOWN_W + 4); + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + q_ct.submit([&](sycl::handler &cgh) + { + d_ScaleDownKernel.init(); + + auto d_ScaleDownKernel_ptr_ct1 = d_ScaleDownKernel.get_ptr(); + + sycl::accessor + inrow_acc_ct1(sycl::range<1>(68 /*SCALEDOWN_W+4*/), cgh); + sycl::accessor + brow_acc_ct1(sycl::range<1>(160 /*5*(SCALEDOWN_W/2)*/), cgh); + sycl::accessor + yRead_acc_ct1(sycl::range<1>(20 /*SCALEDOWN_H+4*/), cgh); + sycl::accessor + yWrite_acc_ct1(sycl::range<1>(20 /*SCALEDOWN_H+4*/), cgh); + + auto res_data_ct1 = res.d_data; + auto src_data_ct1 = src.d_data; + auto src_width = src.width; + auto src_pitch = src.pitch; + auto src_height = src.height; + auto res_pitch = res.pitch; + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + ScaleDown(res_data_ct1, src_data_ct1, src_width, src_pitch, src_height, + res_pitch, item_ct1, d_ScaleDownKernel_ptr_ct1, + inrow_acc_ct1.get_pointer(), brow_acc_ct1.get_pointer(), + yRead_acc_ct1.get_pointer(), yWrite_acc_ct1.get_pointer()); + }); }) + .wait(); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ScaleDown time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("ScaleDown() execution failed\n"); + return 0.0; +} + +double ScaleUp(CudaImage &res, CudaImage &src, sycl::queue &q_ct, float &totTime) +{ + if (res.d_data == NULL || src.d_data == NULL) + { + printf("ScaleUp: missing data\n"); + return 0.0; + } + sycl::range<3> blocks(1, iDivUp(res.height, SCALEUP_H), + iDivUp(res.width, SCALEUP_W)); + sycl::range<3> threads(1, SCALEUP_H / 2, SCALEUP_W / 2); + +#ifdef 
DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + + q_ct.submit([&](sycl::handler &cgh) + { + auto src_data_ct1 = src.d_data; + auto res_data_ct1 = res.d_data; + auto src_width = src.width; + auto src_pitch = src.pitch; + auto src_height = src.height; + auto res_pitch = res.pitch; + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + ScaleUp(res_data_ct1, src_data_ct1, src_width, src_pitch, src_height, + res_pitch, item_ct1); + }); }) + .wait(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ScaleUp time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ScaleUp() execution failed\n"); + return 0.0; +} + +double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, sycl::queue &q_ct, float &totTime) +{ + sycl::range<3> blocks(1, 1, 512); + sycl::range<3> threads(1, 1, 256); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + q_ct.submit([&](sycl::handler &cgh) + { + + auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr(); + auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr(); + + sycl::accessor + img_acc_ct1(sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh); + sycl::accessor + tmp_acc_ct1(sycl::range<2>(19 /*WID*/, 19 /*WID*/), cgh); + sycl::accessor + hist_acc_ct1(sycl::range<1>(64 /*2*LEN*/), cgh); + sycl::accessor + gaussx_acc_ct1(sycl::range<1>(19 /*WID*/), cgh); + sycl::accessor + gaussy_acc_ct1(sycl::range<1>(19 /*WID*/), cgh); + + auto src_data_ct1 = src.d_data; + auto src_width = src.width; + auto src_pitch = src.pitch; + auto src_height = src.height; + auto siftData_data_ct1 = siftData.d_data; + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + 
[=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + ComputeOrientationsCONSTNew( + src_data_ct1, src_width, src_pitch, src_height, siftData_data_ct1, + octave, item_ct1, *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, + img_acc_ct1, tmp_acc_ct1, hist_acc_ct1.get_pointer(), + gaussx_acc_ct1.get_pointer(), gaussy_acc_ct1.get_pointer()); + }); }) + .wait(); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ComputeOrientationsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ComputeOrientations() execution failed\n"); + return 0.0; +} + +double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, int octave, sycl::queue &q_ct, float &totTime) +{ + sycl::range<3> blocks(1, 1, 512); + sycl::range<3> threads(1, 8, 16); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + q_ct.submit([&](sycl::handler &cgh) + { + d_MaxNumPoints.init(); + d_PointCounter.init(); + + auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr(); + auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr(); + + sycl::accessor + gauss_acc_ct1(sycl::range<1>(16), cgh); + sycl::accessor + buffer_acc_ct1(sycl::range<1>(128), cgh); + sycl::accessor + sums_acc_ct1(sycl::range<1>(4), cgh); + + auto siftData_data_ct1 = siftData.d_data; + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + ExtractSiftDescriptorsCONSTNew( + texObj, pitch, + siftData_data_ct1, subsampling, octave, item_ct1, + *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, + gauss_acc_ct1.get_pointer(), buffer_acc_ct1.get_pointer(), + 
sums_acc_ct1.get_pointer()); }); }) + .wait(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("ExtractSiftDescriptorsCONSTNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("ExtractSiftDescriptors() execution failed\n"); + return 0.0; +} + +double RescalePositions(SiftData &siftData, float scale, sycl::queue &q_ct, float &totTime) +{ + sycl::range<3> blocks(1, 1, iDivUp(siftData.numPts, 64)); + sycl::range<3> threads(1, 1, 64); +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + q_ct.submit([&](sycl::handler &cgh) + { + auto siftData_data_ct1 = siftData.d_data; + auto sifData_numPts = siftData.numPts; + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + RescalePositions(siftData_data_ct1, sifData_numPts, scale, item_ct1); + }); }) + .wait(); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("RescalePositions time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("RescapePositions() execution failed\n"); + return 0.0; +} + +double LowPass(CudaImage &res, CudaImage &src, float scale, sycl::queue &q_ct, float &totTime) +{ + try + { + float kernel[2 * LOWPASS_R + 1]; + static float oldScale = -1.0f; + if (scale != oldScale) + { + float kernelSum = 0.0f; + float ivar2 = 1.0f / (2.0f * scale * scale); + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) + { + kernel[j + LOWPASS_R] = (float)expf(-(double)j * j * ivar2); + kernelSum += kernel[j + LOWPASS_R]; + } + for (int j = -LOWPASS_R; j <= LOWPASS_R; j++) + kernel[j + LOWPASS_R] /= kernelSum; + +#ifdef DEVICE_TIMER 
+ auto start_memcpy_1 = std::chrono::steady_clock::now(); +#endif + q_ct.memcpy(d_LowPassKernel.get_ptr(), kernel, + (2 * LOWPASS_R + 1) * sizeof(float)); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy_1 = std::chrono::steady_clock::now(); + totTime += std::chrono::duration(stop_memcpy_1 - start_memcpy_1).count(); +#endif + oldScale = scale; + } + int width = res.width; + int pitch = res.pitch; + int height = res.height; + sycl::range<3> blocks(1, iDivUp(height, LOWPASS_H), iDivUp(width, LOWPASS_W)); //(1, 34, 80) + sycl::range<3> threads(1, 4, LOWPASS_W + 2 * LOWPASS_R); //(1, 4, 32) + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + q_ct.submit([&](sycl::handler &cgh) + { + auto d_LowPassKernel_ptr_ct1 = d_LowPassKernel.get_ptr(); + + auto src_data_ct1 = src.d_data; + auto res_data_ct1 = res.d_data; + + sycl::accessor + xrows_acc_ct1(sycl::range<2>(16, 32), cgh); + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { LowPassBlockOld(src_data_ct1, res_data_ct1, width, pitch, height, item_ct1, + d_LowPassKernel_ptr_ct1, xrows_acc_ct1); }); }) + .wait(); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("LowPassBlock time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif + checkMsg("LowPass() execution failed\n"); + return 0.0; + } + catch (sycl::exception const &e) + { + std::cout << e.what() << '\n'; + } +} + +//==================== Multi-scale functions ===================// + +void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel) +{ + if (numOctaves > 1) + { + float totInitBlur = (float)sqrt(initBlur * initBlur + 0.5f * 0.5f) / 2.0f; + PrepareLaplaceKernels(numOctaves - 1, totInitBlur, kernel); + } + 
float scale = pow(2.0f, -1.0f / NUM_SCALES); + float diffScale = pow(2.0f, 1.0f / NUM_SCALES); + for (int i = 0; i < NUM_SCALES + 3; i++) + { + float kernelSum = 0.0f; + float var = scale * scale - initBlur * initBlur; + for (int j = 0; j <= LAPLACE_R; j++) + { + kernel[numOctaves * 12 * 16 + 16 * i + j] = (float)expf(-(double)j * j / 2.0 / var); + kernelSum += (j == 0 ? 1 : 2) * kernel[numOctaves * 12 * 16 + 16 * i + j]; + } + for (int j = 0; j <= LAPLACE_R; j++) + kernel[numOctaves * 12 * 16 + 16 * i + j] /= kernelSum; + scale *= diffScale; + } +} + +double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, sycl::queue &q_ct, float &totTime) +{ + int width = results[0].width; + int pitch = results[0].pitch; + int height = results[0].height; + +#if 1 + sycl::range<3> threads(1, 1, LAPLACE_W + 2 * LAPLACE_R); //(1, 1, 136) + sycl::range<3> blocks(1, height, iDivUp(width, LAPLACE_W)); //(1, 1080, 15) + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + + q_ct.submit([&](sycl::handler &cgh) + { + float *d_LaplaceKernel_ptr_ct1 = d_LaplaceKernel.get_ptr(); + sycl::accessor + buff_acc_ct1( + sycl::range<1>(1088 /*(LAPLACE_W + 2*LAPLACE_R)*LAPLACE_S*/), cgh); + + float *results_d_data_ct1 = results[0].d_data; + float *baseImage_data_ct1 = baseImage.d_data; + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + LaplaceMultiMem(baseImage_data_ct1, results_d_data_ct1, + width, pitch, height, octave, item_ct1, + d_LaplaceKernel_ptr_ct1, + buff_acc_ct1.get_pointer()); + }); }) + .wait(); + +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("LaplaceMultiMem time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()); + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + 
checkMsg("LaplaceMulti() execution failed\n"); + return 0.0; +} + +double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, + float lowestScale, float subsampling, int octave, sycl::queue &q_ct, float &totTime) +{ + if (sources->d_data == NULL) + { + printf("FindPointsMulti: missing data\n"); + return 0.0; + } + int w = sources->width; + int p = sources->pitch; + int h = sources->height; +#if 1 + sycl::range<3> blocks(1, iDivUp(h, MINMAX_H), + iDivUp(w, MINMAX_W) * NUM_SCALES); + sycl::range<3> threads(1, 1, MINMAX_W + 2); + +#ifdef DEVICE_TIMER + auto start_kernel = std::chrono::steady_clock::now(); +#endif + auto event_FindPointsMulti = q_ct.submit([&](sycl::handler &cgh) + { + d_MaxNumPoints.init(); + d_PointCounter.init(); + + auto d_MaxNumPoints_ptr_ct1 = d_MaxNumPoints.get_ptr(); + auto d_PointCounter_ptr_ct1 = d_PointCounter.get_ptr(); + + sycl::accessor + points_acc_ct1(sycl::range<1>(64 /*2*MEMWID*/), cgh); + + auto sources_d_data_ct0 = sources->d_data; + auto siftData_data_ct1 = siftData.d_data; + + cgh.parallel_for( + sycl::nd_range<3>(blocks * threads, threads), [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { FindPointsMultiNew(sources_d_data_ct0, siftData_data_ct1, w, p, h, + subsampling, lowestScale, thresh, factor, + edgeLimit, octave, item_ct1, + *d_MaxNumPoints_ptr_ct1, d_PointCounter_ptr_ct1, + points_acc_ct1.get_pointer()); }); }); + event_FindPointsMulti.wait(); +#ifdef DEVICE_TIMER + auto stop_kernel = std::chrono::steady_clock::now(); + // printf("FindPointsMultiNew time = %.2f us\n", std::chrono::duration(stop_kernel - start_kernel).count()) + totTime += std::chrono::duration(stop_kernel - start_kernel).count(); +#endif +#endif + checkMsg("FindPointsMulti() execution failed\n"); + return 0.0; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.h 
b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.h new file mode 100644 index 000000000..746c25a8e --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudaSiftH.h @@ -0,0 +1,52 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#ifndef CUDASIFTH_H +#define CUDASIFTH_H + +#include + +#include "infra/infra.hpp" +#include "cudautils.h" +#include "cudaImage.h" +#include "cudaSift.h" + +int ExtractSiftLoop(SiftData &siftData, CudaImage &img, int numOctaves, double initBlur, float thresh, + float lowestScale, float subsampling, float *memoryTmp, float *memorySub, sycl::queue &q_ct, float &totTime); +void ExtractSiftOctave(SiftData &siftData, CudaImage &img, int octave, float thresh, float lowestScale, float subsampling, + float *memoryTmp, sycl::queue &q_ct, float &totTime); +double ScaleDown(CudaImage &res, CudaImage &src, float variance, sycl::queue &q_ct, float &totTime); +double ScaleUp(CudaImage &res, CudaImage &src, sycl::queue &q_ct, float &totTime); +double ComputeOrientations(CudaImage &src, SiftData &siftData, int octave, sycl::queue &q_ct, float &totTime); +double ExtractSiftDescriptors(float *texObj, int pitch, SiftData &siftData, float subsampling, + int octave, sycl::queue &q_ct, float &totTime); +double RescalePositions(SiftData &siftData, float scale, sycl::queue &q_ct, float &totTime); +double LowPass(CudaImage &res, CudaImage &src, float scale, sycl::queue &q_ct, float &totTime); +void PrepareLaplaceKernels(int numOctaves, float initBlur, float *kernel); +double LaplaceMulti(CudaImage &baseImage, CudaImage *results, int octave, sycl::queue &q_ct, float &totTime); +double FindPointsMulti(CudaImage *sources, SiftData &siftData, float thresh, float edgeLimit, float factor, float lowestScale, + float subsampling, int octave, sycl::queue &q_ct, float &totTime); +#endif diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudautils.h b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudautils.h new file mode 100644 index 000000000..7e1ca317f --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/cudautils.h @@ -0,0 +1,108 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free 
of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#ifndef CUDAUTILS_H +#define CUDAUTILS_H + +#include +#include +#include +#include + +#ifdef WIN32 +#include +#endif + +#define safeCall(err) __safeCall(err, __FILE__, __LINE__) +#define checkMsg(msg) __checkMsg(msg, __FILE__, __LINE__) + +inline void __safeCall(int err, const char *file, const int line) +{ +} + +inline void __checkMsg(const char *errorMessage, const char *file, const int line) +{ + int err = 0; +} + +class TimerCPU +{ + static const int bits = 10; + +public: + long long beg_clock; + float freq; + TimerCPU(float freq_) : freq(freq_) + { // freq = clock frequency in MHz + beg_clock = getTSC(bits); + } + long long getTSC(int bits) + { +#ifdef WIN32 + return __rdtsc() / (1LL << bits); +#else + unsigned int low, high; + __asm__(".byte 0x0f, 0x31" + : "=a"(low), "=d"(high)); + return ((long long)high << (32 - bits)) | ((long long)low >> bits); +#endif + } + float read() + { + long long end_clock = getTSC(bits); + long long Kcycles = end_clock - beg_clock; + float time = (float)(1 << bits) * Kcycles / freq / 1e3f; + return time; + } +}; + +template +__inline__ T ShiftDown(T var, unsigned int delta, sycl::nd_item<3> item_ct1, int width = 32) +{ +#if (SYCL_LANGUAGE_VERSION >= 9000) + return sycl::shift_group_left(item_ct1.get_sub_group(), var, delta); +#else + return __shfl_down(var, delta, width); +#endif +} + +template +__inline__ T ShiftUp(T var, unsigned int delta, sycl::nd_item<3> item_ct1, int width = 32) +{ +#if (SYCL_LANGUAGE_VERSION >= 9000) + return sycl::shift_group_right(item_ct1.get_sub_group(), var, delta); +#else + return __shfl_up(var, delta, width); +#endif +} + +template +__inline__ T Shuffle(T var, unsigned int lane, sycl::nd_item<3> item_ct1, int width = 32) +{ +#if (SYCL_LANGUAGE_VERSION >= 9000) + return sycl::select_from_group(item_ct1.get_sub_group(), var, lane); +#else + return __shfl(var, lane, width); +#endif +} + +#endif diff --git 
a/third-party-programs/Velocity-Bench/cudaSift/SYCL/geomFuncs.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/geomFuncs.cpp new file mode 100644 index 000000000..c01e6e7d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/geomFuncs.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include "cudaSift.h" + +int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh) +{ +#ifdef MANAGEDMEM + SiftPoint *mpts = data.m_data; +#else + if (data.h_data==NULL) + return 0; + SiftPoint *mpts = data.h_data; +#endif + float limit = thresh*thresh; + int numPts = data.numPts; + cv::Mat M(8, 8, CV_64FC1); + cv::Mat A(8, 1, CV_64FC1), X(8, 1, CV_64FC1); + double Y[8]; + for (int i=0;i<8;i++) + A.at(i, 0) = homography[i] / homography[8]; + for (int loop=0;loopmaxAmbiguity) + continue; + float den = A.at(6)*pt.xpos + A.at(7)*pt.ypos + 1.0f; + float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos; + float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos; + float err = dx*dx + dy*dy; + float wei = (err(r,c) += (Y[c] * Y[r] * wei); + X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_xpos * wei); + Y[0] = Y[1] = Y[2] = 0.0; + Y[3] = pt.xpos; + Y[4] = pt.ypos; + Y[5] = 1.0; + Y[6] = - pt.xpos * pt.match_ypos; + Y[7] = - pt.ypos * pt.match_ypos; + for (int c=0;c<8;c++) + for (int r=0;r<8;r++) + M.at(r,c) += (Y[c] * Y[r] * wei); + X += (cv::Mat(8,1,CV_64FC1,Y) * pt.match_ypos * wei); + } + cv::solve(M, X, A, cv::DECOMP_CHOLESKY); + } + int numfit = 0; + for (int i=0;i(6)*pt.xpos + A.at(7)*pt.ypos + 1.0; + float dx = (A.at(0)*pt.xpos + A.at(1)*pt.ypos + A.at(2)) / den - pt.match_xpos; + float dy = (A.at(3)*pt.xpos + A.at(4)*pt.ypos + A.at(5)) / den - pt.match_ypos; + float err = dx*dx + dy*dy; + if (err(i); + homography[8] = 1.0f; + return numfit; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/atomic.hpp 
b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/atomic.hpp new file mode 100644 index 000000000..922c88c3a --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/atomic.hpp @@ -0,0 +1,317 @@ +//==---- atomic.hpp -------------------------------*- C++ -*----------------==// +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT +//===----------------------------------------------------------------------===// + +#ifndef __INFRA_ATOMIC_HPP__ +#define __INFRA_ATOMIC_HPP__ + +#include + +namespace infra +{ + + /// Atomically add the value operand to the value at the addr and assign the + /// result to the value at addr, Int version. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to add to the value at \p addr. + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. 
+ template + inline T atomic_fetch_add( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_fetch_add(obj, operand, memoryOrder); + } + + /// Atomically add the value operand to the value at the addr and assign the + /// result to the value at addr, Float version. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to add to the value at \p addr. + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. + template + inline float atomic_fetch_add( + float *addr, float operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + static_assert(sizeof(float) == sizeof(int), "Mismatched type size"); + + sycl::atomic obj( + (sycl::multi_ptr(reinterpret_cast(addr)))); + + int old_value; + float old_float_value; + + do + { + old_value = obj.load(memoryOrder); + old_float_value = *reinterpret_cast(&old_value); + const float new_float_value = old_float_value + operand; + const int new_value = *reinterpret_cast(&new_float_value); + if (obj.compare_exchange_strong(old_value, new_value, memoryOrder)) + break; + } while (true); + + return old_float_value; + } + + /// Atomically add the value operand to the value at the addr and assign the + /// result to the value at addr, Double version. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to add to the value at \p addr + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. 
+ template + inline double atomic_fetch_add( + double *addr, double operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + static_assert(sizeof(double) == sizeof(unsigned long long int), + "Mismatched type size"); + + sycl::atomic obj( + (sycl::multi_ptr( + reinterpret_cast(addr)))); + + unsigned long long int old_value; + double old_double_value; + + do + { + old_value = obj.load(memoryOrder); + old_double_value = *reinterpret_cast(&old_value); + const double new_double_value = old_double_value + operand; + const unsigned long long int new_value = + *reinterpret_cast(&new_double_value); + + if (obj.compare_exchange_strong(old_value, new_value, memoryOrder)) + break; + } while (true); + + return old_double_value; + } + + /// Atomically subtract the value operand from the value at the addr and assign + /// the result to the value at addr. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to substract from the value at \p addr + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. + template + inline T atomic_fetch_sub( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_fetch_sub(obj, operand, memoryOrder); + } + + /// Atomically perform a bitwise AND between the value operand and the value at the addr + /// and assign the result to the value at addr. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to use in bitwise AND operation with the value at the \p addr. + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. 
+ template + inline T atomic_fetch_and( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_fetch_and(obj, operand, memoryOrder); + } + + /// Atomically or the value at the addr with the value operand, and assign + /// the result to the value at addr. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to use in bitwise OR operation with the value at the \p addr. + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. + template + inline T atomic_fetch_or( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_fetch_or(obj, operand, memoryOrder); + } + + /// Atomically xor the value at the addr with the value operand, and assign + /// the result to the value at addr. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to use in bitwise XOR operation with the value at the \p addr. + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. + template + inline T atomic_fetch_xor( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_fetch_xor(obj, operand, memoryOrder); + } + + /// Atomically calculate the minimum of the value at addr and the value operand + /// and assign the result to the value at addr. + /// \param [in, out] addr The pointer to the data. + /// \param operand. + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. 
+ template + inline T atomic_fetch_min( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_fetch_min(obj, operand, memoryOrder); + } + + /// Atomically calculate the maximum of the value at addr and the value operand + /// and assign the result to the value at addr. + /// \param [in, out] addr The pointer to the data. + /// \param operand. + /// \param memoryOrder The memory ordering used. + /// \returns The value at the \p addr before the call. + template + inline T atomic_fetch_max( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_fetch_max(obj, operand, memoryOrder); + } + + /// Atomically increment the value stored in \p addr if old value stored in \p + /// addr is less than \p operand, else set 0 to the value stored in \p addr. + /// \param [in, out] addr The pointer to the data. + /// \param operand The threshold value. + /// \param memoryOrder The memory ordering used. + /// \returns The old value stored in \p addr. + template + inline unsigned int atomic_fetch_compare_inc( + unsigned int *addr, unsigned int operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + unsigned int old; + while (true) + { + old = obj.load(); + if (old >= operand) + { + if (obj.compare_exchange_strong(old, 0, memoryOrder, memoryOrder)) + break; + } + else + { + old = obj.fetch_add(1); + break; + } + // else if (obj.compare_exchange_strong(old, old + 1, memoryOrder, + // memoryOrder)) + // break; + } + return old; + } + + /// Atomically exchange the value at the address addr with the value operand. + /// \param [in, out] addr The pointer to the data. + /// \param operand The value to be exchanged with the value pointed by \p addr. + /// \param memoryOrder The memory ordering used. 
+ /// \returns The value at the \p addr before the call. + template + inline T atomic_exchange( + T *addr, T operand, + sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + { + sycl::atomic obj( + (sycl::multi_ptr(addr))); + return sycl::atomic_exchange(obj, operand, memoryOrder); + } + + /// Atomically compare the value at \p addr to the value expected and exchange + /// with the value desired if the value at \p addr is equal to the value expected. + /// Returns the value at the \p addr before the call. + /// \param [in, out] addr Multi_ptr. + /// \param expected The value to compare against the value at \p addr. + /// \param desired The value to assign to \p addr if the value at \p addr is expected. + /// \param success The memory ordering used when comparison succeeds. + /// \param fail The memory ordering used when comparison fails. + /// \returns The value at the \p addr before the call. + template + T atomic_compare_exchange_strong( + sycl::multi_ptr addr, + T expected, T desired, + sycl::memory_order success = sycl::memory_order::relaxed, + sycl::memory_order fail = sycl::memory_order::relaxed) + { + sycl::atomic obj(addr); + obj.compare_exchange_strong(expected, desired, success, fail); + return expected; + } + + /// Atomically compare the value at \p addr to the value expected and exchange + /// with the value desired if the value at \p addr is equal to the value expected. + /// Returns the value at the \p addr before the call. + /// \param [in] addr The pointer to the data. + /// \param expected The value to compare against the value at \p addr. + /// \param desired The value to assign to \p addr if the value at \p addr is expected. + /// \param success The memory ordering used when comparison succeeds. + /// \param fail The memory ordering used when comparison fails. + /// \returns The value at the \p addr before the call. 
+ template + T atomic_compare_exchange_strong( + T *addr, T expected, T desired, + sycl::memory_order success = sycl::memory_order::relaxed, + sycl::memory_order fail = sycl::memory_order::relaxed) + { + return atomic_compare_exchange_strong( + sycl::multi_ptr(addr), expected, desired, success, + fail); + } + +} // namespace infra +#endif // __INFRA_ATOMIC_HPP__ diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/device.hpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/device.hpp new file mode 100644 index 000000000..4a859e20f --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/device.hpp @@ -0,0 +1,534 @@ +//==---- device.hpp -------------------------------*- C++ -*----------------==// +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT +//===----------------------------------------------------------------------===// + +#ifndef __INFRA_DEVICE_HPP__ +#define __INFRA_DEVICE_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__linux__) +#include +#include +#endif +#if defined(_WIN64) +#define NOMINMAX +#include +#endif + +namespace infra +{ + + /// DPC++ default exception handler + auto exception_handler = [](sycl::exception_list exceptions) + { + for (std::exception_ptr const &e : exceptions) + { + try + { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) + { + std::cerr << "Caught asynchronous SYCL exception:" << std::endl + << e.what() << std::endl + << "Exception caught at file:" << __FILE__ + << ", line:" << __LINE__ << std::endl; + } + } + }; + + class device_info + { + public: + // get interface + char *get_name() { return _name; } + sycl::id<3> get_max_work_item_sizes() { return _max_work_item_sizes; } + bool get_host_unified_memory() { return _host_unified_memory; } + int get_major_version() { return _major; } + int get_minor_version() { return _minor; } + int get_integrated() { return _integrated; } + int get_max_clock_frequency() { return _frequency; } + int get_max_compute_units() { return _max_compute_units; } + int get_max_work_group_size() { return _max_work_group_size; } + int get_max_sub_group_size() { return _max_sub_group_size; } + int get_max_work_items_per_compute_unit() + { + return _max_work_items_per_compute_unit; + } + size_t *get_max_nd_range_size() { return _max_nd_range_size; } + size_t get_global_mem_size() { return _global_mem_size; } + size_t get_local_mem_size() { return _local_mem_size; } + // set interface + void set_name(const char *name) { std::strncpy(_name, name, 256); } + void set_max_work_item_sizes(const sycl::id<3> max_work_item_sizes) + { + _max_work_item_sizes = max_work_item_sizes; + } + void set_host_unified_memory(bool 
host_unified_memory) + { + _host_unified_memory = host_unified_memory; + } + void set_major_version(int major) { _major = major; } + void set_minor_version(int minor) { _minor = minor; } + void set_integrated(int integrated) { _integrated = integrated; } + void set_max_clock_frequency(int frequency) { _frequency = frequency; } + void set_max_compute_units(int max_compute_units) + { + _max_compute_units = max_compute_units; + } + void set_global_mem_size(size_t global_mem_size) + { + _global_mem_size = global_mem_size; + } + void set_local_mem_size(size_t local_mem_size) + { + _local_mem_size = local_mem_size; + } + void set_max_work_group_size(int max_work_group_size) + { + _max_work_group_size = max_work_group_size; + } + void set_max_sub_group_size(int max_sub_group_size) + { + _max_sub_group_size = max_sub_group_size; + } + void + set_max_work_items_per_compute_unit(int max_work_items_per_compute_unit) + { + _max_work_items_per_compute_unit = max_work_items_per_compute_unit; + } + void set_max_nd_range_size(int max_nd_range_size[]) + { + for (int i = 0; i < 3; i++) + _max_nd_range_size[i] = max_nd_range_size[i]; + } + + private: + char _name[256]; + sycl::id<3> _max_work_item_sizes; + bool _host_unified_memory = false; + int _major; + int _minor; + int _integrated = 0; + int _frequency; + int _max_compute_units; + int _max_work_group_size; + int _max_sub_group_size; + int _max_work_items_per_compute_unit; + size_t _global_mem_size; + size_t _local_mem_size; + size_t _max_nd_range_size[3]; + }; + + /// infra device extension + class device_ext : public sycl::device + { + public: + device_ext() : sycl::device(), _ctx(*this) {} + ~device_ext() + { + std::lock_guard lock(m_mutex); + for (auto &task : _tasks) + { + if (task.joinable()) + task.join(); + } + _tasks.clear(); + _queues.clear(); + } + device_ext(const sycl::device &base) + : sycl::device(base), _ctx(*this) + { +#ifdef INFRA_USM_LEVEL_NONE + _queues.push_back( + std::make_shared(_ctx, base, 
exception_handler)); +#else + _queues.push_back(std::make_shared( + _ctx, base, exception_handler, sycl::property::queue::in_order())); +#endif + _saved_queue = _default_queue = _queues[0].get(); + } + + int is_native_atomic_supported() { return 0; } + int get_major_version() + { + int major, minor; + get_version(major, minor); + return major; + } + + int get_minor_version() + { + int major, minor; + get_version(major, minor); + return minor; + } + + int get_max_compute_units() + { + return get_device_info().get_max_compute_units(); + } + + int get_max_clock_frequency() + { + return get_device_info().get_max_clock_frequency(); + } + + int get_integrated() { return get_device_info().get_integrated(); } + + void get_device_info(device_info &out) + { + device_info prop; + prop.set_name(get_info().c_str()); + + int major, minor; + get_version(major, minor); + prop.set_major_version(major); + prop.set_minor_version(minor); + + prop.set_max_work_item_sizes( + get_info>()); + prop.set_host_unified_memory( + get_info()); + + // max_clock_frequency parameter is not supported on host device + if (is_host()) + { + // This code may need to be updated. Currently max_clock_frequency for + // host device is initialized with 1, in assumption that if other devices + // exist and they are being selected based on this parameter, other + // devices would have higher priority. 
+ prop.set_max_clock_frequency(1); + } + else + { + prop.set_max_clock_frequency( + get_info()); + } + + prop.set_max_compute_units( + get_info()); + prop.set_max_work_group_size( + get_info()); + prop.set_global_mem_size( + get_info()); + prop.set_local_mem_size(get_info()); + + size_t max_sub_group_size = 1; + std::vector sub_group_sizes = + get_info(); + + for (const auto &sub_group_size : sub_group_sizes) + { + if (max_sub_group_size < sub_group_size) + max_sub_group_size = sub_group_size; + } + + prop.set_max_sub_group_size(max_sub_group_size); + + prop.set_max_work_items_per_compute_unit( + get_info()); + int max_nd_range_size[] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}; + prop.set_max_nd_range_size(max_nd_range_size); + + out = prop; + } + + device_info get_device_info() + { + device_info prop; + get_device_info(prop); + return prop; + } + + void reset() + { + std::lock_guard lock(m_mutex); + // The queues are shared_ptrs and the ref counts of the shared_ptrs increase + // only in wait_and_throw(). If there is no other thread calling + // wait_and_throw(), the queues will be destructed. The destructor waits for + // all commands executing on the queue to complete. It isn't possible to + // destroy a queue immediately. This is a synchronization point in SYCL. + _queues.clear(); + // create new default queue. +#ifdef INFRA_USM_LEVEL_NONE + _queues.push_back( + std::make_shared(_ctx, *this, exception_handler)); +#else + _queues.push_back(std::make_shared( + _ctx, *this, exception_handler, sycl::property::queue::in_order())); +#endif + _saved_queue = _default_queue = _queues.front().get(); + } + + sycl::queue &default_queue() { return *_default_queue; } + + void queues_wait_and_throw() + { + std::unique_lock lock(m_mutex); + std::vector> current_queues( + _queues); + lock.unlock(); + for (const auto &q : current_queues) + { + q->wait_and_throw(); + } + // Guard the destruct of current_queues to make sure the ref count is safe. 
+ lock.lock(); + } + sycl::queue *create_queue(bool enable_exception_handler = false) + { + std::lock_guard lock(m_mutex); + sycl::async_handler eh = {}; + if (enable_exception_handler) + { + eh = exception_handler; + } +#ifdef INFRA_USM_LEVEL_NONE + _queues.push_back(std::make_shared( + _ctx, *this, eh)); +#else + _queues.push_back(std::make_shared( + _ctx, *this, eh, + sycl::property::queue::in_order())); +#endif + return _queues.back().get(); + } + void destroy_queue(sycl::queue *&queue) + { + std::lock_guard lock(m_mutex); + _queues.erase(std::remove_if(_queues.begin(), _queues.end(), + [=](const std::shared_ptr &q) -> bool + { + return q.get() == queue; + }), + _queues.end()); + queue = nullptr; + } + void set_saved_queue(sycl::queue *q) + { + std::lock_guard lock(m_mutex); + _saved_queue = q; + } + sycl::queue *get_saved_queue() + { + std::lock_guard lock(m_mutex); + return _saved_queue; + } + sycl::context get_context() { return _ctx; } + + private: + void get_version(int &major, int &minor) + { + // Version string has the following format: + // a. OpenCL + // b. + std::string ver; + ver = get_info(); + std::string::size_type i = 0; + while (i < ver.size()) + { + if (isdigit(ver[i])) + break; + i++; + } + major = std::stoi(&(ver[i])); + while (i < ver.size()) + { + if (ver[i] == '.') + break; + i++; + } + i++; + minor = std::stoi(&(ver[i])); + } + void add_task(std::thread &&task) + { + std::lock_guard lock(m_mutex); + _tasks.push_back(std::move(task)); + } + friend void async_infra_free(std::vector, + std::vector, + sycl::queue &); + sycl::queue *_default_queue; + sycl::queue *_saved_queue; + sycl::context _ctx; + std::vector> _queues; + mutable std::mutex m_mutex; + std::vector _tasks; + }; + + static inline unsigned int get_tid() + { +#if defined(__linux__) + return syscall(SYS_gettid); +#elif defined(_WIN64) + return GetCurrentThreadId(); +#else +#error "Only support Windows and Linux." 
+#endif + } + + /// device manager + class dev_mgr + { + public: + device_ext ¤t_device() + { + unsigned int dev_id = current_device_id(); + check_id(dev_id); + return *_devs[dev_id]; + } + device_ext &cpu_device() const + { + std::lock_guard lock(m_mutex); + if (_cpu_device == -1) + { + throw std::runtime_error("no valid cpu device"); + } + else + { + return *_devs[_cpu_device]; + } + } + device_ext &get_device(unsigned int id) const + { + std::lock_guard lock(m_mutex); + check_id(id); + return *_devs[id]; + } + unsigned int current_device_id() const + { + std::lock_guard lock(m_mutex); + auto it = _thread2dev_map.find(get_tid()); + if (it != _thread2dev_map.end()) + return it->second; + return DEFAULT_DEVICE_ID; + } + void select_device(unsigned int id) + { + std::lock_guard lock(m_mutex); + check_id(id); + _thread2dev_map[get_tid()] = id; + } + unsigned int device_count() { return _devs.size(); } + + /// Returns the instance of device manager singleton. + static dev_mgr &instance() + { + static dev_mgr d_m; + return d_m; + } + dev_mgr(const dev_mgr &) = delete; + dev_mgr &operator=(const dev_mgr &) = delete; + dev_mgr(dev_mgr &&) = delete; + dev_mgr &operator=(dev_mgr &&) = delete; + + private: + mutable std::mutex m_mutex; + dev_mgr() + { + sycl::device default_device = + sycl::device(sycl::default_selector{}); + _devs.push_back(std::make_shared(default_device)); + + std::vector sycl_all_devs = + sycl::device::get_devices(sycl::info::device_type::all); + // sycl::device::get_devices(sycl::info::device_type::gpu); + // Collect other devices except for the default device. 
+ if (default_device.is_cpu()) + _cpu_device = 0; + for (auto &dev : sycl_all_devs) + { + if (dev == default_device) + { + continue; + } + _devs.push_back(std::make_shared(dev)); + if (_cpu_device == -1 && dev.is_cpu()) + { + _cpu_device = _devs.size() - 1; + } + } + } + void check_id(unsigned int id) const + { + if (id >= _devs.size()) + { + throw std::runtime_error("invalid device id"); + } + } + std::vector> _devs; + /// DEFAULT_DEVICE_ID is used, if current_device_id() can not find current + /// thread id in _thread2dev_map, which means default device should be used + /// for the current thread. + const unsigned int DEFAULT_DEVICE_ID = 0; + /// thread-id to device-id map. + std::map _thread2dev_map; + int _cpu_device = -1; + }; + + /// Util function to get the default queue of current device in + /// infra device manager. + static inline sycl::queue &get_default_queue() + { + return dev_mgr::instance().current_device().default_queue(); + } + + /// Util function to get the current device. + static inline device_ext &get_current_device() + { + return dev_mgr::instance().current_device(); + } + + /// Util function to get a device by id. + static inline device_ext &get_device(unsigned int id) + { + return dev_mgr::instance().get_device(id); + } + + /// Util function to get the context of the default queue of current + /// device in infra device manager. + static inline sycl::context get_default_context() + { + return infra::get_current_device().get_context(); + } + + /// Util function to get a cpu device.
+ static inline device_ext &cpu_device() + { + return dev_mgr::instance().cpu_device(); + } + +} // namespace infra + +#endif // __INFRA_DEVICE_HPP__ diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/infra.hpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/infra.hpp new file mode 100644 index 000000000..498512888 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/infra.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT +//===----------------------------------------------------------------------===// + +#ifndef __INFRA_HPP__ +#define __INFRA_HPP__ + +#include +#include +#include + +#include "atomic.hpp" +#include "device.hpp" +#include "memory.hpp" + +#endif // __INFRA_HPP__ diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/memory.hpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/memory.hpp new file mode 100644 index 000000000..444d193ed --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/infra/memory.hpp @@ -0,0 +1,1292 @@ +//==---- memory.hpp -------------------------------*- C++ -*----------------==// +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT +//===----------------------------------------------------------------------===// + +#ifndef __INFRA_MEMORY_HPP__ +#define __INFRA_MEMORY_HPP__ + +#include "device.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__linux__) +#include +#elif defined(_WIN64) +#define NOMINMAX +#include +#else +#error "Only support Windows and Linux." +#endif + +namespace infra +{ + + enum memcpy_direction + { + host_to_host, + host_to_device, + device_to_host, + device_to_device, + automatic + }; + enum memory_region + { + global = 0, // device global memory + constant, // device constant memory + local, // device local memory + shared, // memory which can be accessed by host and device + }; + + typedef uint8_t byte_t; + + /// Buffer type to be used in Memory Management runtime. + typedef sycl::buffer buffer_t; + + /// Pitched 2D/3D memory data. + class pitched_data + { + public: + pitched_data() : pitched_data(nullptr, 0, 0, 0) {} + pitched_data(void *data, size_t pitch, size_t x, size_t y) + : _data(data), _pitch(pitch), _x(x), _y(y) {} + + void *get_data_ptr() { return _data; } + void set_data_ptr(void *data) { _data = data; } + + size_t get_pitch() { return _pitch; } + void set_pitch(size_t pitch) { _pitch = pitch; } + + size_t get_x() { return _x; } + void set_x(size_t x) { _x = x; }; + + size_t get_y() { return _y; } + void set_y(size_t y) { _y = y; } + + private: + void *_data; + size_t _pitch, _x, _y; + }; + + namespace detail + { + class mem_mgr + { + mem_mgr() + { + // Reserved address space, no real memory allocation happens here. 
+#if defined(__linux__) + mapped_address_space = + (byte_t *)mmap(nullptr, mapped_region_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +#elif defined(_WIN64) + mapped_address_space = (byte_t *)VirtualAlloc( + NULL, // NULL specified as the base address parameter + mapped_region_size, // Size of allocation + MEM_RESERVE, // Allocate reserved pages + PAGE_NOACCESS); // Protection = no access +#else +#error "Only support Windows and Linux." +#endif + next_free = mapped_address_space; + }; + + public: + using buffer_id_t = int; + + struct allocation + { + buffer_t buffer; + byte_t *alloc_ptr; + size_t size; + }; + + ~mem_mgr() + { +#if defined(__linux__) + munmap(mapped_address_space, mapped_region_size); +#elif defined(_WIN64) + VirtualFree(mapped_address_space, 0, MEM_RELEASE); +#else +#error "Only support Windows and Linux." +#endif + }; + + mem_mgr(const mem_mgr &) = delete; + mem_mgr &operator=(const mem_mgr &) = delete; + mem_mgr(mem_mgr &&) = delete; + mem_mgr &operator=(mem_mgr &&) = delete; + + /// Allocate + void *mem_alloc(size_t size) + { + if (!size) + return nullptr; + std::lock_guard lock(m_mutex); + if (next_free + size > mapped_address_space + mapped_region_size) + { + throw std::runtime_error("sift_malloc: out of memory for virtual memory pool"); + } + // Allocation + sycl::range<1> r(size); + buffer_t buf(r); + allocation A{buf, next_free, size}; + // Map allocation to device pointer + void *result = next_free; + m_map.emplace(next_free + size, A); + // Update pointer to the next free space. 
+ next_free += (size + extra_padding + alignment - 1) & ~(alignment - 1); + + return result; + } + + /// Deallocate + void mem_free(const void *ptr) + { + if (!ptr) + return; + std::lock_guard lock(m_mutex); + auto it = get_map_iterator(ptr); + m_map.erase(it); + } + + /// map: device pointer -> allocation(buffer, alloc_ptr, size) + allocation translate_ptr(const void *ptr) + { + std::lock_guard lock(m_mutex); + auto it = get_map_iterator(ptr); + return it->second; + } + + /// Check if the pointer represents device pointer or not. + bool is_device_ptr(const void *ptr) const + { + std::lock_guard lock(m_mutex); + return (mapped_address_space <= ptr) && + (ptr < mapped_address_space + mapped_region_size); + } + + /// Returns the instance of memory manager singleton. + static mem_mgr &instance() + { + static mem_mgr m; + return m; + } + + private: + std::map m_map; + mutable std::mutex m_mutex; + byte_t *mapped_address_space; + byte_t *next_free; + const size_t mapped_region_size = 128ull * 1024 * 1024 * 1024; + const size_t alignment = 256; + /// This padding may be defined to some positive value to debug + /// out of bound accesses. + const size_t extra_padding = 0; + + std::map::iterator get_map_iterator(const void *ptr) + { + auto it = m_map.upper_bound((byte_t *)ptr); + if (it == m_map.end()) + { + // Not a virtual pointer. + throw std::runtime_error("can not get buffer from non-virtual pointer"); + } + const allocation &alloc = it->second; + if (ptr < alloc.alloc_ptr) + { + // Out of bound. + // This may happen if there's a gap between allocations due to alignment + // or extra padding and pointer points to this gap. + throw std::runtime_error("invalid virtual pointer"); + } + return it; + } + }; + + template + class accessor; + template + class memory_traits + { + public: + static constexpr sycl::access::address_space asp = + (Memory == local) + ? sycl::access::address_space::local_space + : ((Memory == constant) + ? 
sycl::access::address_space::constant_space + : sycl::access::address_space::global_space); + static constexpr sycl::access::target target = + (Memory == local) + ? sycl::access::target::local + : ((Memory == constant) ? sycl::access::target::constant_buffer + : sycl::access::target::global_buffer); + static constexpr sycl::access_mode mode = + (Memory == constant) ? sycl::access_mode::read + : sycl::access_mode::read_write; + static constexpr size_t type_size = sizeof(T); + using element_t = + typename std::conditional::type; + using value_t = typename std::remove_cv::type; + template + using accessor_t = sycl::accessor; + using pointer_t = T *; + }; + + static inline void *sift_malloc(size_t size, sycl::queue &q) + { +#ifdef INFRA_USM_LEVEL_NONE + return mem_mgr::instance().mem_alloc(size * sizeof(byte_t)); +#else + return sycl::malloc_device(size, q.get_device(), q.get_context()); +#endif // INFRA_USM_LEVEL_NONE + } + +#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F)) + static inline void *sift_malloc(size_t &pitch, size_t x, size_t y, size_t z, + sycl::queue &q) + { + pitch = PITCH_DEFAULT_ALIGN(x); + return sift_malloc(pitch * y * z, q); + } + + /// Set \p value to the first \p size bytes starting from \p dev_ptr in \p q. + /// + /// \param q The queue in which the operation is done. + /// \param dev_ptr Pointer to the device memory address. + /// \param value Value to be set. + /// \param size Number of bytes to be set to the value. + /// \returns An event representing the memset operation. 
+ static inline sycl::event sift_memset(sycl::queue &q, void *dev_ptr, + int value, size_t size) + { +#ifdef INFRA_USM_LEVEL_NONE + auto &mm = mem_mgr::instance(); + assert(mm.is_device_ptr(dev_ptr)); + auto alloc = mm.translate_ptr(dev_ptr); + size_t offset = (byte_t *)dev_ptr - alloc.alloc_ptr; + + return q.submit([&](sycl::handler &cgh) + { + auto r = sycl::range<1>(size); + auto o = sycl::id<1>(offset); + sycl::accessor + acc(alloc.buffer, cgh, r, o); + cgh.fill(acc, (byte_t)value); }); +#else + return q.memset(dev_ptr, value, size); +#endif // INFRA_USM_LEVEL_NONE + } + + /// Set \p value to the 3D memory region described by \p data in \p q. \p size + /// specifies the 3D memory size to set; one 1D memset is issued per row. + /// + /// \param q The queue in which the operation is done. + /// \param data Pitched description (pointer, pitch, y) of the device memory region. + /// \param value Value to be set. + /// \param size Region to set: bytes per row (dim 0), rows (dim 1), slices (dim 2). + /// \returns An event list representing the memset operations, one event per row. + static inline std::vector + sift_memset(sycl::queue &q, pitched_data data, int value, + sycl::range<3> size) + { + std::vector event_list; + size_t slice = data.get_pitch() * data.get_y(); + unsigned char *data_surface = (unsigned char *)data.get_data_ptr(); + for (size_t z = 0; z < size.get(2); ++z) + { + unsigned char *data_ptr = data_surface; + for (size_t y = 0; y < size.get(1); ++y) + { + event_list.push_back(sift_memset(q, data_ptr, value, size.get(0))); + data_ptr += data.get_pitch(); + } + data_surface += slice; + } + return event_list; + } + + /// memset 2D matrix with pitch: \p x bytes in each of \p y rows, rows \p pitch bytes apart.
+ static inline std::vector + sift_memset(sycl::queue &q, void *ptr, size_t pitch, int val, size_t x, + size_t y) + { + return sift_memset(q, pitched_data(ptr, pitch, x, 1), val, + sycl::range<3>(x, y, 1)); + } + + static sycl::event sift_memcpy(sycl::queue &q, void *to_ptr, + const void *from_ptr, size_t size, + memcpy_direction direction) + { + if (!size) + return sycl::event{}; +#ifdef INFRA_USM_LEVEL_NONE + auto &mm = mem_mgr::instance(); + memcpy_direction real_direction = direction; + switch (direction) + { + case host_to_host: + assert(!mm.is_device_ptr(from_ptr) && !mm.is_device_ptr(to_ptr)); + break; + case host_to_device: + assert(!mm.is_device_ptr(from_ptr) && mm.is_device_ptr(to_ptr)); + break; + case device_to_host: + assert(mm.is_device_ptr(from_ptr) && !mm.is_device_ptr(to_ptr)); + break; + case device_to_device: + assert(mm.is_device_ptr(from_ptr) && mm.is_device_ptr(to_ptr)); + break; + case automatic: + bool from_device = mm.is_device_ptr(from_ptr); + bool to_device = mm.is_device_ptr(to_ptr); + if (from_device) + { + if (to_device) + { + real_direction = device_to_device; + } + else + { + real_direction = device_to_host; + } + } + else + { + if (to_device) + { + real_direction = host_to_device; + } + else + { + real_direction = host_to_host; + } + } + break; + } + bool is_cpu = q.get_device().is_cpu(); + + switch (real_direction) + { + case host_to_host: + std::memcpy(to_ptr, from_ptr, size); + return sycl::event(); + case host_to_device: + { + auto alloc = mm.translate_ptr(to_ptr); + size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr; + if (is_cpu) + { + buffer_t from_buffer((byte_t *)from_ptr, sycl::range<1>(size), {sycl::property::buffer::use_host_ptr()}); + return q.submit([&](sycl::handler &cgh) + { + auto r = sycl::range<1>(size); + auto o = sycl::id<1>(offset); + auto from_acc = from_buffer.get_access(cgh); + sycl::accessor + acc(alloc.buffer, cgh, r, o); + cgh.parallel_for(r, [=](sycl::id<1> idx) { + acc[idx] = from_acc[idx]; + }); }); + 
} + else + { + return q.submit([&](sycl::handler &cgh) + { + auto r = sycl::range<1>(size); + auto o = sycl::id<1>(offset); + sycl::accessor + acc(alloc.buffer, cgh, r, o); + cgh.copy(from_ptr, acc); }); + } + } + case device_to_host: + { + auto alloc = mm.translate_ptr(from_ptr); + size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr; + if (is_cpu) + { + buffer_t to_buffer((byte_t *)to_ptr, sycl::range<1>(size), {sycl::property::buffer::use_host_ptr()}); + return q.submit([&](sycl::handler &cgh) + { + auto r = sycl::range<1>(size); + auto o = sycl::id<1>(offset); + auto to_acc = to_buffer.get_access(cgh); + sycl::accessor + acc(alloc.buffer, cgh, r, o); + cgh.parallel_for(r, [=](sycl::id<1> idx) { + to_acc[idx] = acc[idx]; + }); }); + } + else + { + return q.submit([&](sycl::handler &cgh) + { + auto r = sycl::range<1>(size); + auto o = sycl::id<1>(offset); + sycl::accessor + acc(alloc.buffer, cgh, r, o); + cgh.copy(acc, to_ptr); }); + } + } + case device_to_device: + { + auto to_alloc = mm.translate_ptr(to_ptr); + auto from_alloc = mm.translate_ptr(from_ptr); + size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr; + size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr; + if (is_cpu) + { + return q.submit([&](sycl::handler &cgh) + { + auto r = sycl::range<1>(size); + auto to_o = sycl::id<1>(to_offset); + auto from_o = sycl::id<1>(from_offset); + sycl::accessor + to_acc(to_alloc.buffer, cgh, r, to_o); + sycl::accessor + from_acc(from_alloc.buffer, cgh, r, from_o); + cgh.parallel_for(r, [=](sycl::id<1> idx) { + to_acc[idx] = from_acc[idx]; + }); }); + } + else + { + return q.submit([&](sycl::handler &cgh) + { + auto r = sycl::range<1>(size); + auto to_o = sycl::id<1>(to_offset); + auto from_o = sycl::id<1>(from_offset); + sycl::accessor + to_acc(to_alloc.buffer, cgh, r, to_o); + sycl::accessor + from_acc(from_alloc.buffer, cgh, r, from_o); + cgh.copy(from_acc, to_acc); }); + } + } + default: + throw std::runtime_error("sift_memcpy: invalid direction 
value"); + } +#else + return q.memcpy(to_ptr, from_ptr, size); +#endif // INFRA_USM_LEVEL_NONE + } + + /// copy 3D matrix specified by \p size from 3D matrix specified by \p from_ptr + /// and \p from_range to another specified by \p to_ptr and \p to_range. + static inline std::vector + sift_memcpy(sycl::queue &q, void *to_ptr, const void *from_ptr, + sycl::range<3> to_range, sycl::range<3> from_range, + sycl::id<3> to_id, sycl::id<3> from_id, + sycl::range<3> size, memcpy_direction direction) + { + std::vector event_list; + + size_t to_slice = to_range.get(1) * to_range.get(0), + from_slice = from_range.get(1) * from_range.get(0); + unsigned char *to_surface = (unsigned char *)to_ptr + + to_id.get(2) * to_slice + + to_id.get(1) * to_range.get(0) + to_id.get(0); + const unsigned char *from_surface = + (const unsigned char *)from_ptr + from_id.get(2) * from_slice + + from_id.get(1) * from_range.get(0) + from_id.get(0); + + if (to_slice == from_slice && to_slice == size.get(1) * size.get(0)) + { + return {sift_memcpy(q, to_surface, from_surface, to_slice * size.get(2), + direction)}; + } + for (size_t z = 0; z < size.get(2); ++z) + { + unsigned char *to_ptr = to_surface; + const unsigned char *from_ptr = from_surface; + if (to_range.get(0) == from_range.get(0) && + to_range.get(0) == size.get(0)) + { + event_list.push_back(sift_memcpy(q, to_ptr, from_ptr, + size.get(0) * size.get(1), direction)); + } + else + { + for (size_t y = 0; y < size.get(1); ++y) + { + event_list.push_back( + sift_memcpy(q, to_ptr, from_ptr, size.get(0), direction)); + to_ptr += to_range.get(0); + from_ptr += from_range.get(0); + } + } + to_surface += to_slice; + from_surface += from_slice; + } + return event_list; + } + + /// memcpy 2D/3D matrix specified by pitched_data. 
+ static inline std::vector + sift_memcpy(sycl::queue &q, pitched_data to, sycl::id<3> to_id, + pitched_data from, sycl::id<3> from_id, sycl::range<3> size, + memcpy_direction direction = automatic) + { + return sift_memcpy(q, to.get_data_ptr(), from.get_data_ptr(), + sycl::range<3>(to.get_pitch(), to.get_y(), 1), + sycl::range<3>(from.get_pitch(), from.get_y(), 1), to_id, from_id, + size, direction); + } + + /// memcpy 2D matrix with pitch. + static inline std::vector + sift_memcpy(sycl::queue &q, void *to_ptr, const void *from_ptr, + size_t to_pitch, size_t from_pitch, size_t x, size_t y, + memcpy_direction direction = automatic) + { + return sift_memcpy(q, to_ptr, from_ptr, sycl::range<3>(to_pitch, y, 1), + sycl::range<3>(from_pitch, y, 1), + sycl::id<3>(0, 0, 0), sycl::id<3>(0, 0, 0), + sycl::range<3>(x, y, 1), direction); + } + } // namespace detail + +#ifdef INFRA_USM_LEVEL_NONE + /// Check if the pointer \p ptr represents a device pointer or not. + /// + /// \param ptr The pointer to be checked. + /// \returns true if \p ptr is a device pointer; always false when T is not a pointer type. + template + static inline bool is_device_ptr(T ptr) + { + if constexpr (std::is_pointer::value) + { + return detail::mem_mgr::instance().is_device_ptr(ptr); + } + return false; + } +#endif + + /// Get the buffer and the offset of a piece of memory pointed to by \p ptr. + /// + /// \param ptr Pointer to a piece of memory. + /// If NULL is passed as an argument, an exception will be thrown. + /// \returns a pair containing both the buffer and the offset. + static std::pair get_buffer_and_offset(const void *ptr) + { + if (ptr) + { + auto alloc = detail::mem_mgr::instance().translate_ptr(ptr); + size_t offset = (byte_t *)ptr - alloc.alloc_ptr; + return std::make_pair(alloc.buffer, offset); + } + else + { + throw std::runtime_error( + "NULL pointer argument in get_buffer_and_offset function is invalid"); + } + } + + /// Get the data pointed to by \p ptr as a 1D buffer reinterpreted as type T.
+ template + static sycl::buffer get_buffer(const void *ptr) + { + auto alloc = detail::mem_mgr::instance().translate_ptr(ptr); + return alloc.buffer.reinterpret( + sycl::range<1>(alloc.size / sizeof(T))); + } + + /// Get the buffer of a piece of memory pointed to by \p ptr. + /// + /// \param ptr Pointer to a piece of memory. + /// \returns the whole buffer backing the allocation that contains \p ptr. + static buffer_t get_buffer(const void *ptr) + { + return detail::mem_mgr::instance().translate_ptr(ptr).buffer; + } + + /// A wrapper class that contains an accessor and a byte offset. + template + class access_wrapper + { + sycl::accessor accessor; + size_t offset; + + public: + /// Construct the accessor wrapper for memory pointed to by \p ptr. + /// + /// \param ptr Pointer to memory. + /// \param cgh The command group handler. + access_wrapper(const void *ptr, sycl::handler &cgh) + : accessor(get_buffer(ptr).get_access(cgh)), offset(0) + { + auto alloc = detail::mem_mgr::instance().translate_ptr(ptr); + offset = (byte_t *)ptr - alloc.alloc_ptr; + } + + /// Get the device pointer. + /// + /// \returns a device pointer with offset. + dataT get_raw_pointer() const { return (dataT)(&accessor[0] + offset); } + }; + + /// Get the accessor for memory pointed to by \p ptr. + /// + /// \param ptr Pointer to memory. + /// If NULL is passed as an argument, an exception will be thrown. + /// \param cgh The command group handler. + /// \returns an accessor over the backing buffer; the offset of \p ptr is not applied. + template + static sycl::accessor + get_access(const void *ptr, sycl::handler &cgh) + { + if (ptr) + { + auto alloc = detail::mem_mgr::instance().translate_ptr(ptr); + return alloc.buffer.get_access(cgh); + } + else + { + throw std::runtime_error( + "NULL pointer argument in get_access function is invalid"); + } + } + + /// Allocate memory block on the device. + /// \param num_bytes Number of bytes to allocate. + /// \param q Queue to execute the allocate task. + /// \returns A pointer to the newly allocated memory.
+ template + static inline void *sift_malloc(T num_bytes, + sycl::queue &q = get_default_queue()) + { + return detail::sift_malloc(static_cast(num_bytes), q); + } + + /// Get the host pointer from a buffer that is mapped to virtual pointer ptr. + /// \param ptr Virtual Pointer mapped to device buffer + /// \returns A host pointer + template + static inline T *get_host_ptr(const void *ptr) + { + auto BufferOffset = get_buffer_and_offset(ptr); + auto host_ptr = + BufferOffset.first.get_access() + .get_pointer(); + return (T *)(host_ptr + BufferOffset.second); + } + + /// Allocate memory block for 3D array on the device. + /// \param size Size of the memory block, in bytes. + /// \param q Queue to execute the allocate task. + /// \returns A pitched_data object which stores the memory info. + static inline pitched_data + sift_malloc(sycl::range<3> size, sycl::queue &q = get_default_queue()) + { + pitched_data pitch(nullptr, 0, size.get(0), size.get(1)); + size_t pitch_size; + pitch.set_data_ptr(detail::sift_malloc(pitch_size, size.get(0), size.get(1), + size.get(2), q)); + pitch.set_pitch(pitch_size); + return pitch; + } + + /// Allocate memory block for 2D array on the device. + /// \param [out] pitch Aligned size of x in bytes. + /// \param x Range in dim x. + /// \param y Range in dim y. + /// \param q Queue to execute the allocate task. + /// \returns A pointer to the newly allocated memory. + static inline void *sift_malloc(size_t &pitch, size_t x, size_t y, + sycl::queue &q = get_default_queue()) + { + return detail::sift_malloc(pitch, x, y, 1, q); + } + + /// Free device memory; a null \p ptr is ignored. + /// \param ptr Pointer to the memory to free. + /// \param q Queue to execute the free task. + /// \returns no return value.
+ static inline void infra_free(void *ptr, + sycl::queue &q = get_default_queue()) + { + if (ptr) + { +#ifdef INFRA_USM_LEVEL_NONE + detail::mem_mgr::instance().mem_free(ptr); +#else + sycl::free(ptr, q.get_context()); +#endif // INFRA_USM_LEVEL_NONE + } + } + +#ifndef INFRA_USM_LEVEL_NONE + /// Free the device memory pointed to by a batch of pointers in \p pointers which + /// are related to \p q after \p events have completed. + /// + /// \param pointers Pointers to the device memory requested to be freed. + /// \param events The events to wait for before freeing. + /// \param q The sycl::queue the memory relates to. + inline void async_infra_free(std::vector pointers, + std::vector events, + sycl::queue &q = get_default_queue()) + { + std::thread t( + [](std::vector pointers, std::vector events, + sycl::context ctxt) + { + sycl::event::wait(events); + for (auto p : pointers) + sycl::free(p, ctxt); + }, + std::move(pointers), std::move(events), q.get_context()); + get_current_device().add_task(std::move(t)); + } +#endif + + /// Synchronously copies \p size bytes from the address specified by \p from_ptr + /// to the address specified by \p to_ptr. The value of \p direction is used to + /// set the copy direction, it can be \a host_to_host, \a host_to_device, + /// \a device_to_host, \a device_to_device or \a automatic. The function will + /// return after the copy is completed. + /// + /// \param to_ptr Pointer to destination memory address. + /// \param from_ptr Pointer to source memory address. + /// \param size Number of bytes to be copied. + /// \param direction Direction of the copy. + /// \param q Queue to execute the copy task. + /// \returns no return value.
+ static void sift_memcpy(void *to_ptr, const void *from_ptr, size_t size, + memcpy_direction direction = automatic, + sycl::queue &q = get_default_queue()) + { + detail::sift_memcpy(q, to_ptr, from_ptr, size, direction).wait(); + } + + /// Asynchronously copies \p size bytes from the address specified by \p + /// from_ptr to the address specified by \p to_ptr. The value of \p direction is + /// used to set the copy direction, it can be \a host_to_host, \a + /// host_to_device, \a device_to_host, \a device_to_device or \a automatic. The + /// return of the function does NOT guarantee the copy is completed. + /// + /// \param to_ptr Pointer to destination memory address. + /// \param from_ptr Pointer to source memory address. + /// \param size Number of bytes to be copied. + /// \param direction Direction of the copy. + /// \param q Queue to execute the copy task. + /// \returns no return value. + static void async_sift_memcpy(void *to_ptr, const void *from_ptr, size_t size, + memcpy_direction direction = automatic, + sycl::queue &q = infra::get_default_queue()) + { + detail::sift_memcpy(q, to_ptr, from_ptr, size, direction); + } + + /// Synchronously copies 2D matrix specified by \p x and \p y from the address + /// specified by \p from_ptr to the address specified by \p to_ptr, while \p + /// from_pitch and \p to_pitch are the range of dim x in bytes of the matrix + /// specified by \p from_ptr and \p to_ptr. The value of \p direction is used to + /// set the copy direction, it can be \a host_to_host, \a host_to_device, \a + /// device_to_host, \a device_to_device or \a automatic. The function will + /// return after the copy is completed. + /// + /// \param to_ptr Pointer to destination memory address. + /// \param to_pitch Range of dim x in bytes of destination matrix. + /// \param from_ptr Pointer to source memory address. + /// \param from_pitch Range of dim x in bytes of source matrix. + /// \param x Range of dim x of matrix to be copied. 
+ /// \param y Range of dim y of matrix to be copied. + /// \param direction Direction of the copy. + /// \param q Queue to execute the copy task. + /// \returns no return value. + static inline void sift_memcpy(void *to_ptr, size_t to_pitch, + const void *from_ptr, size_t from_pitch, + size_t x, size_t y, + memcpy_direction direction = automatic, + sycl::queue &q = infra::get_default_queue()) + { + sycl::event::wait(detail::sift_memcpy(q, to_ptr, from_ptr, to_pitch, + from_pitch, x, y, direction)); + } + + /// Asynchronously copies 2D matrix specified by \p x and \p y from the address + /// specified by \p from_ptr to the address specified by \p to_ptr, while \p + /// \p from_pitch and \p to_pitch are the range of dim x in bytes of the matrix + /// specified by \p from_ptr and \p to_ptr. The value of \p direction is used to + /// set the copy direction, it can be \a host_to_host, \a host_to_device, \a + /// device_to_host, \a device_to_device or \a automatic. The return of the + /// function does NOT guarantee the copy is completed. + /// + /// \param to_ptr Pointer to destination memory address. + /// \param to_pitch Range of dim x in bytes of destination matrix. + /// \param from_ptr Pointer to source memory address. + /// \param from_pitch Range of dim x in bytes of source matrix. + /// \param x Range of dim x of matrix to be copied. + /// \param y Range of dim y of matrix to be copied. + /// \param direction Direction of the copy. + /// \param q Queue to execute the copy task. + /// \returns no return value. + static inline void + async_sift_memcpy(void *to_ptr, size_t to_pitch, const void *from_ptr, + size_t from_pitch, size_t x, size_t y, + memcpy_direction direction = automatic, + sycl::queue &q = get_default_queue()) + { + detail::sift_memcpy(q, to_ptr, from_ptr, to_pitch, from_pitch, x, y, + direction); + } + + /// Synchronously copies a subset of a 3D matrix specified by \p to to another + /// 3D matrix specified by \p from. 
The from and to position info are specified + /// by \p from_pos and \p to_pos. The copied matrix size is specified by \p size. + /// The value of \p direction is used to set the copy direction; it can be \a + /// host_to_host, \a host_to_device, \a device_to_host, \a device_to_device or + /// \a automatic. The function will return after the copy is completed. + /// + /// \param to Destination matrix info. + /// \param to_pos Position of destination. + /// \param from Source matrix info. + /// \param from_pos Position of source. + /// \param size Range of the submatrix to be copied. + /// \param direction Direction of the copy. + /// \param q Queue to execute the copy task. + /// \returns no return value. + static inline void sift_memcpy(pitched_data to, sycl::id<3> to_pos, + pitched_data from, sycl::id<3> from_pos, + sycl::range<3> size, + memcpy_direction direction = automatic, + sycl::queue &q = infra::get_default_queue()) + { + sycl::event::wait( + detail::sift_memcpy(q, to, to_pos, from, from_pos, size, direction)); + } + + /// Asynchronously copies a subset of a 3D matrix specified by \p from to another + /// 3D matrix specified by \p to. The from and to position info are specified + /// by \p from_pos and \p to_pos. The copied matrix size is specified by \p size. + /// The value of \p direction is used to set the copy direction; it can be \a + /// host_to_host, \a host_to_device, \a device_to_host, \a device_to_device or + /// \a automatic. Returning from this function does NOT guarantee that the copy has + /// completed. + /// + /// \param to Destination matrix info. + /// \param to_pos Position of destination. + /// \param from Source matrix info. + /// \param from_pos Position of source. + /// \param size Range of the submatrix to be copied. + /// \param direction Direction of the copy. + /// \param q Queue to execute the copy task. + /// \returns no return value. 
+ static inline void + async_sift_memcpy(pitched_data to, sycl::id<3> to_pos, pitched_data from, + sycl::id<3> from_pos, sycl::range<3> size, + memcpy_direction direction = automatic, + sycl::queue &q = get_default_queue()) + { + detail::sift_memcpy(q, to, to_pos, from, from_pos, size, direction); + } + + /// Synchronously sets \p value to the first \p size bytes starting from \p + /// dev_ptr. The function will return after the memset operation is completed. + /// + /// \param dev_ptr Pointer to the device memory address. + /// \param value Value to be set. + /// \param size Number of bytes to be set to the value. + /// \param q The queue in which the operation is done. + /// \returns no return value. + static void sift_memset(void *dev_ptr, int value, size_t size, + sycl::queue &q = get_default_queue()) + { + detail::sift_memset(q, dev_ptr, value, size).wait(); + } + + /// Asynchronously sets \p value to the first \p size bytes starting from \p + /// dev_ptr. Returning from this function does NOT guarantee that the memset operation + /// has completed. The operation is submitted to the queue \p q. + /// + /// \param dev_ptr Pointer to the device memory address. + /// \param value Value to be set. + /// \param size Number of bytes to be set to the value. + /// \returns no return value. + static void async_sift_memset(void *dev_ptr, int value, size_t size, + sycl::queue &q = infra::get_default_queue()) + { + detail::sift_memset(q, dev_ptr, value, size); + } + + /// Sets \p val to the 2D memory region pointed to by \p ptr in \p q. \p x and + /// \p y specify the size of the 2D memory region to set. \p pitch is the bytes in linear + /// dimension, including padding bytes. The function will return after the + /// memset operation is completed. + /// + /// \param ptr Pointer to the device memory region. + /// \param pitch Bytes in linear dimension, including padding bytes. + /// \param val Value to be set. + /// \param x The memory size to set in linear dimension. + /// \param y The memory size to set in second dimension. 
+ /// \param q The queue in which the operation is done. + /// \returns no return value. + static inline void sift_memset(void *ptr, size_t pitch, int val, size_t x, + size_t y, + sycl::queue &q = get_default_queue()) + { + sycl::event::wait(detail::sift_memset(q, ptr, pitch, val, x, y)); + } + + /// Sets \p val to the 2D memory region pointed to by \p ptr in \p q. \p x and + /// \p y specify the size of the 2D memory region to set. \p pitch is the bytes in linear + /// dimension, including padding bytes. Returning from this function does NOT + /// guarantee that the memset operation has completed. + /// + /// \param ptr Pointer to the device memory region. + /// \param pitch Bytes in linear dimension, including padding bytes. + /// \param val Value to be set. + /// \param x The memory size to set in linear dimension. + /// \param y The memory size to set in second dimension. + /// \param q The queue in which the operation is done. + /// \returns no return value. + static inline void async_sift_memset(void *ptr, size_t pitch, int val, size_t x, + size_t y, + sycl::queue &q = get_default_queue()) + { + detail::sift_memset(q, ptr, pitch, val, x, y); + } + + /// Sets \p val to the 3D memory region specified by \p pitch in \p q. \p size + /// specifies the size of the 3D memory region to set. The function will return after the + /// memset operation is completed. + /// + /// \param pitch Specifies the 3D memory region. + /// \param val Value to be set. + /// \param size The 3D memory size to set. + /// \param q The queue in which the operation is done. + /// \returns no return value. + static inline void sift_memset(pitched_data pitch, int val, + sycl::range<3> size, + sycl::queue &q = get_default_queue()) + { + sycl::event::wait(detail::sift_memset(q, pitch, val, size)); + } + + /// Sets \p val to the 3D memory region specified by \p pitch in \p q. \p size + /// specifies the size of the 3D memory region to set. Returning from this function does NOT + /// guarantee that the memset operation has completed. 
+ /// + /// \param pitch Specifies the 3D memory region. + /// \param val Value to be set. + /// \param size The 3D memory size to set. + /// \param q The queue in which the operation is done. + /// \returns no return value. + static inline void async_sift_memset(pitched_data pitch, int val, + sycl::range<3> size, + sycl::queue &q = get_default_queue()) + { + detail::sift_memset(q, pitch, val, size); + } + + /// infra accessor used as device function parameter. + template + class accessor; + template + class accessor + { + public: + using memory_t = detail::memory_traits; + using element_t = typename memory_t::element_t; + using pointer_t = typename memory_t::pointer_t; + using accessor_t = typename memory_t::template accessor_t<3>; + accessor(pointer_t data, const sycl::range<3> &in_range) + : _data(data), _range(in_range) {} + template + accessor(typename std::enable_if::type &acc) + : accessor(acc, acc.get_range()) {} + accessor(const accessor_t &acc, const sycl::range<3> &in_range) + : accessor(acc.get_pointer(), in_range) {} + accessor operator[](size_t index) const + { + sycl::range<2> sub(_range.get(1), _range.get(2)); + return accessor(_data + index * sub.size(), sub); + } + + private: + pointer_t _data; + sycl::range<3> _range; + }; + template + class accessor + { + public: + using memory_t = detail::memory_traits; + using element_t = typename memory_t::element_t; + using pointer_t = typename memory_t::pointer_t; + using accessor_t = typename memory_t::template accessor_t<2>; + accessor(pointer_t data, const sycl::range<2> &in_range) + : _data(data), _range(in_range) {} + template + accessor(typename std::enable_if::type &acc) + : accessor(acc, acc.get_range()) {} + accessor(const accessor_t &acc, const sycl::range<2> &in_range) + : accessor(acc.get_pointer(), in_range) {} + + pointer_t operator[](size_t index) const + { + return _data + _range.get(1) * index; + } + + private: + pointer_t _data; + sycl::range<2> _range; + }; + + namespace detail + { + /// 
Device variable with address space of shared, global or constant. + template + class device_memory + { + public: + using accessor_t = + typename detail::memory_traits::template accessor_t; + using value_t = typename detail::memory_traits::value_t; + using infra_accessor_t = infra::accessor; + + device_memory() : device_memory(sycl::range(1)) {} + + /// Constructor of 1-D array with initializer list + template + device_memory( + const typename std::enable_if>::type &in_range, + std::initializer_list &&init_list) + : device_memory(in_range) + { + assert(init_list.size() <= in_range.size()); + _host_ptr = (value_t *)std::malloc(_size); + std::memset(_host_ptr, 0, _size); + std::memcpy(_host_ptr, init_list.begin(), init_list.size() * sizeof(T)); + } + + /// Constructor of 2-D array with initializer list + template + device_memory( + const typename std::enable_if>::type &in_range, + std::initializer_list> &&init_list) + : device_memory(in_range) + { + assert(init_list.size() <= in_range[0]); + _host_ptr = (value_t *)std::malloc(_size); + std::memset(_host_ptr, 0, _size); + auto tmp_data = _host_ptr; + for (auto sub_list : init_list) + { + assert(sub_list.size() <= in_range[1]); + std::memcpy(tmp_data, sub_list.begin(), sub_list.size() * sizeof(T)); + tmp_data += in_range[1]; + } + } + + /// Constructor with range + device_memory(const sycl::range &range_in) + : _size(range_in.size() * sizeof(T)), _range(range_in), _reference(false), + _host_ptr(nullptr), _device_ptr(nullptr) + { + static_assert( + (Memory == global) || (Memory == constant) || (Memory == shared), + "device memory region should be global, constant or shared"); + // Make sure that singleton class mem_mgr and dev_mgr will destruct later + // than this. + detail::mem_mgr::instance(); + dev_mgr::instance(); + } + + /// Constructor with range given as individual dimension sizes + template + device_memory(Args... 
Arguments) + : device_memory(sycl::range(Arguments...)) {} + + device_memory(const device_memory &) = delete; + device_memory &operator=(const device_memory &) = delete; + ~device_memory() + { + if (_device_ptr && !_reference) + { + try + { + infra_free(_device_ptr); + } + catch (std::exception const &e) + { + std::cerr << e.what() << '\n'; + } + } + if (_host_ptr) + std::free(_host_ptr); + } + + /// Allocate memory with the default queue, and initialize it if an initial value was given. + void init() + { + init(infra::get_default_queue()); + } + /// Allocate memory with the specified queue, and initialize it if an initial value was given. + void init(sycl::queue &q) + { + if (_device_ptr) + return; + if (!_size) + return; + allocate_device(q); + if (_host_ptr) + detail::sift_memcpy(q, _device_ptr, _host_ptr, _size, host_to_device); + } + + /// The variable is assigned to a device pointer: the object is destroyed and re-created in place as a non-owning reference to \p src (the private constructor resets _host_ptr to nullptr so the freed host copy is not freed again). + void assign(value_t *src, size_t size) + { + this->~device_memory(); + new (this) device_memory(src, size); + } + + /// Get memory pointer of the memory object, which is virtual pointer when + /// usm is not used, and device pointer when usm is used. + value_t *get_ptr() + { + return get_ptr(get_default_queue()); + } + /// Get memory pointer of the memory object, which is virtual pointer when + /// usm is not used, and device pointer when usm is used. + value_t *get_ptr(sycl::queue &q) + { + init(q); + return _device_ptr; + } + + /// Get the device memory object size in bytes. + size_t get_size() { return _size; } + + template + typename std::enable_if::type &operator[](size_t index) + { + init(); +#ifdef INFRA_USM_LEVEL_NONE + return infra::get_buffer::type>( + _device_ptr) + .template get_access()[index]; +#else + return _device_ptr[index]; +#endif // INFRA_USM_LEVEL_NONE + } + +#ifdef INFRA_USM_LEVEL_NONE + /// Get sycl::accessor for the device memory object when usm is not used. 
+ accessor_t get_access(sycl::handler &cgh) + { + return get_buffer(_device_ptr) + .template reinterpret(_range) + .template get_access::mode, + detail::memory_traits::target>(cgh); + } +#else + /// Get infra::accessor with dimension info for the device memory object + /// when usm is used and dimension is greater than 1. + template + typename std::enable_if::type + get_access(sycl::handler &cgh) + { + return infra_accessor_t((T *)_device_ptr, _range); + } +#endif // INFRA_USM_LEVEL_NONE + + private: + device_memory(value_t *memory_ptr, size_t size) + : _size(size), _range(size / sizeof(T)), _reference(true), + _host_ptr(nullptr), _device_ptr(memory_ptr) {} + + void allocate_device(sycl::queue &q) + { +#ifndef INFRA_USM_LEVEL_NONE + if (Memory == shared) + { + _device_ptr = (value_t *)sycl::malloc_shared( + _size, q.get_device(), q.get_context()); + return; + } +#endif + _device_ptr = (value_t *)detail::sift_malloc(_size, q); + } + + size_t _size; + sycl::range _range; + bool _reference; + value_t *_host_ptr; + value_t *_device_ptr; + }; + template + class device_memory : public device_memory + { + public: + using base = device_memory; + using value_t = typename base::value_t; + using accessor_t = + typename detail::memory_traits::template accessor_t<0>; + + /// Constructor with initial value. + device_memory(const value_t &val) : base(sycl::range<1>(1), {val}) {} + + /// Default constructor + device_memory() : base(1) {} + +#ifdef INFRA_USM_LEVEL_NONE + /// Get sycl::accessor for the device memory object when usm is not used. 
+ accessor_t get_access(sycl::handler &cgh) + { + auto buf = get_buffer(base::get_ptr()) + .template reinterpret(sycl::range<1>(1)); + return accessor_t(buf, cgh); + } +#endif // INFRA_USM_LEVEL_NONE + }; + } + + template + using global_memory = detail::device_memory; + template + using constant_memory = detail::device_memory; + template + using shared_memory = detail::device_memory; +} // namespace infra + +#endif // __INFRA_MEMORY_HPP__ diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/mainSift.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/mainSift.cpp new file mode 100644 index 000000000..12d1b943a --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/mainSift.cpp @@ -0,0 +1,313 @@ +//********************************************************// +// CUDA SIFT extractor by Marten Björkman aka Celebrandil // +// celle @ csc.kth.se // +//********************************************************// + +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include +#include +#include +#include + +#include "cudaImage.h" +#include "cudaSift.h" +#include "infra/infra.hpp" +#include "Utility.h" + +#ifndef KERNEL_USE_PROFILE +#define KERNEL_USE_PROFILE 0 +#endif + +void copyData(void *host, void *dev, size_t size); +int ImproveHomography(SiftData &data, float *homography, int numLoops, float minScore, float maxAmbiguity, float thresh); +void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img); +void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography); + +double ScaleUp(CudaImage &res, CudaImage &src); + +/////////////////////////////////////////////////////////////////////////////// +// Main program +/////////////////////////////////////////////////////////////////////////////// +int main(int argc, char **argv) +{ + auto totalProgTimer_start = std::chrono::steady_clock::now(); + int devNum = 0, imgSet = 0; + if (argc > 1) + devNum = std::atoi(argv[1]); + if (argc > 2) + imgSet = std::atoi(argv[2]); + + float totTime = 0.0; + float imageInitTime = 0.0; + float extractSiftTime = 0.0; + float matchingTime = 0.0; + + sycl::device dev = sycl::device(sycl::gpu_selector()); + sycl::property_list q_prop{sycl::property::queue::in_order()}; + +#ifdef DEVICE_TIMER + auto q_time_start = std::chrono::steady_clock::now(); +#endif + sycl::queue q_ct(dev, q_prop); +#ifdef DEVICE_TIMER + auto q_time_stop = std::chrono::steady_clock::now(); + // std::cout << "Queue creation Time is " << std::chrono::duration(q_time_stop - q_time_start).count() << " us" << std::endl; + imageInitTime += std::chrono::duration(q_time_stop - q_time_start).count(); +#endif + + // Read 
images using OpenCV + cv::Mat limg, rimg; + auto ioRead_start = std::chrono::steady_clock::now(); + if (imgSet) + { + cv::imread("../../inputData/left.pgm", 0).convertTo(limg, CV_32FC1); + cv::imread("../../inputData/righ.pgm", 0).convertTo(rimg, CV_32FC1); + } + else + { + cv::imread("../../inputData/img1.png", 0).convertTo(limg, CV_32FC1); + cv::imread("../../inputData/img2.png", 0).convertTo(rimg, CV_32FC1); + } + auto ioRead_stop = std::chrono::steady_clock::now(); + float ioReadTime = std::chrono::duration(ioRead_stop - ioRead_start).count(); + unsigned int w = limg.cols; + unsigned int h = limg.rows; + std::cout << "Image size = (" << w << "," << h << ")" << std::endl; + + // Initial Cuda images and download images to device + std::cout << "Initializing data..." << std::endl; + CudaImage img1, img2; + + img1.Allocate(w, h, iAlignUp(w, 128), false, q_ct, imageInitTime, NULL, (float *)limg.data); + img2.Allocate(w, h, iAlignUp(w, 128), false, q_ct, imageInitTime, NULL, (float *)rimg.data); + // std::cout << "Img Allocate time " << totTime << std::endl; + try + { + img1.Download(q_ct, imageInitTime); + img2.Download(q_ct, imageInitTime); + } + catch (sycl::exception const &e) + { + std::cerr << e.what() << '\n'; + } + // std::cout << "Img Download time " << totTime << std::endl; + + // Extract Sift features from images + SiftData siftData1, siftData2; + float initBlur = 1.0f; + float thresh = (imgSet ? 
4.5f : 2.0f); + InitSiftData(siftData1, q_ct, imageInitTime, 32768, true, true); + InitSiftData(siftData2, q_ct, imageInitTime, 32768, true, true); + + // A bit of benchmarking + // for (int thresh1=1.00f;thresh1<=4.01f;thresh1+=0.50f) { + float *memoryTmp = AllocSiftTempMemory(w, h, 5, q_ct, imageInitTime, false); + for (int i = 0; i < 50; i++) + { + float time = 0.0; + try + { + ExtractSift(siftData1, img1, 5, initBlur, thresh, q_ct, time, 0.0f, false, memoryTmp); + extractSiftTime += time; + time = 0.0; + ExtractSift(siftData2, img2, 5, initBlur, thresh, q_ct, time, 0.0f, false, memoryTmp); + } + catch (std::exception const &e) + { + std::cerr << e.what() << '\n'; + } + extractSiftTime += time; + } + FreeSiftTempMemory(memoryTmp, q_ct); + + // Match Sift features and find a homography + for (int i = 0; i < 1; i++) + MatchSiftData(siftData1, siftData2, q_ct, matchingTime); + float homography[9]; + int numMatches; + try + { + FindHomography(siftData1, homography, &numMatches, q_ct, matchingTime, 10000, 0.0f, 0.80f, 5.0); + } + catch (std::exception const &e) + { + std::cerr << e.what() << '\n'; + } + int numFit = ImproveHomography(siftData1, homography, 5, 0.00f, 0.80f, 3.0); + float matchPercentage = 100.0f * numFit / std::min(siftData1.numPts, siftData2.numPts); + + std::cout << "Number of original features: " << siftData1.numPts << " " << siftData2.numPts << std::endl; + std::cout << "Number of matching features: " << numFit << " " << numMatches << " " << matchPercentage << "% " << initBlur << " " << thresh << "\n" + << std::endl; + +#ifdef DEVICE_TIMER + totTime = imageInitTime + extractSiftTime + matchingTime; + std::cout << "Images initialization time = " << imageInitTime / 1000 << " ms" << std::endl; + std::cout << "Feature extraction time = " << extractSiftTime / 1000 << " ms" << std::endl; + std::cout << "Matching time = " << matchingTime / 1000 << " ms" + << "\n" + << std::endl; + std::cout << "Total Device Time = " << totTime / 1000 << " ms" + << "\n" + 
<< std::endl; +#endif + // data validation + auto dataVerficationTimer_start = std::chrono::steady_clock::now(); + int data_verification_flag = Utility::RunDataVerification(thresh, matchPercentage); + auto dataVerficationTimer_stop = std::chrono::steady_clock::now(); + float dataVerificationTime = + std::chrono::duration(dataVerficationTimer_stop - dataVerficationTimer_start).count(); + // Print out and store summary data + // PrintMatchData(siftData1, siftData2, img1); + // cv::imwrite("../../data/limg_pts.pgm", limg); + + // MatchAll(siftData1, siftData2, homography); + + // Free Sift data from device + FreeSiftData(siftData1, q_ct); + FreeSiftData(siftData2, q_ct); + + auto totalProgTimer_end = std::chrono::steady_clock::now(); + float totalProgramTime = std::chrono::duration(totalProgTimer_end - totalProgTimer_start).count() - ioReadTime - dataVerificationTime; + std::cout << "Total workload time = " << totalProgramTime / 1000 << " ms" + << "\n" + << std::endl; + return data_verification_flag; +} + +void MatchAll(SiftData &siftData1, SiftData &siftData2, float *homography) +{ +#ifdef MANAGEDMEM + SiftPoint *sift1 = siftData1.m_data; + SiftPoint *sift2 = siftData2.m_data; +#else + SiftPoint *sift1 = siftData1.h_data; + SiftPoint *sift2 = siftData2.h_data; +#endif + int numPts1 = siftData1.numPts; + int numPts2 = siftData2.numPts; + int numFound = 0; +#if 1 + homography[0] = homography[4] = -1.0f; + homography[1] = homography[3] = homography[6] = homography[7] = 0.0f; + homography[2] = 1279.0f; + homography[5] = 959.0f; +#endif + for (int i = 0; i < numPts1; i++) + { + float *data1 = sift1[i].data; + std::cout << i << ":" << sift1[i].scale << ":" << (int)sift1[i].orientation << " " << sift1[i].xpos << " " << sift1[i].ypos << std::endl; + bool found = false; + for (int j = 0; j < numPts2; j++) + { + float *data2 = sift2[j].data; + float sum = 0.0f; + for (int k = 0; k < 128; k++) + sum += data1[k] * data2[k]; + float den = homography[6] * sift1[i].xpos + 
homography[7] * sift1[i].ypos + homography[8]; + float dx = (homography[0] * sift1[i].xpos + homography[1] * sift1[i].ypos + homography[2]) / den - sift2[j].xpos; + float dy = (homography[3] * sift1[i].xpos + homography[4] * sift1[i].ypos + homography[5]) / den - sift2[j].ypos; + float err = dx * dx + dy * dy; + if (err < 100.0f) // 100.0 + found = true; + if (err < 100.0f || j == sift1[i].match) + { // 100.0 + if (j == sift1[i].match && err < 100.0f) + std::cout << " *"; + else if (j == sift1[i].match) + std::cout << " -"; + else if (err < 100.0f) + std::cout << " +"; + else + std::cout << " "; + std::cout << j << ":" << sum << ":" << (int)sqrt(err) << ":" << sift2[j].scale << ":" << (int)sift2[j].orientation << " " << sift2[j].xpos << " " << sift2[j].ypos << " " << (int)dx << " " << (int)dy << std::endl; + } + } + std::cout << std::endl; + if (found) + numFound++; + } + std::cout << "Number of finds: " << numFound << " / " << numPts1 << std::endl; + std::cout << homography[0] << " " << homography[1] << " " << homography[2] << std::endl; //%%% + std::cout << homography[3] << " " << homography[4] << " " << homography[5] << std::endl; //%%% + std::cout << homography[6] << " " << homography[7] << " " << homography[8] << std::endl; //%%% +} + +void PrintMatchData(SiftData &siftData1, SiftData &siftData2, CudaImage &img) +{ + int numPts = siftData1.numPts; +#ifdef MANAGEDMEM + SiftPoint *sift1 = siftData1.m_data; + SiftPoint *sift2 = siftData2.m_data; +#else + SiftPoint *sift1 = siftData1.h_data; + SiftPoint *sift2 = siftData2.h_data; +#endif + float *h_img = img.h_data; + int w = img.width; + int h = img.height; + std::cout << std::setprecision(3); + for (int j = 0; j < numPts; j++) + { + int k = sift1[j].match; + if (sift1[j].match_error < 5) + { + float dx = sift2[k].xpos - sift1[j].xpos; + float dy = sift2[k].ypos - sift1[j].ypos; +#if 0 + if (false && sift1[j].xpos>550 && sift1[j].xpos<600) { + std::cout << "pos1=(" << (int)sift1[j].xpos << "," << 
(int)sift1[j].ypos << ") "; + std::cout << j << ": " << "score=" << sift1[j].score << " ambiguity=" << sift1[j].ambiguity << " match=" << k << " "; + std::cout << "scale=" << sift1[j].scale << " "; + std::cout << "error=" << (int)sift1[j].match_error << " "; + std::cout << "orient=" << (int)sift1[j].orientation << "," << (int)sift2[k].orientation << " "; + std::cout << " delta=(" << (int)dx << "," << (int)dy << ")" << std::endl; + } +#endif +#if 1 + int len = (int)(fabs(dx) > fabs(dy) ? fabs(dx) : fabs(dy)); + for (int l = 0; l < len; l++) + { + int x = (int)(sift1[j].xpos + dx * l / len); + int y = (int)(sift1[j].ypos + dy * l / len); + h_img[y * w + x] = 255.0f; + } +#endif + } + int x = (int)(sift1[j].xpos + 0.5); + int y = (int)(sift1[j].ypos + 0.5); + int s = std::min(x, std::min(y, std::min(w - x - 2, std::min(h - y - 2, (int)(1.41 * sift1[j].scale))))); + int p = y * w + x; + p += (w + 1); + for (int k = 0; k < s; k++) + h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 0.0f; + p -= (w + 1); + for (int k = 0; k < s; k++) + h_img[p - k] = h_img[p + k] = h_img[p - k * w] = h_img[p + k * w] = 255.0f; + } + std::cout << std::setprecision(6); +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/SYCL/matching.dp.cpp b/third-party-programs/Velocity-Bench/cudaSift/SYCL/matching.dp.cpp new file mode 100644 index 000000000..a5c4f10db --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/SYCL/matching.dp.cpp @@ -0,0 +1,1944 @@ +// Modifications Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + 
+// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include "infra/infra.hpp" +#include "cudaSift.h" +#include "cudautils.h" + +//================= Device matching functions =====================// + +void memcopyKernel(float *src, float *dst, size_t src_pitch, size_t dst_pitch, int numPts, size_t width) +{ + char *d_src = (char *)src; + char *d_dst = (char *)dst; + +#pragma unroll + for (int i = 0; i < numPts; ++i) + { +#pragma unroll + for (int j = 0; j < width; ++j) + { + d_dst[j] = d_src[j]; + } + d_src = d_src + src_pitch; + d_dst = d_dst + dst_pitch; + } +} + +void MatchSiftPoints(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, float *siftPoint, float *sums) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1 = item_ct1.get_group(2); + const int p2 = item_ct1.get_group(1) * 16 + ty; + const float *ptr1 = sift1[p1].data; + const float *ptr2 = sift2[p2].data; + const int i = 16 * ty + tx; + if (ty < 8) + siftPoint[i] = ptr1[i]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + float sum = 0.0f; + if (p2 < numPts2) + +#pragma unroll + for (int j = 0; j < 8; j++) + sum += siftPoint[16 * j + tx] * ptr2[16 * j + tx]; + sums[i] = sum; + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < 8) 
+ sums[i] += sums[i + 8]; + item_ct1.barrier(sycl::access::fence_space::local_space); + if (tx < 4) + sums[i] += sums[i + 4]; + item_ct1.barrier(sycl::access::fence_space::local_space); + if (ty == 0) + { + sum = sums[16 * tx + 0] + sums[16 * tx + 1] + sums[16 * tx + 2] + sums[16 * tx + 3]; + corrData[p1 * item_ct1.get_group_range(1) * 16 + + item_ct1.get_group(1) * 16 + tx] = sum; + } + item_ct1.barrier(sycl::access::fence_space::local_space); +} + +void MatchSiftPoints2(SiftPoint *sift1, SiftPoint *sift2, float *corrData, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, float *siftPoints1, + float *siftPoints2) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const float *ptr1 = + sift1[sycl::min((unsigned int)(numPts1 - 1), + (unsigned int)(item_ct1.get_group(2) * 16 + ty))] + .data; + const float *ptr2 = + sift2[sycl::min((unsigned int)(numPts2 - 1), + (unsigned int)(item_ct1.get_group(1) * 16 + ty))] + .data; + +#pragma unroll + for (int i = 0; i < 8; i++) + { + siftPoints1[128 * ty + 16 * i + tx] = ptr1[16 * i + tx]; + siftPoints2[128 * ty + 16 * i + tx] = ptr2[16 * i + tx]; + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + const int p1 = item_ct1.get_group(2) * 16 + ty; + const int p2 = item_ct1.get_group(1) * 16 + tx; + const float *pt1 = &siftPoints1[ty * 128]; + const float *pt2 = &siftPoints2[tx * 128]; + float sum = 0.0f; + +#pragma unroll + for (int i = 0; i < 128; i++) + { + int itx = (i + tx) & 127; // avoid bank conflicts + sum += pt1[itx] * pt2[itx]; + } + if (p1 < numPts1) + corrData[p1 * item_ct1.get_group_range(1) * 16 + p2] = + (p2 < numPts2 ? 
sum : -1.0f); +} + +void FindMaxCorr(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int corrWidth, int siftSize, + sycl::nd_item<3> item_ct1, float *maxScore, float *maxScor2, + int *maxIndex) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * 16 + tx; + int p1 = item_ct1.get_group(2) * 16 + item_ct1.get_local_id(1); + p1 = (p1 >= numPts1 ? numPts1 - 1 : p1); + maxScore[idx] = -1.0f; + maxScor2[idx] = -1.0f; + maxIndex[idx] = -1; + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + float *corrs = &corrData[p1 * corrWidth]; + +#pragma unroll + for (int i = tx; i < corrWidth; i += 16) + { + float val = corrs[i]; + if (val > maxScore[idx]) + { + maxScor2[idx] = maxScore[idx]; + maxScore[idx] = val; + maxIndex[idx] = i; + } + else if (val > maxScor2[idx]) + maxScor2[idx] = val; + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + +#pragma unroll + for (int len = 8; len > 0; len /= 2) + { + if (tx < 8) + { + float val = maxScore[idx + len]; + int i = maxIndex[idx + len]; + if (val > maxScore[idx]) + { + maxScor2[idx] = maxScore[idx]; + maxScore[idx] = val; + maxIndex[idx] = i; + } + else if (val > maxScor2[idx]) + maxScor2[idx] = val; + float va2 = maxScor2[idx + len]; + if (va2 > maxScor2[idx]) + maxScor2[idx] = va2; + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + if (tx == 0) + { + sift1[p1].score = maxScore[ty * 16]; + sift1[p1].ambiguity = maxScor2[ty * 16] / (maxScore[ty * 16] + 1e-6); + sift1[p1].match = maxIndex[ty * 16]; + sift1[p1].match_xpos = sift2[maxIndex[ty * 16]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty * 16]].ypos; + } +} + +void FindMaxCorr3(float *corrData, SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, int *maxIndex) +{ + int block_dim = item_ct1.get_local_range().get(2); // blockDim.x == 16 + const int tx = item_ct1.get_local_id(2); + const int ty = 
item_ct1.get_local_id(1); + const int p1 = item_ct1.get_group(2) * block_dim + ty; + const int idx = ty * 16 + tx; + + maxIndex[idx] = 0; + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + + float *corrs = NULL; + if (p1 < numPts1) + { + corrs = &corrData[p1 * block_dim * 2]; + corrs[tx] = 0.0f; + corrs[tx + 16] = 0.0f; + const float *pt1 = sift1[p1].data; + for (int p2 = tx; p2 < numPts2; p2 += 16) + { + float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int i = 0; i < 128; i++) + sum += pt1[i] * pt2[i]; + if (sum > corrs[tx]) + { + corrs[tx + 16] = corrs[tx]; + corrs[tx] = sum; + maxIndex[idx] = p2; + } + else if (sum > corrs[tx + 16]) + corrs[tx + 16] = sum; + } + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (p1 < numPts1) + { + for (int len = 8; len > 0; len /= 2) + { + if (tx < len) + { + float val = corrs[tx + len]; + int i = maxIndex[idx + len]; + if (val > corrs[tx]) + { + corrs[tx + 16] = corrs[tx]; + corrs[tx] = val; + maxIndex[idx] = i; + } + else if (val > corrs[tx + 16]) + corrs[tx + 16] = val; + float va2 = corrs[tx + 16 + len]; + if (va2 > corrs[tx + 16]) + corrs[tx + 16] = va2; + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + if (tx == 0) + { + sift1[p1].score = corrs[0]; + sift1[p1].ambiguity = corrs[16] / (corrs[0] + 1e-6); + sift1[p1].match = maxIndex[ty << 4]; + sift1[p1].match_xpos = sift2[maxIndex[ty << 4]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty << 4]].ypos; + } + } +} + +#define FMC2W 16 +#define FMC2H 4 + +void FindMaxCorr2(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, float *siftPoint, float *maxScore, + float *maxScor2, int *maxIndex) +{ + + const int p1 = item_ct1.get_group(2); + if (p1 >= numPts1) + return; + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * FMC2W + tx; + if (idx < FMC2H) + { + maxScore[idx] = -1.0f; + maxScor2[idx] = -1.0f; + maxIndex[idx] 
= 0; + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + const float *pt1 = sift1[p1].data; + for (int i = idx; i < 128; i += FMC2W * FMC2H) + siftPoint[i] = pt1[i]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + for (int p2 = ty; p2 < numPts2; p2 += FMC2H) + { + const float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int j = tx; j < 128; j += FMC2W) + sum += siftPoint[j] * pt2[j]; + for (int j = FMC2W / 2; j > 0; j /= 2) + sum += ShiftDown(sum, j, item_ct1); + if (tx == 0) + { + if (sum > maxScore[ty]) + { + maxScor2[ty] = maxScore[ty]; + maxScore[ty] = sum; + maxIndex[ty] = p2; + } + else if (sum > maxScor2[ty]) + maxScor2[ty] = sum; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + for (int len = FMC2H / 2; len > 0; len /= 2) + { + if (ty == 0 && tx < len) + { + float val = maxScore[tx + len]; + int p2 = maxIndex[tx + len]; + if (val > maxScore[tx]) + { + maxScor2[tx] = maxScore[tx]; + maxScore[tx] = val; + maxIndex[tx] = p2; + } + else if (val > maxScor2[tx]) + maxScor2[tx] = val; + float va2 = maxScor2[tx + len]; + if (va2 > maxScor2[tx]) + maxScor2[tx] = va2; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + if (ty == 0 && tx == 0) + { + sift1[p1].score = maxScore[0]; + sift1[p1].ambiguity = maxScor2[0] / (maxScore[0] + 1e-6); + sift1[p1].match = maxIndex[0]; + sift1[p1].match_xpos = sift2[maxIndex[0]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[0]].ypos; + } +} + +void FindMaxCorr4(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, float *siftPoint, float *maxScore, + float *maxScor2, int *maxIndex) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + if (tx == 0) + { + maxScore[ty] = -1.0f; + maxScor2[ty] = -1.0f; + maxIndex[ty] = 0; + } + const int p1 = item_ct1.get_group(2) * FMC2H + ty; + const float *pt1 = sift1[p1].data; + for (int j = tx; j < 128; j += FMC2W) + siftPoint[128 
* ty + j] = pt1[j]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + for (int p2 = 0; p2 < numPts2; p2++) + { + const float *pt2 = sift2[p2].data; + float sum = 0.0f; + for (int j = tx; j < 128; j += FMC2W) + sum += siftPoint[128 * ty + j] * pt2[j]; + for (int j = FMC2W / 2; j > 0; j /= 2) + sum += ShiftDown(sum, j, item_ct1); + if (tx == 0) + { + if (sum > maxScore[ty]) + { + maxScor2[ty] = maxScore[ty]; + maxScore[ty] = sum; + maxIndex[ty] = p2; + } + else if (sum > maxScor2[ty]) + maxScor2[ty] = sum; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (tx == 0) + { + sift1[p1].score = maxScore[ty]; + sift1[p1].ambiguity = maxScor2[ty] / (maxScore[ty] + 1e-6); + sift1[p1].match = maxIndex[ty]; + sift1[p1].match_xpos = sift2[maxIndex[ty]].xpos; + sift1[p1].match_ypos = sift2[maxIndex[ty]].ypos; + } +} + +void CleanMatches(SiftPoint *sift1, int numPts1, sycl::nd_item<3> item_ct1) +{ + const int p1 = sycl::min( + (unsigned int)(item_ct1.get_group(2) * 64 + item_ct1.get_local_id(2)), + (unsigned int)(numPts1 - 1)); + sift1[p1].score = 0.0f; +} + +#define M7W 32 +#define M7H 32 +#define M7R 4 +#define NRX 2 +#define NDIM 128 + +void FindMaxCorr10(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, sycl::float4 *buffer1, + sycl::float4 *buffer2) +{ + + int tx = item_ct1.get_local_id(2); + int ty = item_ct1.get_local_id(1); + int bp1 = M7W * item_ct1.get_group(2); + +#pragma unroll + for (int j = ty; j < M7W; j += M7H / M7R) + { + int p1 = sycl::min((int)(bp1 + j), (int)(numPts1 - 1)); + +#pragma unroll + for (int d = tx; d < NDIM / 4; d += M7W) + { + buffer1[(j * NDIM / 4 + (d + j) % (NDIM / 4))] = ((sycl::float4 *)&sift1[p1].data)[d]; + } + } + + float max_score[NRX]; + float sec_score[NRX]; + int index[NRX]; + +#pragma unroll + for (int i = 0; i < NRX; i++) + { + max_score[i] = 0.0f; + sec_score[i] = 0.0f; + index[i] = -1; + } + int idx = ty * M7W + tx; + int ix = idx % (M7W / NRX); 
+ int iy = idx / (M7W / NRX); + +#pragma unroll + for (int bp2 = 0; bp2 < numPts2 - M7H + 1; bp2 += M7H) + { +#pragma unroll + for (int j = ty; j < M7H; j += M7H / M7R) + { + int p2 = sycl::min((int)(bp2 + j), (int)(numPts2 - 1)); +#pragma unroll + for (int d = tx; d < NDIM / 4; d += M7W) + buffer2[j * NDIM / 4 + d] = ((sycl::float4 *)&sift2[p2].data)[d]; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + + if (idx < M7W * M7H / M7R / NRX) + { + float score[M7R][NRX]; + +#pragma unroll + for (int dy = 0; dy < M7R; dy++) +#pragma unroll + for (int i = 0; i < NRX; i++) + score[dy][i] = 0.0f; + +#pragma unroll + for (int d = 0; d < NDIM / 4; d++) + { + sycl::float4 v1[NRX]; +#pragma unroll + for (int i = 0; i < NRX; i++) + v1[i] = buffer1[((M7W / NRX) * i + ix) * NDIM / 4 + (d + (M7W / NRX) * i + ix) % (NDIM / 4)]; + +#pragma unroll + for (int dy = 0; dy < M7R; dy++) + { + sycl::float4 v2 = buffer2[(M7R * iy + dy) * (NDIM / 4) + d]; +#pragma unroll + for (int i = 0; i < NRX; i++) + { + score[dy][i] += v1[i].x() * v2.x(); + score[dy][i] += v1[i].y() * v2.y(); + score[dy][i] += v1[i].z() * v2.z(); + score[dy][i] += v1[i].w() * v2.w(); + } + } + } + +#pragma unroll + for (int dy = 0; dy < M7R; dy++) + { +#pragma unroll + for (int i = 0; i < NRX; i++) + { + if (score[dy][i] > max_score[i]) + { + sec_score[i] = max_score[i]; + max_score[i] = score[dy][i]; + index[i] = + sycl::min((int)(bp2 + M7R * iy + dy), (int)(numPts2 - 1)); + } + else if (score[dy][i] > sec_score[i]) + sec_score[i] = score[dy][i]; + } + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + } + + float *scores1 = (float *)buffer1; + float *scores2 = &scores1[M7W * M7H / M7R]; + int *indices = (int *)&scores2[M7W * M7H / M7R]; + if (idx < M7W * M7H / M7R / NRX) + { +#pragma unroll + for (int i = 0; i < NRX; i++) + { + scores1[iy * M7W + (M7W / NRX) * i + ix] = max_score[i]; + scores2[iy * M7W + (M7W / NRX) * i + ix] = sec_score[i]; + indices[iy * M7W + (M7W / NRX) * i + 
ix] = index[i]; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + + if (ty == 0) + { + float max_score = scores1[tx]; + float sec_score = scores2[tx]; + int index = indices[tx]; + +#pragma unroll + for (int y = 0; y < M7H / M7R; y++) + if (index != indices[y * M7W + tx]) + { + if (scores1[y * M7W + tx] > max_score) + { + sec_score = sycl::max(max_score, sec_score); + max_score = scores1[y * M7W + tx]; + index = indices[y * M7W + tx]; + } + else if (scores1[y * M7W + tx] > sec_score) + sec_score = scores1[y * M7W + tx]; + } + sift1[bp1 + tx].score = max_score; + sift1[bp1 + tx].match = index; + sift1[bp1 + tx].match_xpos = sift2[index].xpos; + sift1[bp1 + tx].match_ypos = sift2[index].ypos; + sift1[bp1 + tx].ambiguity = sec_score / (max_score + 1e-6f); + } +} + +#define FMC_GH 512 +#define FMC_BW 32 +#define FMC_BH 32 +#define FMC_BD 16 +#define FMC_TW 1 +#define FMC_TH 4 +#define FMC_NW (FMC_BW / FMC_TW) // 32 +#define FMC_NH (FMC_BH / FMC_TH) // 8 +#define FMC_NT (FMC_NW * FMC_NH) // 256 = 8 warps + +infra::global_memory lock(0); + +void FindMaxCorr9(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, volatile int *lock, + sycl::float4 *siftParts1, sycl::float4 *siftParts2) +{ + // 4*32*8 = 1024 + // 4*32*8 = 1024 + //__shared__ float blksums[FMC_BW*FMC_BH]; // 32*32 = 1024 + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * FMC_NW + tx; + sycl::float4 *pts1 = 0, *pts2 = 0; + if (idx < FMC_BW) + { + const int p1l = + sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), + (unsigned int)(numPts1 - 1)); + pts1 = (sycl::float4 *)sift1[p1l].data; + } + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < sycl::min(FMC_GH, (int)(numPts2 - FMC_BH + 1)); + k += FMC_BH) + { + if (idx < FMC_BH) + { + const int p2l = + sycl::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), + (unsigned int)(numPts2 
- 1)); + pts2 = (sycl::float4 *)sift2[p2l].data; + } + float sums[FMC_TW * FMC_TH]; + for (int i = 0; i < FMC_TW * FMC_TH; i++) + sums[i] = 0.0f; + + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts1[(i + 0) * FMC_BW + idx] = pts1[0 + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts2[(i + 0) * FMC_BH + idx] = pts2[0 + i]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + + int b = FMC_BD / 2; + for (int d = FMC_BD / 2; d < 32; d += FMC_BD / 2) + { + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts1[(i + b) * FMC_BW + idx] = pts1[d + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD / 2; i++) + siftParts2[(i + b) * FMC_BH + idx] = pts2[d + i]; + + b ^= FMC_BD / 2; + for (int i = 0; i < FMC_BD / 2; i++) + { + sycl::float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x(); + sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y(); + sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z(); + sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w(); + } + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + + b ^= FMC_BD / 2; + for (int i = 0; i < FMC_BD / 2; i++) + { + sycl::float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[(i + b) * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + sycl::float4 v2 = siftParts2[(i + b) * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x(); + sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y(); + sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z(); + sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w(); + } + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + + float 
*blksums = (float *)siftParts1; + for (int iy = 0; iy < FMC_TH; iy++) + for (int ix = 0; ix < FMC_TW; ix++) + blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (idx < FMC_BW) + { + for (int j = 0; j < FMC_BH; j++) + { + float sum = blksums[j * FMC_BW + idx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = + sycl::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + j), + (unsigned int)(numPts2 - 1)); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), + (unsigned int)(numPts1 - 1)); + if (idx == 0) + while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0) + ; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (idx < FMC_BW) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (idx == 0) + infra::atomic_exchange((int *)lock, 0); +} + +void FindMaxCorr8(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, volatile int *lock, + sycl::float4 *siftParts1, sycl::float4 *siftParts2, + float *blksums) +{ + // 4*32*8 = 1024 + // 4*32*8 = 1024 + // 32*32 = 1024 + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = ty * FMC_NW + tx; + sycl::float4 *pts1 = 0, *pts2 = 0; + if (idx 
< FMC_BW) + { + const int p1l = + sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), + (unsigned int)(numPts1 - 1)); + pts1 = (sycl::float4 *)sift1[p1l].data; + } + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < sycl::min(FMC_GH, (int)(numPts2 - FMC_BH + 1)); + k += FMC_BH) + { + if (idx < FMC_BH) + { + const int p2l = + sycl::min((unsigned int)(item_ct1.get_group(1) * FMC_GH + k + idx), + (unsigned int)(numPts2 - 1)); + pts2 = (sycl::float4 *)sift2[p2l].data; + } + float sums[FMC_TW * FMC_TH]; + for (int i = 0; i < FMC_TW * FMC_TH; i++) + sums[i] = 0.0f; + for (int d = 0; d < 32; d += FMC_BD) + { + if (idx < FMC_BW) + for (int i = 0; i < FMC_BD; i++) + siftParts1[i * FMC_BW + idx] = pts1[d + i]; + if (idx < FMC_BH) + for (int i = 0; i < FMC_BD; i++) + siftParts2[i * FMC_BH + idx] = pts2[d + i]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + + for (int i = 0; i < FMC_BD; i++) + { + sycl::float4 v1[FMC_TW]; + for (int ix = 0; ix < FMC_TW; ix++) + v1[ix] = siftParts1[i * FMC_BW + (tx * FMC_TW + ix)]; + for (int iy = 0; iy < FMC_TH; iy++) + { + sycl::float4 v2 = siftParts2[i * FMC_BH + (ty * FMC_TH + iy)]; + for (int ix = 0; ix < FMC_TW; ix++) + { + sums[iy * FMC_TW + ix] += v1[ix].x() * v2.x(); + sums[iy * FMC_TW + ix] += v1[ix].y() * v2.y(); + sums[iy * FMC_TW + ix] += v1[ix].z() * v2.z(); + sums[iy * FMC_TW + ix] += v1[ix].w() * v2.w(); + } + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + // float *blksums = (float*)siftParts1; + for (int iy = 0; iy < FMC_TH; iy++) + for (int ix = 0; ix < FMC_TW; ix++) + blksums[(ty * FMC_TH + iy) * FMC_BW + (tx * FMC_TW + ix)] = sums[iy * FMC_TW + ix]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (idx < FMC_BW) + { + for (int j = 0; j < FMC_BH; j++) + { + float sum = blksums[j * FMC_BW + idx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = + sycl::min((unsigned 
int)(item_ct1.get_group(1) * FMC_GH + k + j), + (unsigned int)(numPts2 - 1)); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * FMC_BW + idx), + (unsigned int)(numPts1 - 1)); + if (idx == 0) + while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0) + ; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (idx < FMC_BW) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (idx == 0) + infra::atomic_exchange((int *)lock, 0); +} + +void FindMaxCorr7(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, volatile int *lock, + float *siftParts1, float *siftParts2) +{ + // features in columns + // one extra to avoid shared conflicts + sycl::float4 *pts1 = (sycl::float4 *)siftParts1; + sycl::float4 *pts2 = (sycl::float4 *)siftParts2; + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1l = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), + (unsigned int)(numPts1 - 1)); + const sycl::float4 *p1l4 = (sycl::float4 *)sift1[p1l].data; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512 / 16; k++) + { + const int p2l = + sycl::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), + (unsigned int)(numPts2 - 1)); + const sycl::float4 *p2l4 = (sycl::float4 *)sift2[p2l].data; 
+#define NUM 4 + float sum[NUM]; + if (ty < (16 / NUM)) + for (int l = 0; l < NUM; l++) + sum[l] = 0.0f; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + for (int i = 0; i < 2; i++) + { + pts1[17 * tx + ty] = p1l4[i * 16 + tx]; + pts2[16 * ty + tx] = p2l4[i * 16 + tx]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (ty < (16 / NUM)) + { +#pragma unroll + for (int j = 0; j < 16; j++) + { + sycl::float4 p1v = pts1[17 * j + tx]; +#pragma unroll + for (int l = 0; l < NUM; l++) + { + sycl::float4 p2v = pts2[16 * (ty + l * (16 / NUM)) + j]; + sum[l] += p1v.x() * p2v.x(); + sum[l] += p1v.y() * p2v.y(); + sum[l] += p1v.z() * p2v.z(); + sum[l] += p1v.w() * p2v.w(); + } + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + float *sums = siftParts1; + if (ty < (16 / NUM)) + for (int l = 0; l < NUM; l++) + sums[16 * (ty + l * (16 / NUM)) + tx] = sum[l]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (ty == 0) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = sycl::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), + (unsigned int)(numPts2 - 1)); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), + (unsigned int)(numPts1 - 1)); + if (tx == 0 && ty == 0) + while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0) + ; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (ty == 0) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = 
sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (tx == 0 && ty == 0) + infra::atomic_exchange((int *)lock, 0); +} + +void FindMaxCorr6(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, volatile int *lock, + float *siftParts2, float *sums) +{ + //__shared__ float siftParts1[128*16]; // features in columns + // one extra to avoid shared conflicts + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1l = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), + (unsigned int)(numPts1 - 1)); + float *pt1l = sift1[p1l].data; + sycl::float4 part1 = reinterpret_cast(pt1l)[tx]; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512; k += 16) + { + const int p2l = + sycl::min((unsigned int)(item_ct1.get_group(1) * 512 + k + ty), + (unsigned int)(numPts2 - 1)); + float *pt2l = sift2[p2l].data; + reinterpret_cast(siftParts2)[32 * ty + tx] = + reinterpret_cast(pt2l)[tx]; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + for (int i = 0; i < 16; i++) + { + sycl::float4 part2 = + reinterpret_cast(siftParts2)[32 * i + tx]; + float sum = part1.x() * part2.x() + part1.y() * part2.y() + + part1.z() * part2.z() + part1.w() * part2.w(); + sum += ShiftDown(sum, 16, item_ct1); + sum += ShiftDown(sum, 8, item_ct1); + sum += ShiftDown(sum, 4, item_ct1); + sum += ShiftDown(sum, 2, item_ct1); + sum += ShiftDown(sum, 1, item_ct1); + if (tx == 0) + sums[16 * i + ty] = sum; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (ty == 0 && tx < 16) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = + sycl::min((unsigned 
int)(item_ct1.get_group(1) * 512 + k + j), + (unsigned int)(numPts2 - 1)); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + if (tx == 0 && ty == 0) + while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0) + ; + + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (ty == 0 && tx < 16) + { + const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), + (unsigned int)(numPts1 - 1)); + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (tx == 0 && ty == 0) + infra::atomic_exchange((int *)lock, 0); +} + +void FindMaxCorr5(SiftPoint *sift1, SiftPoint *sift2, int numPts1, int numPts2, + sycl::nd_item<3> item_ct1, volatile int *lock, + float *siftParts1, float *siftParts2) +{ + // features in columns + // one extra to avoid shared conflicts + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int p1l = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + ty), + (unsigned int)(numPts1 - 1)); + const float *pt1l = sift1[p1l].data; + float maxScore = -1.0f; + float maxScor2 = -1.0f; + int maxIndex = 0; + for (int k = 0; k < 512 / 16; k++) + { + const int p2l = + sycl::min((unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + ty), + (unsigned int)(numPts2 - 1)); + const float *pt2l = sift2[p2l].data; + float sum = 0.0f; + for (int i = 0; i < 8; i++) + { + siftParts1[17 * tx + ty] = pt1l[i * 16 + tx]; // load and transpose + 
siftParts2[17 * tx + ty] = pt2l[i * 16 + tx]; + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + for (int j = 0; j < 16; j++) + sum += siftParts1[17 * j + tx] * siftParts2[17 * j + ty]; + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + float *sums = siftParts1; + sums[16 * ty + tx] = sum; + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (ty == 0) + { + for (int j = 0; j < 16; j++) + { + float sum = sums[16 * j + tx]; + if (sum > maxScore) + { + maxScor2 = maxScore; + maxScore = sum; + maxIndex = sycl::min( + (unsigned int)(item_ct1.get_group(1) * 512 + k * 16 + j), + (unsigned int)(numPts2 - 1)); + } + else if (sum > maxScor2) + maxScor2 = sum; + } + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + const int p1 = sycl::min((unsigned int)(item_ct1.get_group(2) * 16 + tx), + (unsigned int)(numPts1 - 1)); + if (tx == 0 && ty == 0) + while (infra::atomic_compare_exchange_strong((int *)lock, 0, 1) != 0) + ; + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (ty == 0) + { + float maxScor2Old = sift1[p1].ambiguity * (sift1[p1].score + 1e-6f); + if (maxScore > sift1[p1].score) + { + maxScor2 = sycl::max(sift1[p1].score, maxScor2); + sift1[p1].ambiguity = maxScor2 / (maxScore + 1e-6f); + sift1[p1].score = maxScore; + sift1[p1].match = maxIndex; + sift1[p1].match_xpos = sift2[maxIndex].xpos; + sift1[p1].match_ypos = sift2[maxIndex].ypos; + } + else if (maxScore > maxScor2Old) + sift1[p1].ambiguity = maxScore / (sift1[p1].score + 1e-6f); + } + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + if (tx == 0 && ty == 0) + infra::atomic_exchange((int *)lock, 0); +} + +template +void InvertMatrix(float elem[size][size], float res[size][size]) +{ + int indx[size]; + float b[size]; + float vv[size]; + for (int i = 0; i < size; i++) + indx[i] = 0; + int imax = 0; + float d = 1.0; + for (int i = 0; i < size; i++) + { // find biggest element for each row + float big = 0.0; + for 
(int j = 0; j < size; j++) + { + float temp = sycl::fabs(elem[i][j]); + if (temp > big) + big = temp; + } + if (big > 0.0) + vv[i] = 1.0 / big; + else + vv[i] = 1e16; + } + for (int j = 0; j < size; j++) + { + for (int i = 0; i < j; i++) + { // ik (upper right), k=j + float sum = elem[i][j]; // i>=j (upper right) + for (int k = 0; k < j; k++) // kk (upper right), k=j (upper right) + float dum = vv[i] * sycl::fabs(sum); + if (dum >= big) + { + big = dum; + imax = i; + } + } + if (j != imax) + { // imax>j + for (int k = 0; k < size; k++) + { + float dum = elem[imax][k]; // upper right and lower left + elem[imax][k] = elem[j][k]; + elem[j][k] = dum; + } + d = -d; + vv[imax] = vv[j]; + } + indx[j] = imax; + if (elem[j][j] == 0.0) // j==j (upper right) + elem[j][j] = 1e-16; + if (j != (size - 1)) + { + float dum = 1.0 / elem[j][j]; + for (int i = j + 1; i < size; i++) // i>j + elem[i][j] *= dum; // i>j (upper right) + } + } + for (int j = 0; j < size; j++) + { + for (int k = 0; k < size; k++) + b[k] = 0.0; + b[j] = 1.0; + int ii = -1; + for (int i = 0; i < size; i++) + { + int ip = indx[i]; + float sum = b[ip]; + b[ip] = b[i]; + if (ii != -1) + for (int j = ii; j < i; j++) + sum -= elem[i][j] * b[j]; // i>j (upper right) + else if (sum != 0.0) + ii = i; + b[i] = sum; + } + for (int i = size - 1; i >= 0; i--) + { + float sum = b[i]; + for (int j = i + 1; j < size; j++) + sum -= elem[i][j] * b[j]; // i item_ct1) +{ + float a[8][8], ia[8][8]; + float b[8]; + const int bx = item_ct1.get_group(2); + const int tx = item_ct1.get_local_id(2); + const int idx = item_ct1.get_local_range().get(2) * bx + tx; + const int numLoops = + item_ct1.get_local_range().get(2) * item_ct1.get_group_range(2); + +#pragma unroll + for (int i = 0; i < 4; i++) + { + int pt = randPts[i * numLoops + idx]; + float x1 = coord[pt + 0 * numPts]; + float y1 = coord[pt + 1 * numPts]; + float x2 = coord[pt + 2 * numPts]; + float y2 = coord[pt + 3 * numPts]; + float *row1 = a[2 * i + 0]; + row1[0] = x1; + 
row1[1] = y1; + row1[2] = 1.0; + row1[3] = row1[4] = row1[5] = 0.0; + row1[6] = -x2 * x1; + row1[7] = -x2 * y1; + float *row2 = a[2 * i + 1]; + row2[0] = row2[1] = row2[2] = 0.0; + row2[3] = x1; + row2[4] = y1; + row2[5] = 1.0; + row2[6] = -y2 * x1; + row2[7] = -y2 * y1; + b[2 * i + 0] = x2; + b[2 * i + 1] = y2; + } + InvertMatrix<8>(a, ia); + item_ct1.barrier(sycl::access::fence_space::local_space); + +#pragma unroll + for (int j = 0; j < 8; j++) + { + float sum = 0.0f; + for (int i = 0; i < 8; i++) + sum += ia[j][i] * b[i]; + homo[j * numLoops + idx] = sum; + } + item_ct1.barrier(sycl::access::fence_space::local_space); +} + +#define TESTHOMO_TESTS 16 // number of tests per block, alt. 32, 32 +#define TESTHOMO_LOOPS 16 // number of loops per block, alt. 8, 16 + +void TestHomographies(float *d_coord, float *d_homo, + int *d_counts, int numPts, float thresh2, sycl::nd_item<3> item_ct1, + float *homo, int *cnts) +{ + + const int tx = item_ct1.get_local_id(2); + const int ty = item_ct1.get_local_id(1); + const int idx = + item_ct1.get_group(1) * item_ct1.get_local_range().get(1) + tx; + const int numLoops = + item_ct1.get_local_range().get(1) * item_ct1.get_group_range(1); + if (ty < 8 && tx < TESTHOMO_LOOPS) + homo[tx * 8 + ty] = d_homo[idx + ty * numLoops]; + item_ct1.barrier(sycl::access::fence_space::local_space); + float a[8]; + +#pragma unroll + for (int i = 0; i < 8; i++) + a[i] = homo[ty * 8 + i]; + int cnt = 0; + +#pragma unroll + for (int i = tx; i < numPts; i += TESTHOMO_TESTS) + { + float x1 = d_coord[i + 0 * numPts]; + float y1 = d_coord[i + 1 * numPts]; + float x2 = d_coord[i + 2 * numPts]; + float y2 = d_coord[i + 3 * numPts]; + float nomx = a[0] * x1 + a[1] * y1 + a[2]; + float nomy = a[3] * x1 + a[4] * y1 + a[5]; + float deno = a[6] * x1 + a[7] * y1 + 1.0f; + float errx = x2 * deno - nomx; + float erry = y2 * deno - nomy; + float err2 = errx * errx + erry * erry; + if (err2 < thresh2 * deno * deno) + cnt++; + } + int kty = TESTHOMO_TESTS * ty; + 
cnts[kty + tx] = cnt; + item_ct1.barrier(sycl::access::fence_space::local_space); + int len = TESTHOMO_TESTS / 2; + while (len > 0) + { + if (tx < len) + cnts[kty + tx] += cnts[kty + tx + len]; + len /= 2; + item_ct1.barrier(sycl::access::fence_space::local_space); + ; + } + if (tx < TESTHOMO_LOOPS && ty == 0) + d_counts[idx] = cnts[TESTHOMO_TESTS * tx]; + item_ct1.barrier(sycl::access::fence_space::local_space); +} + +//================= Host matching functions =====================// + +double FindHomography(SiftData &data, float *homography, int *numMatches, sycl::queue &q_ct, float &matchTime, int numLoops, float minScore, float maxAmbiguity, float thresh) +{ + *numMatches = 0; + homography[0] = homography[4] = homography[8] = 1.0f; + homography[1] = homography[2] = homography[3] = 0.0f; + homography[5] = homography[6] = homography[7] = 0.0f; + if (data.d_data == NULL) + return 0.0f; + SiftPoint *d_sift = data.d_data; + numLoops = iDivUp(numLoops, 16) * 16; + int numPts = data.numPts; + if (numPts < 8) + return 0.0f; + int numPtsUp = iDivUp(numPts, 16) * 16; + float *d_coord, *d_homo; + int *d_randPts, *h_randPts; + int randSize = 4 * sizeof(int) * numLoops; + int szFl = sizeof(float); + int szPt = sizeof(SiftPoint); + +#ifdef DEVICE_TIMER + auto start_malloc_1 = std::chrono::steady_clock::now(); +#endif + d_coord = (float *)sycl::malloc_device(4 * sizeof(float) * numPtsUp, q_ct); + d_randPts = (int *)sycl::malloc_device(randSize, q_ct); + d_homo = (float *)sycl::malloc_device(8 * sizeof(float) * numLoops, q_ct); + +#ifdef DEVICE_TIMER + auto stop_malloc_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc_1 - start_malloc_1).count(); +#endif + h_randPts = (int *)malloc(randSize); + float *h_scores = (float *)malloc(sizeof(float) * numPtsUp); + float *h_ambiguities = (float *)malloc(sizeof(float) * numPtsUp); + float *temp1 = (float *)malloc(szPt * numPtsUp); + float *temp2 = (float *)malloc(szPt * numPtsUp); + +#ifdef 
DEVICE_TIMER + auto start_memcpy_1 = std::chrono::steady_clock::now(); +#endif + + infra::sift_memcpy(temp1, &d_sift[0].score, szPt * numPts, infra::device_to_host, q_ct); + infra::sift_memcpy(temp2, &d_sift[0].ambiguity, szPt * numPts, infra::device_to_host, q_ct); + q_ct.wait(); + +#ifdef DEVICE_TIMER + auto stop_memcpy_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_1 - start_memcpy_1).count(); +#endif + char *src_score = (char *)temp1; + char *src_ambiguity = (char *)temp2; + char *dst_score = (char *)h_scores; + char *dst_ambiguity = (char *)h_ambiguities; + + for (int i = 0; i < numPts; ++i) + { + memcpy(dst_score, src_score, szFl); + memcpy(dst_ambiguity, src_ambiguity, szFl); + + src_score += szPt; + src_ambiguity += szPt; + dst_score += szFl; + dst_ambiguity += szFl; + } + + int *validPts = (int *)malloc(sizeof(int) * numPts); + int numValid = 0; + + for (int i = 0; i < numPts; i++) + { + if (h_scores[i] > minScore && h_ambiguities[i] < maxAmbiguity) + validPts[numValid++] = i; + } + + free(h_scores); + free(h_ambiguities); + + if (numValid >= 8) + { + std::random_device rd; + uint32_t seed = rd(); + std::mt19937 rnd(seed); // mersenne_twister_engine + std::uniform_int_distribution dis(0, UINT32_MAX); + for (int i = 0; i < numLoops; i++) + { + int p1 = dis(rnd) % numValid; + int p2 = dis(rnd) % numValid; + int p3 = dis(rnd) % numValid; + int p4 = dis(rnd) % numValid; + while (p2 == p1) + p2 = dis(rnd) % numValid; + while (p3 == p1 || p3 == p2) + p3 = dis(rnd) % numValid; + while (p4 == p1 || p4 == p2 || p4 == p3) + p4 = dis(rnd) % numValid; + h_randPts[i + 0 * numLoops] = validPts[p1]; + h_randPts[i + 1 * numLoops] = validPts[p2]; + h_randPts[i + 2 * numLoops] = validPts[p3]; + h_randPts[i + 3 * numLoops] = validPts[p4]; + } +#ifdef DEVICE_TIMER + auto start_malloc_2 = std::chrono::steady_clock::now(); +#endif + float *temp3 = (float *)sycl::malloc_device(szPt * numPtsUp, q_ct); + float *temp4 = (float 
*)sycl::malloc_device(szPt * numPtsUp, q_ct); + float *temp5 = (float *)sycl::malloc_device(szPt * numPtsUp, q_ct); + float *temp6 = (float *)sycl::malloc_device(szPt * numPtsUp, q_ct); +#ifdef DEVICE_TIMER + auto stop_malloc_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_malloc_2 - start_malloc_2).count(); +#endif +#ifdef DEVICE_TIMER + auto start_memcpy_2 = std::chrono::steady_clock::now(); +#endif + + q_ct.memcpy(d_randPts, h_randPts, randSize).wait(); + infra::sift_memcpy(temp3, &d_sift[0].xpos, szPt * numPts, infra::device_to_device, q_ct); + infra::sift_memcpy(temp4, &d_sift[0].ypos, szPt * numPts, infra::device_to_device, q_ct); + infra::sift_memcpy(temp5, &d_sift[0].match_xpos, szPt * numPts, infra::device_to_device, q_ct); + infra::sift_memcpy(temp6, &d_sift[0].match_ypos, szPt * numPts, infra::device_to_device, q_ct); + q_ct.wait(); + + // kernel call to transfer memory from device to device(replaced 2d memcopies are 2d copying is slower on sycl) + q_ct.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1) * + sycl::range<3>(1, 1, 1), + sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + memcopyKernel(temp3, &d_coord[0 * numPtsUp], szPt, szFl, numPts, szFl); + }) + .wait(); + + q_ct.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1) * + sycl::range<3>(1, 1, 1), + sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + memcopyKernel(temp4, &d_coord[1 * numPtsUp], szPt, szFl, numPts, szFl); + }) + .wait(); + + q_ct.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1) * + sycl::range<3>(1, 1, 1), + sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + 
memcopyKernel(temp5, &d_coord[2 * numPtsUp], szPt, szFl, numPts, szFl); + }) + .wait(); + + q_ct.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1) * + sycl::range<3>(1, 1, 1), + sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + memcopyKernel(temp6, &d_coord[3 * numPtsUp], szPt, szFl, numPts, szFl); + }) + .wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_2 - start_memcpy_2).count(); +#endif + +#ifdef DEVICE_TIMER + auto start_kernel_1 = std::chrono::steady_clock::now(); +#endif + q_ct.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, numLoops / 16) * + sycl::range<3>(1, 1, 16), + sycl::range<3>(1, 1, 16)), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + ComputeHomographies(d_coord, d_randPts, d_homo, numPtsUp, item_ct1); + }) + .wait(); + +#ifdef DEVICE_TIMER + auto stop_kernel_1 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_1 - start_kernel_1).count(); + // printf("ComputeHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_1 - start_kernel_1).count()); +#endif + checkMsg("ComputeHomographies() execution failed\n"); + sycl::range<3> blocks(1, numLoops / TESTHOMO_LOOPS, 1); + sycl::range<3> threads(1, TESTHOMO_LOOPS, TESTHOMO_TESTS); +#ifdef DEVICE_TIMER + auto start_kernel_2 = std::chrono::steady_clock::now(); +#endif + q_ct.submit([&](sycl::handler &cgh) + { + sycl::accessor + homo_acc_ct1(sycl::range<1>(128 /*8*TESTHOMO_LOOPS*/), cgh); + sycl::accessor + cnts_acc_ct1(sycl::range<1>(256 /*TESTHOMO_TESTS*TESTHOMO_LOOPS*/), + cgh); + + cgh.parallel_for(sycl::nd_range<3>(blocks * threads, threads), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && 
!defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + TestHomographies(d_coord, d_homo, d_randPts, numPtsUp, + thresh * thresh, item_ct1, + homo_acc_ct1.get_pointer(), + cnts_acc_ct1.get_pointer()); + }); }) + .wait(); +#ifdef DEVICE_TIMER + auto stop_kernel_2 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_kernel_2 - start_kernel_2).count(); + // printf("TestHomographies time = %.2f us\n", std::chrono::duration(stop_kernel_2 - start_kernel_2).count()); +#endif + checkMsg("TestHomographies() execution failed\n"); +#ifdef DEVICE_TIMER + auto start_memcpy_3 = std::chrono::steady_clock::now(); +#endif + q_ct.memcpy(h_randPts, d_randPts, sizeof(int) * numLoops).wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy_3 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_3 - start_memcpy_3).count(); +#endif + int maxIndex = -1, maxCount = -1; + + for (int i = 0; i < numLoops; i++) + if (h_randPts[i] > maxCount) + { + maxCount = h_randPts[i]; + maxIndex = i; + } + + *numMatches = maxCount; +#ifdef DEVICE_TIMER + auto start_memcpy_4 = std::chrono::steady_clock::now(); +#endif + safeCall((infra::sift_memcpy(homography, szFl, &d_homo[maxIndex], + sizeof(float) * numLoops, szFl, 8, + infra::device_to_host, q_ct), + 0)); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy_4 = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy_4 - start_memcpy_4).count(); +#endif + } + free(validPts); + free(h_randPts); + safeCall((sycl::free(d_homo, q_ct), 0)); + safeCall((sycl::free(d_randPts, q_ct), 0)); + safeCall((sycl::free(d_coord, q_ct), 0)); + return matchTime; +} + +double MatchSiftData(SiftData &data1, SiftData &data2, sycl::queue &q_ct, float &matchTime) +{ + float matchSiftDataTime = 0.0; + + int numPts1 = data1.numPts; + int numPts2 = data2.numPts; + + if (!numPts1 || !numPts2) + return 0.0; +#ifdef MANAGEDMEM + SiftPoint *sift1 = data1.m_data; + SiftPoint *sift2 = 
data2.m_data; +#else + if (data1.d_data == NULL || data2.d_data == NULL) + return 0.0f; + SiftPoint *sift1 = data1.d_data; + SiftPoint *sift2 = data2.d_data; +#endif +// Original version with correlation and maximization in two different kernels +// Global memory reguirement: O(N^2) +#if 0 + float *d_corrData; + int corrWidth = iDivUp(numPts2, 16)*16; + int corrSize = sizeof(float)*numPts1*corrWidth; + safeCall(cudaMalloc((void **)&d_corrData, corrSize)); +#if 0 // K40c 10.9ms, 1080 Ti 3.8ms + dim3 blocks1(numPts1, iDivUp(numPts2, 16)); + dim3 threads1(16, 16); // each block: 1 points x 16 points + MatchSiftPoints<<>>(sift1, sift2, d_corrData, numPts1, numPts2); +#else // K40c 7.6ms, 1080 Ti 1.4ms + dim3 blocks(iDivUp(numPts1,16), iDivUp(numPts2, 16)); + dim3 threads(16, 16); // each block: 16 points x 16 points + MatchSiftPoints2<<>>(sift1, sift2, d_corrData, numPts1, numPts2); +#endif + safeCall(cudaDeviceSynchronize()); + dim3 blocksMax(iDivUp(numPts1, 16)); + dim3 threadsMax(16, 16); + FindMaxCorr<<>>(d_corrData, sift1, sift2, numPts1, corrWidth, sizeof(SiftPoint)); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr() execution failed\n"); + safeCall(cudaFree(d_corrData)); +#endif + +// Version suggested by Nicholas Lin with combined correlation and maximization +// Global memory reguirement: O(N) +#if 0 + int block_dim = 16; + float *d_corrData; + int corrSize = numPts1 * block_dim * 2; + safeCall(cudaMalloc((void **)&d_corrData, sizeof(float) * corrSize)); + dim3 blocks(iDivUp(numPts1, block_dim)); + dim3 threads(block_dim, block_dim); + FindMaxCorr3<<>>(d_corrData, sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr3() execution failed\n"); + safeCall(cudaFree(d_corrData)); +#endif + +// Combined version with no global memory requirement using one 1 point per block +#if 0 + dim3 blocksMax(numPts1); + dim3 threadsMax(FMC2W, FMC2H); + FindMaxCorr2<<>>(sift1, sift2, numPts1, numPts2); + 
safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr2() execution failed\n"); +#endif + +// Combined version with no global memory requirement using one FMC2H points per block +#if 0 + dim3 blocksMax2(iDivUp(numPts1, FMC2H)); + dim3 threadsMax2(FMC2W, FMC2H); + FindMaxCorr4<<>>(sift1, sift2, numPts1, numPts2); + safeCall(cudaDeviceSynchronize()); + checkMsg("FindMaxCorr4() execution failed\n"); +#endif + +// Combined version with no global memory requirement using global locks +#if 1 + sycl::range<3> blocksMax3(1, iDivUp(numPts2, 512), iDivUp(numPts1, 16)); + sycl::range<3> threadsMax3(1, 16, 16); +#ifdef DEVICE_TIMER + auto start_kernel1 = std::chrono::steady_clock::now(); +#endif + + q_ct.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, iDivUp(numPts1, 64)) * + sycl::range<3>(1, 1, 64), + sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + CleanMatches(sift1, numPts1, item_ct1); + }) + .wait(); + +#ifdef DEVICE_TIMER + auto stop_kernel1 = std::chrono::steady_clock::now(); + // printf("CleanMatches time = %.2f us\n", std::chrono::duration(stop_kernel1 - start_kernel1).count()); + + matchTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); + matchSiftDataTime += std::chrono::duration(stop_kernel1 - start_kernel1).count(); +#endif + + int mode = 10; + if (mode == 5) + q_ct.submit([&](sycl::handler &cgh) + { + lock.init(); + + auto lock_ptr_ct1 = lock.get_ptr(); + + sycl::accessor + siftParts1_acc_ct1(sycl::range<1>(272 /*17*16*/), cgh); + sycl::accessor + siftParts2_acc_ct1(sycl::range<1>(272 /*17*16*/), cgh); + + cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + FindMaxCorr5(sift1, sift2, numPts1, numPts2, item_ct1, + lock_ptr_ct1, + 
siftParts1_acc_ct1.get_pointer(), + siftParts2_acc_ct1.get_pointer()); + }); }); + else if (mode == 6) + { + threadsMax3 = sycl::range<3>(1, 16, 32); + q_ct.submit([&](sycl::handler &cgh) + { + lock.init(); + + auto lock_ptr_ct1 = lock.get_ptr(); + + sycl::accessor + siftParts2_acc_ct1(sycl::range<1>(2048 /*128*16*/), cgh); + sycl::accessor + sums_acc_ct1(sycl::range<1>(256 /*16*16*/), cgh); + + cgh.parallel_for( + sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + FindMaxCorr6(sift1, sift2, numPts1, numPts2, item_ct1, + lock_ptr_ct1, siftParts2_acc_ct1.get_pointer(), + sums_acc_ct1.get_pointer()); + }); }); + } + else if (mode == 7) + q_ct.submit([&](sycl::handler &cgh) + { + lock.init(); + + auto lock_ptr_ct1 = lock.get_ptr(); + + sycl::accessor + siftParts1_acc_ct1(sycl::range<1>(1088 /*17*64*/), cgh); + sycl::accessor + siftParts2_acc_ct1(sycl::range<1>(1024 /*16*64*/), cgh); + + cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + FindMaxCorr7(sift1, sift2, numPts1, numPts2, item_ct1, + lock_ptr_ct1, + siftParts1_acc_ct1.get_pointer(), + siftParts2_acc_ct1.get_pointer()); + }); }); + else if (mode == 8) + { + blocksMax3 = + sycl::range<3>(1, iDivUp(numPts2, FMC_GH), iDivUp(numPts1, FMC_BW)); + threadsMax3 = sycl::range<3>(1, FMC_NH, FMC_NW); + q_ct.submit([&](sycl::handler &cgh) + { + lock.init(); + + auto lock_ptr_ct1 = lock.get_ptr(); + + sycl::accessor + siftParts1_acc_ct1(sycl::range<1>(512 /*FMC_BW*FMC_BD*/), cgh); + sycl::accessor + siftParts2_acc_ct1(sycl::range<1>(512 /*FMC_BH*FMC_BD*/), cgh); + sycl::accessor + blksums_acc_ct1(sycl::range<1>(1024 /*FMC_BW*FMC_BH*/), cgh); + + cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * 
threadsMax3, threadsMax3), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + FindMaxCorr8(sift1, sift2, numPts1, numPts2, item_ct1, + lock_ptr_ct1, + siftParts1_acc_ct1.get_pointer(), + siftParts2_acc_ct1.get_pointer(), + blksums_acc_ct1.get_pointer()); + }); }); + } + else if (mode == 9) + { + blocksMax3 = + sycl::range<3>(1, iDivUp(numPts2, FMC_GH), iDivUp(numPts1, FMC_BW)); + threadsMax3 = sycl::range<3>(1, FMC_NH, FMC_NW); + q_ct.submit([&](sycl::handler &cgh) + { + lock.init(); + + auto lock_ptr_ct1 = lock.get_ptr(); + + sycl::accessor + siftParts1_acc_ct1(sycl::range<1>(512 /*FMC_BW*FMC_BD*/), cgh); + sycl::accessor + siftParts2_acc_ct1(sycl::range<1>(512 /*FMC_BH*FMC_BD*/), cgh); + + cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + FindMaxCorr9(sift1, sift2, numPts1, numPts2, item_ct1, + lock_ptr_ct1, + siftParts1_acc_ct1.get_pointer(), + siftParts2_acc_ct1.get_pointer()); + }); }); + } + else if (mode == 10) + { + try + { + + blocksMax3 = sycl::range<3>(1, 1, iDivUp(numPts1, M7W)); + threadsMax3 = sycl::range<3>(1, (M7H / M7R), M7W); //(1 , 8 , 32) + +#ifdef DEVICE_TIMER + auto start_kernel2 = std::chrono::steady_clock::now(); +#endif + q_ct.submit([&](sycl::handler &cgh) + { + sycl::accessor + buffer1_acc_ct1(sycl::range<1>(1024 /*M7W*NDIM/4*/), cgh); + sycl::accessor + buffer2_acc_ct1(sycl::range<1>(1024 /*M7H*NDIM/4*/), cgh); + cgh.parallel_for(sycl::nd_range<3>(blocksMax3 * threadsMax3, threadsMax3), + [=](sycl::nd_item<3> item_ct1) +#if !defined(USE_NVIDIA_BACKEND) && !defined(USE_AMDHIP_BACKEND) + [[intel::reqd_sub_group_size(32)]] +#endif + { + FindMaxCorr10(sift1, sift2, numPts1, numPts2, item_ct1, + buffer1_acc_ct1.get_pointer(), + buffer2_acc_ct1.get_pointer()); + }); }) + 
.wait(); +#ifdef DEVICE_TIMER + auto stop_kernel2 = std::chrono::steady_clock::now(); + // printf("FindMaxCorr10 time = %.2f us\n", std::chrono::duration(stop_kernel2 - start_kernel2).count()); + matchTime += std::chrono::duration(stop_kernel2 - start_kernel2).count(); + matchSiftDataTime += std::chrono::duration(stop_kernel2 - start_kernel2).count(); +#endif + } + catch (sycl::exception const &e) + { + std::cerr << e.what() << '\n'; + } + } + checkMsg("FindMaxCorr5() execution failed\n"); +#endif + + if (data1.h_data != NULL) + { + float *h_ptr = &data1.h_data[0].score; + float *d_ptr = &data1.d_data[0].score; +#ifdef DEVICE_TIMER + auto start_memcpy = std::chrono::steady_clock::now(); +#endif + infra::sift_memcpy(h_ptr, d_ptr, sizeof(SiftPoint) * data1.numPts, infra::device_to_host, q_ct); + q_ct.wait(); +#ifdef DEVICE_TIMER + auto stop_memcpy = std::chrono::steady_clock::now(); + matchTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); + matchSiftDataTime += std::chrono::duration(stop_memcpy - start_memcpy).count(); +#endif + } + return matchTime; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/common/Utility.cpp b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.cpp new file mode 100644 index 000000000..6c230dd44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of 
the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +// SPDX-License-Identifier: MIT + +#include + +#include "Utility.h" + +using namespace Utility; + +int Utility::RunDataVerification(const int threshold, const float matchPercentage) +{ + printf("Performing data verification \n"); + switch (threshold) + { + case 1: + if (matchPercentage > 20.0f && matchPercentage < 30.0f) + { + printf("Data verification is SUCCESSFUL. \n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + case 2: + if (matchPercentage > 26.0f && matchPercentage < 38.0f) + { + printf("Data verification is SUCCESSFUL. \n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + case 3: + if (matchPercentage > 35.0f && matchPercentage < 45.0f) + { + printf("Data verification is SUCCESSFUL. \n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + case 4: + if (matchPercentage > 40.0f && matchPercentage < 50.0f) + { + printf("Data verification is SUCCESSFUL. \n\n"); + } + else + { + printf("Data verification FAILED. \n\n"); + return -1; + } + break; + default: + printf("Threshold values should be in the range [1, 4]. 
\n\n"); + return -1; + } + return 0; +} diff --git a/third-party-programs/Velocity-Bench/cudaSift/common/Utility.h b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.h new file mode 100644 index 000000000..da09d2d78 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.h @@ -0,0 +1,31 @@ +// Copyright (C) 2023 Intel Corporation + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// SPDX-License-Identifier: MIT + +#ifndef UTILITY_H +#define UTILITY_H + +namespace Utility +{ + int RunDataVerification(const int thresh, const float matchPercentage); + +} +#endif // UTILITY_H diff --git a/third-party-programs/Velocity-Bench/cudaSift/common/Utility.o b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.o new file mode 100644 index 000000000..220855fcd Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/common/Utility.o differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/cudaSift_migration.md b/third-party-programs/Velocity-Bench/cudaSift/cudaSift_migration.md new file mode 100755 index 000000000..59912f69c --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/cudaSift_migration.md @@ -0,0 +1,385 @@ +# SYCLomatic Tool: Migrate cudaSift APP +## Use the command line to migrate a large code base. +The SYCLomatic project (the open-source version of Intel® DPC++ Compatibility Tool) can migrate projects that contain multiple source and header files. +| Optimized for | Description +|:--- |:--- +| OS | Linux* Ubuntu* 22.04 +| Software | Intel® DPC++ Compatibility Tool +| What you will learn | Simple invocation of dpct to migrate CUDA code +| Time to complete | 15 minutes + + +# Purpose +The SYCLomatic tool can migrate projects composed of multiple source and header files. +Use the dpct **--in-root** option to set the root location of the application to migrate. Only the files under this specified root will be considered for migration. Files located outside the **--in-root** directory will be considered system or library files and will not be migrated. + +The dpct **--out-root** option specifies the directory into which the SYCL*-compliant code produced by the dpct tool is written. The relative path and the name will be kept, except the file extensions are changed to **.dp.cpp**.
+ + +# Key Implementation Details +Besides the --in-root and --out-root options, additional options can help migrate the code more smoothly: [Command Line Options Reference](https://software.intel.com/content/www/us/en/develop/documentation/intel-dpcpp-compatibility-tool-user-guide/top/command-line-options-reference.html). + + + +## Migrating the CUDA Sample to Data Parallel C++ with the Intel® DPC++ Compatibility Tool + +Building and running the CUDA sample is not required to migrate this project +to a SYCL*-compliant project. + +> **Note**: Certain CUDA header files, referenced by the CUDA application +> source files to be migrated, need to be accessible for the migration step. +> See *Before you Begin* in [Get Started with the Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/develop/documentation/get-started-with-intel-dpcpp-compatibility-tool/top.html#top_BEFORE_YOU_BEGIN). + +> **Note**: If you have not already done so, set up your CLI +> environment by sourcing the `setvars` script located in +> the root of your oneAPI installation. +> +> Linux*: +> - For system wide installations: `. /opt/intel/oneapi/setvars.sh` +> - For private installations: `.
~/intel/oneapi/setvars.sh` +> - For non-POSIX shells, like csh, use the following command: `$ bash -c 'source <install-dir>/setvars.sh ; exec csh'` +> +> Windows*: +> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat` +> - For Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` +> +> For more information on configuring environment variables, see [Use the setvars Script with Linux* or MacOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html). + + +### Command-Line on a Linux* System + +1. This sample project contains a simple CUDA program with 12 files: + +``` +CUDA +├── CMakeLists.txt +├── cudaImage.cu +├── cudaImage.h +├── cudaSift.h +├── cudaSiftD.cu +├── cudaSiftD.h +├── cudaSiftH.cu +├── cudaSiftH.h +├── cudautils.h +├── geomFuncs.cpp +├── mainSift.cpp +└── matching.cu +``` +2. Make sure ```OpenCV*``` is installed on the machine. ``` +$ sudo apt-get install libopencv-dev +``` +Then, make a `build` directory and use the **cmake** command line tool to generate the corresponding build files (make) directly. +```sh +$ cd CUDA && mkdir build +$ cd build && cmake .. +``` +3. Use the **intercept-build** tool to intercept the build step and generate the compilation database file `compile_commands.json` in the same folder. +``` sh +$ intercept-build make +$ ls . +CMakeCache.txt CMakeFiles Makefile bitcracker cmake_install.cmake compile_commands.json +``` +2.
Use the tool's `--in-root` option and provide input files to specify where + to locate the CUDA files that need migration; use the tool’s `--out-root` + option to designate where to generate the resulting files (default is `dpct_output`); use the tool's `-p` option to specify the compilation database to migrate the whole project: + +```sh +# From the CUDA directory as root directory: +$ cd .. +$ dpct --in-root=. -p=./build/compile_commands.json --out-root=out --gen-build-script --cuda-include-path=/usr/local/cuda/include +``` + +> If an `--in-root` option is not specified, the directory of the first input +> source file is implied. If `--out-root` is not specified, `./dpct_output` +> is implied. + +You should see the migrated files in the `out` folder that was specified +by the `--out-root` option: + +``` +out/ +├── MainSourceFiles.yaml +├── cudaImage.dp.cpp +├── cudaImage.h +├── cudaSift.h +├── cudaSift.h.yaml +├── cudaSiftD.dp.cpp +├── cudaSiftD.h +├── cudaSiftH.dp.cpp +├── cudaSiftH.h +├── cudautils.h +├── cudautils.h.yaml +├── geomFuncs.cpp +├── mainSift.cpp.dp.cpp +└── matching.dp.cpp + +``` + +3. Inspect the migrated source code, address any `DPCT` warnings generated + by the Intel® DPC++ Compatibility Tool, and verify the correctness of the new program. + +Warnings are printed to the console and added as comments in the migrated +source. See *Diagnostic Reference* in the [Intel® DPC++ Compatibility Tool Developer Guide and Reference](https://www.intel.com/content/www/us/en/develop/documentation/intel-dpcpp-compatibility-tool-user-guide/top/diagnostics-reference.html) for more information on what each warning means. + + +This sample should generate the following warnings: +``` +warning: #DPCT2001:228: You can link with more library by add them here. +LIB := +``` + + +See **Addressing Warnings in Migrated Code** below to understand how to resolve the warning. + + +4.
Build the migrated code with the generated Makefile.dpct +``` +$ cd out +$ make -f Makefile.dpct +# Make sure the oneAPI package is installed before building the application, so that the oneAPI DPC++ compiler is available. +``` + +# Addressing Warnings in Migrated Code + +Migration generated one warning for code that `dpct` could not migrate: +``` +warning: #DPCT2001:228: You can link with more library by add them here. +LIB := +``` +This message is shown in Makefile.dpct. The **cudaSift** application needs to link against the **OpenCV** libraries at link time. Modifying Makefile.dpct as follows fixes the linker error. +``` +LIB := -lopencv_core -lopencv_imgcodecs +``` + + +## Rebuild the migrated code +After manually addressing the warning, rebuild the application. +``` +$ make -f Makefile.dpct clean +$ make -f Makefile.dpct +``` +# Example Output + +When you run the migrated application, you should see the following console +output: + +``` +$ ./cudasift +Image size = (1920,1080) +Initializing data...
+Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction 
= 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift 
extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of Points after sift extraction = 3681 + +Number of Points after sift extraction = 3933 + +Number of original features: 3681 3933 +Number of matching features: 1220 1258 33.1432% 1 2 + +Performing data verification +Data verification is SUCCESSFUL. + +Total workload time = 2206.28 ms +``` +**Note:** The testing result was running on Intel(R) Core(TM) i7-13700K on the CPU backend with 2023.2 oneAPI released oneAPI packaged. + +If an error occurs, troubleshoot the problem using the Diagnostics Utility for Intel® oneAPI Toolkits. +[Learn more](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html). + +## License +See +[License.txt](https://github.com/oneapi-src/Velocity-Bench/blob/main/cudaSift/LICENSE.md) for details. 
diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/img1.png b/third-party-programs/Velocity-Bench/cudaSift/inputData/img1.png new file mode 100755 index 000000000..efd56fde5 Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/inputData/img1.png differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/img2.png b/third-party-programs/Velocity-Bench/cudaSift/inputData/img2.png new file mode 100755 index 000000000..28020ea2f Binary files /dev/null and b/third-party-programs/Velocity-Bench/cudaSift/inputData/img2.png differ diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/left.pgm b/third-party-programs/Velocity-Bench/cudaSift/inputData/left.pgm new file mode 100644 index 000000000..0005f1967 --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/inputData/left.pgm @@ -0,0 +1,2616 @@ +P5 +1280 960 +255 +½ľþþº¾ľ¾¿zhSGdbbdedeeda`_ZWXWXZ[WXXWPVWUSNMLLKNLLKJJOOOPQPTSVVTXYY[\\^}wdjuy{vyspmed]P}p@47;=@DFIJJLNNONNPOLLLMKKLKKKLLMOPPOLLLLLLJPOKOPMMOPQQOPHI`defe`bb_aLNUVZ[^fhbhihcdggf`aecca_muc99:;=LUeqkXiVHTlsi`aS.MlHCC@:5%MraPOQSFbB3?@^`:2*2|ePpb@>g;..MeIBc^HPXkM9B_A9B_iJOkUe9'769=YZY]`af_VRGPhvmJ?}}y}tx|yrqpp]IQU{y~]:RTXZYSJXjD42574;BN_nrf\Va~B3}p16IOU>*/UmdI2(.jjCVieHK/7nwyzxf.1YveC>EGMQRRTQWyhB]J6mD>Q^bdcLFB7+*,5GRZ~QFDEC?@?AIIRQE=9=P^fea]QMUVTRNMKLKHGZojdindl_STX`^ZXMSbgjiifhqwqkljlpolomlmprrqpppnh8%(&<<471567546665452.+*%'+  #&)**)+))#&#""%#$$%-142+%#"$$#"  + "! 
+    + +¿¾ž½þĽ¾¾ļ¼ŽĻþ½½¾·xiQG^W[_^]]]]][ZWTUUUVTQRSTUVSQQQRPOPNNKKGIIGIIHJKPTVVWZ[ZXY^z}~~~~wr{t?6;=BEGILNNMPOOQRTTTTUTSTQNPQOLMQOOPNLJKMLLPPOPONMNMQROPMOdghgfccb_\MOVZ\Z\fg`gggaeggfddgccbdnwc<:79?LQisq`o]FUougdcQ-MoGCMB;2'QvcPRRQFg>1D@\^83)2}cUma@@e=//NeKDdnVGRXjH7Bb@;CekHO~y4&:D=86564/Dgi^]^_^7\h2aG>JqI9>JtP9o]QadXFLl,LB5224:=CWkkijhNDIA69N6)*NKN`e_]c:j~B610137>N`nndZXcA8~i26>BD7(-J\WB2'2jj>OXUJK/7kw||h00YseD?EJSXXZXLR{c:_M4nE9R^cebH>=5./29HPSUA4-.*3<>@7.-./15P`acilG9NXUNJLONLHHRYVXok[YYQSX]YURRU_[_`^XRaomefjmpoproopqqrsonpod8),& !""%*,.03/.,+(+) "&((*+*))"&%#"""$#$'())'%%"#"#$  +  +    þý¿ÿýþ½Ƚ¿ûľzjODa[YX]^^_]_`^\\^^]`a`a^`ba^\X[[XWVRQRONMKHFFHGHMPRTWWXZZ\ZpxvsmC9?DGJMQUUSWXX[^Z^_a`acda[YWXWVWXWSRQPPRRSRPTVTUTSTVXZ\]PJhfhggbba``LPWYZ[\fhbfggceffffffcabenxaB:8:?KSfln\lYHUmrhc`O.PjKNjU;1*Su^NQRXEg@3BC[\<3)5~aUob@Aj?20NiLD]rhHR]oF55<8Tu_FNOXeKciA;;<><2ZSFC@?A::XUPSWQZ[VQEQWZRE@~|{~uqx}utto^EU\}Y9LU^`_YK>Lm/MB7335:;CUjkhihPCJ@7EzK,(LMT]`]^a0***09>>7,('),4H`ZYjtC:Lc`PFEFFC@76AKJRSLHHBN[dbZSB?HN[]USRUaecegilmmproopqqpoopm`6(*&"!$)%  %'&)****)#$%##"$&%$$#$%(&&%%"""      +¿ºĿŽ»úüľŻſ¾¼ĿzgQNjggfgghjjlljjjljjkmjllntrpihgdeccba][YWTRPNLILOTY^`dhhiio}zlfJ=AEIMNSXZ[]`eddcdgijikjjgb^`_^`_^YUPSSSSUWWWXWYXW[_`fniXLhhihfdec_`FRVZ[]]eidgiiaegefffedcbamx`<<::?IRdklOYTJXmri]_R0JmIVrX80)Rv_NPSYBg=5@F]]92)5~^Wra?=mF32QiIA[woIR\mH4456M_K3&+5443.%5h\?^oeQF.5mzp.2\wbC9DIT[\\VHTwc9VE2t@:T]aeb@:83/8=0*(3ACLhrqngE7?CJNBFFJX]_ceimpopqnoqprqoqqoe6))%!  +$'()+)++)#%#"#$&(*)*&'()&%""##!  
+    ¼þüſžĿŽþž»þŽĿzgOKyqqssrrssrsrrropqqrponqrronnljjkjklkjffc_]YWTV[]afimpppt{zhUECFHLMQTX[]^acfegggijkkikhfhgggdb`\YZ[\X[\_]^^`bedfgmniZDfhfhedeb^_MSXY[\]fidghjfgigfedeedden|`?=;:?KQlkjey`FTlwk^^N.OkHWxZ71-Su_RTTVFc=8BBXW<4+4~YVl^<;Tc830OdF@euKSZlL5355:EdhJKdpn6"=F<78763/FfiinkfY7_jz2bF;864:AJsH>qXL`dQF;NnabY[gg]Xou|{z7-SFGF?:VubJQV^kKfgB;::;94`SGCB@A=?Y]`bbdd]YTFewcA|~{zronmw}tssoaD[aZG]]z~P@Nj1VC7566:>CQh][bjNCLC8;V<*(BDQ\_^cj7[C6610/26>VrsaVZd{:Dh.@R\O6&.AC<4,#0d]B]mgQG.9lyp-2]zaC7EKVX[ZUBP{g;OF7t9;T^bfc>772.8ADO]]adglnopnonqrsrpqpmd4')&!  +$'))+),-+#$""##(/276.*),'$$%#"   +  + "  + + ¾¾ÿ¿½ſÿſÿļ½¼ǾſżzhQGzuuwxwwtttutrpprrprqpoopqnmmiijkkiinojkmge^\]_cbdglmqqs|y}|\THEGKNOSWY]]bcdffhfiklljjjjjlnjhebbcbcdcfghhjlnnomnopok^Jdghkhdda``JQXZ]]_fichgjhhjgfedgdddgo|b==<;?JSjng^xcJVmtdd`Q,VjDW}Y72-Vy\PTSTFg94A?C@;2)6XZm^>>b<1/OeJ@dZGS]kH6345O;*':DU_c`fp;hD7511005?VrraU]h~9Cf/ANMH6'0UjQ:-%1g_D]phPE/4o|k+1aza>9DIUXYZUEMzk>RD7s;=<91(2[_p]==l@10MfHFhbFR\jF6555=DfhJbn4(BG;877540Feko{i\6\is7aK<7668@MtG;oXS^\_I8Orce[_hlZVqx}~~-.QEk`w[HTW^kKhc@:;;<84`TDA@BA;>\XPTTWYZVTEjyd?~~{pronmppw~pnor`Ia_|YD_ezuP@Qe/UB8:=<<=DTiigggJGRA9CkG*(:Gdmlgkq8Yq>5200016?Yruc\]h8Ed1Ngh]<%/ZnV;-%2f]C_qdMH)8r}k.-ay_?:EHQVXYSDMxhK?( + + ÿ¿Ļ¿¿ľĿÿǿÿÿ¿¾½|hMNnprrsrssqppppopnnnnmomkkhhjgbacbbdghjijkjjigfghhffjlmn}pOHMvwn[GLhcVHEJKNPRVY\`bcffhffjiiighjkijghfdedcegehlnpoprsqqrqqngZLdgdcadeb`_KMUZ]^ahi\gkkgfggffehhfdhr{_A@=?CMTklgheF[pvqdcR1UlC\]:3,Uy\`k\XAd87D<=;:2(3Z_p^=@h>30QiG=_eDGR\mK7547KrEEVjiihhJGSC9CrF)):Jyns5Sf<5100249B[wugdbn8F{`0KOLM:'0IG74-'2idF]ofOH+5u~q/-cya?:DFMQRTNEP|g;b;6v?>TjdA675.-16-($! + &'&!!! +  #DVL. 
½¿ý¿ýþÿ½ǿǾ½ü»|gMMkklmlkmnnmmlkjjjikmlkjhefdb_]\]]]a`bghhjiihjhhhjhklmnv]FHGUyxyzfIAEKz|[NEGILNPSUZ\]_beeeggihhhijhiigddeeefegilnoqqqqsrrqqqmhYOdffbbdcb`aFOTY\^agiUhjjgegfffehhdcgqz^?>=>DLSgndfbJ\otlg`K*RjARrX94-X|\lx]VBa39A?<;;3)5X\m\?Cm>30TiE8HB=HT]iJ4347;BdfH_d=3%?J=77562*Gfkpl[9\fr7eD86769AMs@?sTQahYG8Poee`bhp^[px~||z//JEi~fOt_NY^dmHg`?:;:<94cTFBCCA;=_YTQRUVXRPHdcSUY@~vps~uonu~}m\Vk]G`]}TGbczQ@R~f5SE;;<;N9')7Hhpt4RlA520138<@Y{vlk^g4J|^+;:74/&+7865-(2gbD^qdJH*6s{k*1dz`=ETE60,+,124<4+)(,,6Lf^YmwE@RWSL>59C6*!(51046774Hl{I-//+*(.45AQ[agikponppoopstpopo\1'($! %(+.<>BMVljaZeXF]osleaK,UeHSqZ74.X|ZktYUDe699653,Fikr~}i\9^kn7cE9777:ANs}A?sUPckZH=SohjgfmoWWpx~~}y13QFvds]JY`fkKi`B<<;=;8dUHEED@:;^ZYZ\^^XSRH`pbaWB~}rwwoot~|iYYl[H]VquVG]fmQBS]5WD:;=;==DUljjifJHS?6>nJ),9Ifz|pr3Qc=23138>BI\xwml`n1O}`-7974.%+9@@<-)3e_CZe[KK):v|e*4eya@>F<88:=@ARd;a98|>>Tmh=5:61./5?DS~C60+*,035:3*)),-5KfgksxEATWSL=18C6*!$2207:;77Fnz~I*/0,*(-24AT[`filnoppnmonrsopqsb3&(%  #)-/-7IP\c1#&$#%%-AVUUK79KZ`D-&##  !5GTG2% +@DA5& +  3KC/  + + ¿ÿü½½¿ľǻÿľÿýüü»½Ŀƻ{gMHjegfdbacccdcb^^a^[\a\\]YXVVWRVUXVUWXZZ[\\[\\]^_^_begh_DFHLa~cZixyF9h^IDELwm\E>ADEFIMOPQTWZZ\`ba`adefeeccbcbaa`ceegilmnopnppppplcRLffcdeeebabINUV\^_hhcfbcdgiggecgedehr}Z>=>=BMUlk`kvXGXprje]N+V`HW]63,WyZft\X@f4;A<<;;3(8U^k_;=el<50SgE><:=LRZgG4565;GefBRcn\2#BH>:8652+Ijlr|jW7ben6cF96779@Nt};AtXSfh[I;RpekjiklUZqw}}v/3TCfqZNYaglKh^C@C?><6cUGDBBA;?_]``adg[RRAVhedVF~~}s{~ynposzeUZlXCb\cdUD_khPAQ]3UC9:;8:Oy}pr7MY;2127:BCPkyvqlin4V{`09>A;3%/FZTD0)3f`?Z`[ND":x|j*2e{`@=G<7769>ATueAa:;4>Ule;5<61027>DQ|A6/+),/27:2**+,-6OinryuDBUWSJ<15?5)!%3118:887BuyrxD+./,)'-03?OY_ejlklnnpmnoqrppqpd0()& #6;.(,((&&%# (37<8C\W]Z-"'%"#%-:O]s\DP]bV?,&%#  +#KZTR>((DS[N, +  1MQ7)" + 
+¾¾ſ¾ŽƿžľûſƽþĿĺ{hLO~oommlllkjjjhgdcbcba`WYZYUZWVXVYUTXZ[Z\\^\[\\\[ZZ\]^fWGGIRrms~~m?uwxmUEBG`y^J;<>@BCFHHLNPNUWYYYZ]_[]\__^__`][Z\_`cdghjklkklkjhg`NNihfdefdb`aGPUZ]^`hjefccdgiiib`cdbchr~\?<>OK85-ThG=;;>JRXhF544798652*IhmqjX7cjm4cD87769?Ot~;DrVSdhZH`[Z\]]aZSQFKT[UGI~~~}}}w|urlt|v||aY[o]EebqvQF^ulwO=Sa6VE:::9:>DUgjigbGGP@865/)'SCOurr7T\=315:=@BRs|tqlkl/`^/?SYN7%-;MM@1*2gdDVdZLC*>z}f*2e{a@=J=665:?BUaBf=:~5CRl~`<5;5/039?COx@70,),/2671+++,.;Pggkuw@CYZTK;38?5*"&51178677CrqfoH-/0+(*/01@OZ_ejklllnpomnqrpppne0++&+U`D8Q7=67@6% /@DYHD^hqX-$%#$$'-?SbqLBZZW_@0(&" + %LUCG<+" *LXUN/ $>TL;44)  ¿¿¾¾¿Ŀþ¿»þýƽĽü{hMO~srssttusssqonomjjkjie`aca_]\^_a^__bceefhhiffeccadefnPEHNZkz^=^۷Z|xZE@EObQ;;>?CEGILNOQMSWUWVVOTUTUXVWRXWXVUTWZ[]ababcccca`__XKPhhgfggec`bGRYZ]^ahieddcefhggdbbccchr}Y=<<@DSVfl`UcWGZmrldaO(YdAZ{Q63-Z{]sTZDa1:A>=;92(FgeFVxmX0#AH>88652,Iilo~iY8aig4bB:6768>Lx~;DtSPhmaH=Vqdf\dnkXYoy~{y+2XBmXv]TafhjOhaBKLD=9;eRFABCC<<`VPUVRZZTPGKSgd@I}|~~}~}|w{urq{z|cWXs]D_\syQG`rxqM>U[5ZC:;<;<=FXkjihiMLOA8=V<*$SJS|~mo.biB359=@AEOp{w~plko+^\-GPEL6$)28;90*0i_CLNMIC&=u}g)3ezb?DK;545:=AV]Ah8Bv;BSka:7;4159:AEVuA92,*,/2481-.-..7MaSVnw?BZZVL=49=5+!'72/67457HxvosE-01+(*.02@UZ_cikmnnnmooqsrpopni0+*%5\0,[=YG?PG0/DTbTJ\^fQ-%($$$%+KgmkD9UXU`A/&$"  !Gf`L8-)'('/BA3.$ '@KICGN9  
¿þÿ¾þ¿Ǿž½ľ¿ýſ¿{hKL~sstuuuuuvusrqrponnpqmjghhdeedefeeejikklnomnpomlmmopJFFQb|p*%qaew[IBHOiU;=BDHIHLPRTVY[\]^^]YYYXXYYYWZXWSRQVXYZYVW\]YXXVVWYTLQighkmfgdb`GRY[^_bhidcedffihgebcdedfqzY;:;AFUUjl^^y`D^psja]M)\bH]rS:5/\|]n|UU?a2:@<;:81):Xfp\=Cv952RcH<:<=KQ\jD6335>GhdFAFDD0%AF<78652,IikskY;ahf8d@97778>Mx=GuTQklYH^WSTTW[[UPHO]l^@Ivy}~~|||y}vy{xrv{gTWo\B__~QI^hfO=>@b|x~ojel*\^+?E?@3"/68<<2)3l_BJHJKD(?z~k'3b|dADI;554:=BTyXFm1Fz:BSi}y`;9<328?=AKWtC;2,)+/34823:3/.9QbS^m{ADZ]XN@7?@7*!'83068658Hxzx}C+02.))/01=SZ^ekklnqqomoprsqnpmd.++#,_!$d?W=>CHJJKNPSUX]^`bdeecb`abc`bbbabb^[^]^]__`cd^^````aZRPggjljffea_IS_^]^chhddddefigfdabdfegr}Y99:@GTVkl[SgUBYptm^`N+\c?TkS94-YzX^bWTE_1JO`hD4545;DfeFDPS@/$@E<79664-HikrgW;agh;c=:::89AKyaZ\^__c^UPIMafZ@Luz}~}|}{{vw}|rms}}{j[\jZDaawvNHaijQ=SX8[A7667;@GWhijihJLS=6?O9'):Zzko/gr>3347:8:=b{y}lj`k*S{\)>KB;/"2=EIB1*1kdFLJLK@'<|{g*8i}d@BL;867;?CWYGm1G5>Tj||b79;43OQTI0 #;6& ¾ü¾ſÿ¾ÿĿĿÿƿžƿü}gJHg^diyurpmkjkkihijkjmnojhgggghggfikikmkjloqoppnorrros~RFFNf҂Y-llfzZECEMtoX=?CCHIKMNRVY]``adeedgfcffhfgfghgdeeeehiilmmlillljkfVUdginjfeca^KU]]^^aghbefgcgigfcabcefgs~Z:99@ERTkk`_jXFWosk]_J*\dDVoU930_|YYaWWC^3>B>?<<0&:NcrY;Av430SdG=::=KQ\iD5446DG=:@Lx=ErOTin`K>Uqda[eqiW\pv}}x)7RD{[sZS]eglJh]DKQCB<:hRDADEE?=a[]__`f_RRIPZ_SAJw|}}|~~}|zty~~~xpjr}lb`lYD^XswLI^lePDTxW9WA7425:?GXghefhEHL<6FmG(+9\o|rm1n?645@A;:?gy}x~lhXi-P{\*>cfJ/#3R]ZB1,:lcGKKKJD(;z|i+5d}b>FM=778<@E^[Fl{u/H~:@Uizw_39;43>B=BJ[m?81.+,244:5;J80.:UbfiquCHWYXPA47?5* *740788:;O~kuA010-/1102BS\`djkmnpqqqoptusqqm],!0-'!*d)]%Y9@J+( 0U]e[QKIe],$&#$&&*NeflR;TimcA-&$# !FQTZCGYfpSKZZ][2  + =;<1(=Rhv[>Cw;30SbG;::?JQ[hD5345=KjbI`w|s/#AE=:9972)Jiim||kV9bgd9_B<^m?:@Mw:GqOUfh_IEXjf^bgFJN=5=U:(-:Yymj.vE956OP=;Ej|~w~mgZj*P]/<_aD0" 2;=D:1+5h_CJJNJA)?|~e&7fz_>HP=878=BJaYFmto,Hv4@Tl_37<418=:?GXk>90+,*256:4#M E1=J;0 
)AIOPDCEPUikS`mY?Zlojzrd_J(Y`D_wQ83-`}Yr^VCf4=A?<;8.+AKkxY>Av<45XdF<<;?JS[dE5344;9872+Jhkv|iV2aid7b@>LT>:?Pw5FvXZik^I;UjcaYdmjW^nx~|u&3RHycvWRZcimEn\DNN?D?AgQFDDCD>>[STVWU\\POIRciTANy{{{{z|~}~|tx~{umq|~zb^boVCa_d|FM[gtq|qL@XW140-12312>Q[_chmmopppppqstrrrm\+//)!"'$(33& 4WaZI;?;;9%%&"$%&/`ntlLKh^e^>+$%  !R[\^E@[abQBTc`J/ + !6>6;FQA&  þÿ¾ÿÿþ¿ĿýĿȿ¼ľü}gLR}qnswvttrrsspoqponnmlihhbac`_`_\^___^\]_^]]ZZYX]]]^aoPGIMizvpnrtgQDBCL]te@037:=>A@CGJNQSUXZ[_abca``_]`ba`a`beeegjkkklmllmooniWUhijnjeebb\EU__`_afe`hjldfighefhgfeir|X;:;=DOUgkXgo\C\oqheda\L)^aC[}P72-_}[rYSAb0AB?==;0(@IruW=?kq752ZdD;::?LS]kB4547;9773(Kllu}eX9bjb:aBD[qE:AOx|3EuRWil_H:Ulga[gljW_nv~~{q(7PI}\tZTZagjEjYDOK?B=CgPEDDCB=B^VTWWW]ZPOIQfzYANwy}}}||||}|y~}~vjr}|~zeXarVC`]jyAM[ezN;XS:ZB7457<>EXojijgEKQ97I];&)RVM6#2;C?80)4m[CFHKHB,A{~\):e|_DGI=676=@H_UCn.M1EUn]58<82OV;=G]we>7.**+237:13<3/.KbWURB6:@5*.;3/5767;Kzp~<140.13312BT\achlnoooqqpsuussroZ'".10$ 9LCMNG[[SD'%&$'&(,[kmjBMjlrg<)%#!  EIFJCDVX[N:G^YL2  + +JYHKUZD#  ½¿¿½¿þ¾ľ½ÿŽýžü}eITwvuvvuuutwwutssssqspqponlnnkihhgffgiijjkihecbccdegioeFJIOzzwzxgUEBDGQo|q`:359=@BDCEHKMMLLSQQRXXZWVXVVWX[\ZX[^]_`abc``bclpmlkhYUgihmidcca]LU^]]^_geahkkghjffdegfecfrW;89=CMPjfVYZRHYrrieda\L,ZdB[vG64.a}ZoWUHg1C\kijkgEJO;6H`;&,?[ioi,lzG:57:74:Fu}~xwxme`j'G|T,G`TEp}/L3CTm^29;75JP<=G^f?:/+*+2467002/-.Ha^[TA5:C2)/=404899>Pyw8330/24333BU_cfhloopqrqqrtttrqm]("06=.""(*' !"&#%&#!"3NU\RHhvqW)$&%%&)2\jgaAKddj`@*%#  1RVJ1?HDJC:Kce`6 -?GIHNG8" ½ĿŽ¾¾ľÿ¿ý½¿ƾź|dKQ|uuuvyusstvvrrsqprqpoqpnnppponmljkjiklloqpljjjlkknpquTIGGOnxodSHCCGK^ythO25:?BFKJLMOQQSTVVSWY[[][ZXXZXZXXWVVVSVTVY[ZWXcyied]PUfggmkedba^ET\\_^_geakmngfifecdjfebgt}S999;BMRhfUgp[FZqqifeb]J.YaCK]H83-b}Xn}TVPo3=A>=:8-)?KxV;?y=4/YdD<;:9972(LlmuxdT;al^<^CBbz;:BOyv4ItQUej\K:ChPECEDA:A`]qnac]PMGOgkZ?Puy|zz{|{}~yt{}vtojjioze`csW@aZvkEP[^a]daM:[R>X=4334??E\lijidCKP<7Op>'-?\toh*grG;7;GE7;Eyz}stwrke[k)N}Q-;CPK6"#2YlQ8.+8q\EIKKKE&Dz}V%7h{_EKJ<98:<>F`MHnyl0Q0EWq[3::87NQ>>H[hA90++,357924?1..=Ueaerw@Lel`UB49G2( 
3>2/347;AT|w943//35423BT_eghjoppprrsstturrm_)#10iQ#"9HIU)4*..*%%# 4VklLHakcS&#$%%%(5IU]`9FijeZ@,%$! (7;3'2DILGDV]XM1  ,MYQDY]@$ ¾¿üĿĿľƿƾǻĽ}eKNqonnrtqpoqqsrqqnnnolklmlkmmkomjjiiihillnoonlnoppqstuv~KFFFKVdjhgYQMECDDLTptiZ>29?BGJOOQTWWXX\`^bccccaa`ba_aded`````__`_``Y[jxg\]YPXhhjnlfddb^FSZ]`_^fe^hnmgfigfdfjeedgs|U:9;=BNVgjYfhYIZpokhecaL+`aGSoQ62,`~Xp~YWUw3@B?>:9.'EL~U==gy530\cC=<=@JP_iE6456=HibC\u^0&GH><<;83,MkkvxfS=`n^=aDA`q;;CNyu7HuRS^dYJ;YnebZdkfT`iwt#6REzasTWnogiFkYDNL;=:AeMEBCDA:A^^zsceZQLEKefY=Luw||{|}|~xqy~wpigjlrzz~{qmovW=`^AR\`kdd[M2(-:2,./4:BS~~|;340.144/4CU[djkknqqqqssrtwurqq`+"3+uY "[C!eEVKF)]UI-PMD0:7' 8UgkTUiY_L$$$#$#&:ef`b@Ielj]>)$$! +1853-3DQ]RBT__U3" + ,X^RM\bD$ ľýĻ¼ĽĿ»¾ÿļƽüķ|gJOqkikmnmkhjknmljlkjgigegghfhggfegfefefikljklmmnoqrstttsuPHGFEGEILFEFDDDJVhoi^G75;ACGKORRVZ^]^`cbdfgffgigghgijjikjliijknigihfjonjghfWXhghlidcca^GT[^a`afgahmmhgigffhlfefit|V=:;=CSVkjYaeWH]spigdb_J*_dAWuN66/aYwYYYw1;DfMFCCD@7D^Ur^[^WPLALlcRBJvx{}zz}}}~wlw{ujfgls{~|uuuvS=e];P\pxgN:ZM@ZA5226:>E\lljhbBNN<8Lq?&1A]|me+fr?86=RQ;9Hzwsuvqoe]j!TwP,=HGC0  /FVI7/)7tWELKLMH*D|~T&9d~^DKI<889?@>bNLrrp,N/FWowX/>JQXddSHK_f@91,--13883EM70-?Yg_dtr8KfqfXC16?3'.72,*,5- 1M`aTXhcgN($%#$%(6[OGI=F_fi^;)$$" DZXP86=QcU@Kcc_7"  1WbTOXS9# ºž¾ÿŽļþþþľĿĿ½¼ýĿž·|iISyollpomjgighifadeda___`adca\^]a_aaa``adeeffhiijlmopprtp۱wRMECEFGFDDEDGKVfkc^O825=BEHMPRUY\]]bcdefghhhjkigjklmonnlllmnlonkjjklmnorrq_Wgijmicdb`_GV\]a`cghahmnfghffehldfegq{V<99;APRjlWfwYDZnpjgda]I,\`BgO882^|Wnu[\Wt/?C;<=;.+CCuV;Chz95/YbC<:;?MN[eD5657;HhbC\}P/+FF@]xK83+KhkjrobR=amX8]>>fx?:ANxr7IqX]leYH;Vkd_\gniO_ny|~s&5S@ZuR[}vheJjXAJI=>8HhKEDBCA7F_Wv[Y\VMMGMcfRBQtwz{yz|zz|xry|{rgdhq|}}ttsuQ>e]y~4O\xkO8[~L?W>63459>C_mkjhe559NL89Dwxnuvpmb\iQzN2@IHK2 "/4:;5/(8rZFLLKOD,Fy~M(:e|[BNK?<;=@AAbvMKtqn*Pw-GVhwt[5AaRJ\d?:1--.24990BM9/.@Znptzs:NdshXB/5A3&/61+*-4<@V{v0440.343.5DT^cjkmppqsppsrrttttpV%#..vH%+z6GV!6j& 
ÿ¿»þ¾ľþƿžž¾ǿ¾ǿŻĿ¼~{fHTwxxvvusqqppnmkfdghgfbeffdd`a`b`ba_^____aeb_aa]]edgkllkd~cPHIGDCHGIMS^ie[UM:0/6>BFIMPSWZ\Y[cdcedfhhiiijjijlknlmkiklmnqnmjikkklortq[Xdimrleec`\GW_\_^bfgaikkeggfffhlhfdiq|V<99=BNSjmQQbTJYtokgdb]H*cbGkO963a}XtY]Lt/CB:;=:/'GGtT=Dv43.]`D=<<@MO_fD3456=HjaCEW^N/%CGAaH63,NglxgQ:`lW;[>?gB:APzw6IqWZhg[G7HgKEBAB@6C]Zb`bXNNJQitYATvxzyyz{z|{yu{ngipy{|qjeoP@d`~5QZh}z^N6\H=U@6216;?G^mjigh?NL<7Jj=&0?Wcclhmd&lsC55:HI:;Kywx~pvvrlc\l#K{O.FW\[2!/6863,'7rXDJKLL>*Jz}L(:i{[AIIACKLCAEbJLssu,T{-IV`hmZ7@MUWej_DH_`?<2,,.349:0AG3.0BZowz}q9OgshYC004.%170-,.5;@Z|}2230.254.4FU_dhknprrssrstssutroT"#0/rP?5`P(IE2FC]8+.PBJ$0K>$ 4EHZJLLAQB$&%$#$(9`grkDK]WYY6)$$ !NibX;7VcaD>UYXU3 /IVKMLJA' ¿¿ľ¼ûž½ÿǻƽžĺygFRxvxyyvvstsusqtrpnmnqpoqommjkkmlkjjgeeggfffb`a_YWa^ad`aa_kxzdVKBCNQV_gdYRQI9-+/8>BGHKNRXZ\WY`abcacdfhhghigghhhghfegiijlkkjhhiijmnrkVYfjnqkeea_ZIZa_``beechjjfgiffdgjfcchs{U=:;?ESTlgVMURG[rmidec_H(`_C]rF870_Xs~W]Ks)CB<<<9.&LHxT=Ap}520]_D<;<@JP`gC65479JeLEDCA@7E\_^_aRLLKTipV?Suxzzzyzx{{yt{zogipz||pf\oRAb]w1R[c}^M7[HAVB6217=>E]mijhe>MK:3ewqsgm`)pyE64?XJ:FbLKty|*S3IW_cjZ/<;02AE;DTYUJ;XfhV3  2KQLLZ`F% ¾üľſ½¿ĽþĿƼľƾ»zgJStpsrrqoonoppqqmoomnonnnmlljjlmlkkjgffgikmnlmlh`ajlkllmkjifmzoeemoni]SLME7.*,06<@CGIKPRTUWWZ\\ZYX]^^aacb``[\^[[a__`bbecddeeefjjjkgXWeijokee``\HX`^``cfeahmlhgjffcfiedchr}V=:=?ESQleW_fWE\mngeec^G-a\AT_B962cPdiUWGk-EB;;<8,(FFrT>An932\_E;:<>KOah?4438@Oi]Gl|oW,(HD@]H82)QkmqzhU:amW?Y?>KS6:AO{|0KrQ\rlZL:YhgfdjmjPbm~~|x%8MJdrUUwskdFkRCSWA?;KdKFCCCA:F^]yyWX[QLLLWjjR=Svwxyzyyyzzvot~zpkhipuw{lacsQAf`dz5QWmhO5]JDU?7647@AF[ihikk?ML;443/%2=qrna+Z_844AO>6:F{wlsurma^jvi~IvH4HYWF-"?ZfZ:.&7sSCKNKL=&PzJ)CeKGq|~,Rv1JT\bl[/:;65A@9>J`Y>;2.-.45:94=@20/C]qwz{o8NhtgW?268/&2813@558BT|n-220.134.2EW_ejnlprsttuttuuvssnT$'/-(%#'(%39:+6(;4=82! &)8H@?_jeG'%$#%%'8[WU[ALeiiW5*%# D\hiINUFDJ9CUWF. 
3KSPHV_B# Ŀÿ¿ÿ¿ſ½ƿǿĺŹºzfMLe^Z_dcceaeddddfeecfeddeeddaadeccfdcabcehikkllmhimnoqsrpoke_duzurlh^UQPJ@5.++16;@DGJMNNPSXZ\YX[[\YYYX^ceedcabcbbeca_`Z]`][`_\_cceeZMXfjinhdea_[GW^^``dgf`inlfgiffdgiedeis|T=:::BSOhfXrx_L\poeeba]G.b^Fe{D65/eZnnZUCe1?@=;;8-)J@qQ=@o}340b_I=<<>MQ^fB5249HVk]@i],(IGCcH61*Rhmx|fT:_nO>V??_i<;CQz.OqTVeeZK:WgeaTbljLbi~|s9IF`qUVielcKlRBHH?B:HeLCBBDA8G\[|~YXXKNLJVbcOAYtwzyxy{||zurz~}shgjorz}yj[arN@f\7QYutz]N7\KBWFB@?///B]pwzzm5NjufWA5=A2(2913<449CU{r},11//.13.0HXbgkmnpsuttusrruutpmW$$/+%! (+" #&&06PjmpK$'%#%%&?diciAU``h`2)%$ F[c^CH`dRA;IB20% 0NVMIP[A! ¿½ÿþÿ½½¿¾ÿĿÿĽȿü¿żžƼ·yeGJeacfddefc`\^___\]]_ZZYZ[ZYT[WUVYYYWXXX[[[\bbfffhijklnnklhc\Z_elpvupkhdaXQJB<81+,17?<;;9--LZ@=`yJ=DOz0NmR[kfYJ6\ieaW`lkMbl}~|s BA=H;KdMDBCD@4I\[wZYW?LKMQT]S@Trvzyvy|{{yvpy~~|ujhhsyfU\oPAd`x}{8SY{~jM2[HB[Xbj\\QCE\i`_ef=NL:6No=&0AazdlZ*KK5:GIRN79Dpmuwql`YjumzKtB5QWNO/ /4574.(9sSEKMPL>%Ny}J,:n|\CPc`Tm\E=GdFPu{~{)Sv~3JV\alZ.:936OJ?JK?85:70DA011B\nv{{n6NivhYC3??3'4:127258BSz+00/-.23.3EXagjmmoqsrstsrsuuuqmX#%.,&" $(*+/VdXL<#&%"%&%9^U[d>Qc_dS3(%"  OZV\?>[d^H620+%  -S_GCQF0 ¿¾ĿĿýºÿ»ſÿľþĿƿƿĿżſżöziKVurtuvtrrppmljijihhigfebba_ac`\[ZXXRTUROOSRVTS[W__`a``baa[[SQSTTVTYYZUPKCA<752,).39?DKLNSW[a`cdeehihihgghhihjmmqnmnnnmnlkmopqqponprupooaSZhkjigfeb_[LU\^b_bghbhmkffgcddilgfdgqzT<:9<<@JO]dA546:DNiZHygB-(GEAbB72)PijvwhQ5apPAV>?JYB=CN{{/OpR_lg\J;]hcaXcnlHbq}|zp$7ODN|m^sNUgegcHeVADDBIJIKNRj[CRmuzxxz|y{yumu}}~ngjxaUfsM?fc6QZszeM3[JC^b|ot^EE^idcgh;QK;6@L4$.BgloV,XW28DB8858@~skuxrma]kpj{ Qs?4FUVC0#.6994.&8vTGQSQJC%O~~}I(:mzUDP`]ZeRA=FbGR{rv}'Wy2GX`fiV09803PK;>I_xiWAHckG;;908;100B`qw||l7MhrfWD3@@1&7:00D768>Q|s*21/-254/5FXbhjkmonrstsssttwtroT$'0+%! 
$(++/[ffjE &$#$$(1^kl]8CipjL1'$"  M__bB>>:/& 9M`K2*" + +ĿþĿ¾¼ºÿľĿƾȿÿþ»Ŀʽľ~{iIXyuvtutprpqpqopqnmnnlllmonkkjkiifebb`_ba^]\\\XXT[Z\\YWXVYXVRIMOTQMIIH@>>=:751,+-06:?DFIMMPSUVYZ\`cecefefgfdddhkkhillmlnmlknpnopopsutrsoeTWiiggghgb_ZGSZ]a]`gh^hmlfffceeikgffiqzU>9:<=8-)K?tR:G243\\D=;<@KN`eA4436>GjYLc}}S0+GEAmA62*Pnoz{jS:cpM@YAA^nE:CQ~{-RnOZkfYI:Ygc`[hpiLdo~~|n#=RIv~q^tMPzukaJjUDQOEH=LbNFEBCA4I\`{yac`=IKIHJONATrvzyyzzwzzwoy|}yowx_RcsL@d`5QW_^bbXI5^HF_muoq[BEZjjhkf=OJ97AV;%1?oqqZ&qr88G@0348B~sitwrl`YowkxTs<3DSO?/#6OWG9/(9xNH\mZM=&P~}~}~K)>p}SCQWNWhQB?IfAOypvz+Zz1IYbhnV-:706QM;>IcoUA[m>>83DC200D^puz}m6OlueVB5;=/'7;23:448;Mlswxi+42/056405IXbikmoppsuttrsuuxuqpS!)/*& %)+-/WJ>A7'%###&5UrqU9XdmqT0($#  "CKY_A;:87448:<>@CHJKNKLNJKPPQRTXZ\\ZYZ]\[Y[\]\`cdfeeecfeghgfjnnonknj`KRjigjhgfd_[HVZ_``bfd]fjhcfgccdiljgehqzU<89;Y>@fF:?O~z/TpN[mlYL=[gfb]ingN`k}}m5I\]ef^db8LKHHFEHBVrvwxzzzwy{wqy{~unwaTdwLAeb6QWYZWTSK5`HFZ[Z\]ZVDF_ijike@RJ;6BQ8"2>x{nrV(=:5NB/.1E^pwy}k4SmsfUA58:.'3:35;425;FagjiU*2100673.4JYagknpppppqsttsuvvrnP %1+'" #)*,.@:0/+&$"$$+@]XYV=<9-)QHqO>Apm21,`]F>;;@NM_nD2457=KhXJh|P'*IED\sE80*NjlpuohO?`uLBZ@@lr?;AV||-TlP`os\G>YiggelpiPco~~}j"$5FrqS(@?UZS<6:I¸xumtuqk__ojqZpA1=675+"7FG?5.':|QGe{gK@%P|}C*;p{WDLICWaMB@GjEQztqy(Wz0NX^bpX0:723QK?AEcxXCuE>;6N:-02F^pwy|j7SkubV@6;?2'692;A416;D_ddbO)21//463-3JW`glnorrqrsqsttuvtspO%1*%" +%(*+-/11-( '&$%%(DhMEW=KNJXM.'%# G^bV<8@M_N6[_UM,  4`_H) ¿þ¿½üļýƾƿſ¿ź¼Ŀ¿¿ȿž¿˽½Žľ}gJ[voooooollnnnlnkklmnllklljjjmoljffehggghikjjjiegijhgghhihehdc`ZWWWTQJDHHHHJKOSRX[ZZ_bbdeegihcghefba`ba`a___^\[\YSUV[ZYYUYWUVVTTUVTSPTSURNVfkkpkgec_[LW`ab^bdh[gigdegddddeefdis{R>;:=<;<9*'TDjO=Epj441`XF=::?OPamD3446?GiXEIJGH++HEEfC90/QihhmjfS=]vCEW>?lo9;CS|x.SiS_ieUI;XfjlooniM`n}h"6J\Uad]ZU>LDDGEDF=[quvwx|zuyzuuyvjwzw_OaqNAbal2QOMMJHJE5^EE^VLORRTGH_iiijb?QI77N_8"5CjuhnQ#xz9>U_R=67C÷rkvwql]_nset[s=/6364*#15634/';|RIj~iN<,K~B(>nyUAJE@YiOC?Dh@Syz~u'Zx5KZ^cpY1;835JH=BJdS>tC><.G9/02E^qwz|j7TjrdW?3;C4(57.8:117;GafbdK(01/0353-5KZaglnnpqqrrrrsstusqoK(/+'   %+,-.2221)!&%$#%)BqlZS7G[]fQ2'#"  
Ka^Z?7RXV84DEOR3! &(*O]H' ¾ý¾ĿľþǾĿƿÿȾŽǾŻĺ}gL^{mnnnnmlknmljihjiiiiigfegdehffhhgghjiiknmnmommqomljjkklkkonjdggcb_YUUMPPQMQTSRY[[\]]]`cacdcdhkjjfgglhgljgggfgigefijilhhfecbbdbbedbbdfe^RXdjjojggb`XITY^b_afhagiidfiehcddehdit{V>99=CNUiiOrtYDOslhefc`H+c[B@C=900d\jlWWGh,C@>=<8.(VCiQ?E963^[F::<@MNakF5569OLo^nHO{mg`OlRGXGWpvwwwyxvuxwqpszxcWdqIAd`X0QPKdjYFC7_HH\SIJLMPFGckgijaCRE76EU:$0X~l,Wr3LZ]boV.:94269=@Ib~}SCwF>:1I>020E_rx{}k8TlqdV?147-' 5805=226;DZ]^^F+11..341-7KZ`fknnoopssuussttrqoM(.*&!$>'-0&4&&+.$#" ! '-0,/9>82(!'&%&&)?dde[88D_pK+'$# !MU_kO) -1-4Kah9 *00HK<   ¾ý½ÿļ¾¼ĿĿľƽſ¿ƾ¼ƼĹ|hI_wnopnpuqpprrnmpnkknmliiihfffefaddedfhgihgiggimklkkihjigkmmkgffe`db`]^]^]Y]\^\\Z^__aa__ada``beeeedcfdfgghhhhhiiijilkllkihkikknlmolmqpqldKUgkkmigdb^[DW^`b_`hh`hkjefhbfdfhgfdht{N:68=ANVjhLgtYAMtkfgeb^H,dZ=;;980/e]npZWFk1H@?==9.%UGiPKlZCBLF8**MFCqC81*RjigokbK>as@EVAMt:=AVy}*UnK`plUJ:^ifd`gmiKem}d?NSoapGMrif[MiRFVH?C:N_NHBAEA5J]aqoce`HLJEDBDG@Uzmtussvwwvxudj{xh]apEA]e4KP`h\iLA8]DIYTMNIIOCEbkjjicAOF:4Rn;"36BKSFXjK(@7BRF839Bqgxzpm]^rrjnVo;4=FOB+#8VaJ;2+:}JGl~_O8&T|~A*@oyTALGD^jNDAJi:2JA03/F_qyy~e9WmrdR@1-,)%26/5?536:ASWVX?,21.*+0/,6KYafjmpqoptuuttsrrromL(/)##v9Ac*c1>KK4'C04) '-./2F]B3)"''$%&%I[Uli=D]if=(&$$ L9@QC),D[j[6 .1 ¿ü½¿¹¿ſƻû¿ž½ÿƾÿǿ¿ľĹzgIZqmlkkmnolnqpllkkkkkjnklmllikjlghgijjllkjjlkkkmkkljheihghkifceffc`a]\^][^\]]]^^]^abgeedefdabgiggghgfcefegiggigdhffhhijihgjejijjjllmkikg^NYijknifeb^ZHY^]`^cgg_hijdfgbefhjgfeirzQ;8:?CMWijRgnUCLukgeb`\E+aU;;::81/g^{^SCi2B@>=>8,%ZMmO=D~u121c[E=;;@MPbtL6659@Lk]NOIBB,,FFArC80-QmkflgcL:`zAFT?Gvu?=CQx{(UlY`jcRH7^fea_bkkKdle!=RIg_rFMwkg]OmSCSJ@D9M]NFDBEB7J\g|{c`]FMIHFDFF@\nrttuvywxywsw}vlaetNC`g3NKWmmkF@:_BI]YTUQUWEG`jjjjd@TI:8IL1"36ThgaZlF,|>7@cX@4;EseruolY^ogjlcu=3BXaH.!<`kN=3(>MHfyfN;*VE(@ozTBHED]oND?FfBX}p*[o2N[]coT*;943:9;@GgrsLGzD?;4G@335Gctw{d8VppdS=/++*$!67023226:BNTSR=.01.**./*4LZbgkoqqoptvttttsuspnI)1+%yU_Y#`4[1+9(c7S?" (0474GaE5)"'&$&&&;F,CF32255/&%%#  BDFV<#.QXaV4 >:! 
+ÿ¿ƾ½¾ƿþȼǾȼżyeM^tjkmllmmmlmkiihiiigilighhjeihhfgfgjikkkmlmlmllllkhfijiiikmoomihhgefffc`a`]^]^^]abdgcabbcdacfkhgkiiffgffdhikjjjjiiiifiilkllkkkkjlnmkiki_PVdikmgffd`ZLW^]`adfg_gijedebcdghefdirxQ<7;-)TNtK>C}v543c^D<=OMl^nCQyif\GiOETJ>C;N]MFECDA9L\ezpXXWILHGFFGF=attvxwwyw{}yrqssI@]bs3ROTt}jCA9^CH[XURRYWCGbkjijh?MC85Z{;#37[]\d`oA,{G74*@}LH[pkO:(U~B(BpxQALE@GMCC>Jg=Ul%]l7R_afoR(;946HF:@GfldJD{B>:4JE123Iasxz~c2VoseT=/,*)$"57089216<::820cYvXTIf/D==>>:.'\NrI;E~z231a]D>;;APOctL4569AOk[VP)+FCA^j@9/,WkizzdL@_x=DQ?@ku=-~|D55BC23=FƷmhuxroX`ngjl!lr<8P[[K.$2;FK>3+BIIbpgL;+VC,@qxSEOEAEOFC>Fm;97720g^u{XUMa/B??=<:.'VMpI>Bzt142^VD>9RF71K\;"14^^mb^l?)yr=25DN43:K·ngvvslU[ljwi#bs?:V_dV3 %06AF@0,BQDl{aM;(X@+@rwSDHCARoTC@Kp>X|i*Yj:N\_dlO)8736UJ;CMiyxIDxps==94L;/14Jfrxzc8UmpcS;.+*)$26.,-.07:OehimI*13.**-/+7R^dhkoqqqruutttttrurmC/1,$$s>%a RD7W'W$N5  +'3AS>IUA6) %##&&$&)*&&+340)$$%$#  !50$  L\ZI*.A4%#!+67+ ¿½ÿ¼¾¾Żýľľ¿½Ŀž¾ǿĿû|gJ`l`b^dggfceebecacccacacdebbab`_ddffefijhklkmpmkggjjmpnlmmnpnljfgegjmkjjgfeheb`_abdd_`dddddddceffhhifffefeedceijjlkhjjjjkjjlnoomnnmoollfZLYehikggfd^WGW[^aaeed`gggefg`ecffffdiqxN;8;>AMWjnMopNBQnhgfcb^C,bV>888910gXq{ZYHb2H>??=;.&YWtJ>Fu640`YA=:B<^?IVTQMNQS?Ealkkoj?RG75Mh: 28^hi__n<.}w816OK439GjbwzunT[kh$ov<5CDKP1'225=<1+ENKq[L;)YG*Cu}QAKE?L^LBAJo?94M;.12Hduy|}c4WoscQ:1--.( 56,)+-/3;PcedaG,12-*,-/+6R]ejmoqsrtuutttrprurjB+2+"8",9%G "ECT.T'O5 (2DXGCF@4) %"%%&&&))%'2HH8,%&'$"  "*0& +%2=/",-*1=5( ½½¾ĿþýÿſĿÿ¿ȿýÿĽſž¶~~dMg}mljjlljijifhffhdcee^_a`Z\\\\ZZ[\X_[Z_[a`aa_ce`dgfbcbc`^deeeecaaagffd``b`abb_`abeeeffffdfiffgihgghhhhggeegggghhikijljhiiiijmmllmnonkjfZLXejgidffd_XKW[_b^cfg`fiiefiadafgedcirvK:7:>AMUhjEQQLHUnfhfda\E,_W?:::9//cVpp\[Jd2H@>><8.*ZVjI=Dv040cX@=>==JNah>523]|ksuwuwyx~yliltCB`dbx2QO_fIXFB6a=KYWQSUVVGIcjkkkc=OH94420#15NWRFYj:-u{B04QB239Höjfx|wnQ[mb%uv=3ARRE2-823651*FJJq~^N;)VE(?svM?KEEbnKB?Fn9Ye.]de9O^biqM/:649XI8BKjID|srx~==9/H6030Jgsz|^4TosaQ:68;:-#65-)*,/2;Iab^c@-/3/*+-/+9P^gjmprsqprsttssqrtsj>-2-""$'".( +)2C]JNT>5*!&!#%&%'+)'*7ZT:,%$%#!  
"" .:@/$1724:5& +¾ĽþĿ¼ľſýýžɿǾƼù~fMk|spqqpoqnopqpmlmmlmlkkilhghggfdfeghhihhgdedefhfggihgbaebb][_]_`\[[Z[\ZUQQUSXXUX]Y^_\\[Z\``__]````ab`_bcdbccdfijhggijjjkllklnqpptsupoqrdQXhkhhgged_ZLWZ^a_dge`gihffgddbffdcchq{M;9;?BNUgnHMLLHVoghddbbD+cT@<;;804hUZ]Z[Qc.CA>==9..]`gJ>Ev341d\C@DCDNOcc?754;EPg[QtzjA)-GEFjw>90+VqkyuhIB_23cKQP=4+!%$#$$$&++'';_R;,%"$$# + '-)!$7EH1$5>4032%  +¿¼¾¼ĿýǿƿĿ¾ÿý¾ƾǿÿĿɿbE`mggijikjjnnnmkkjkkkkkkkjjjjmkjihhjkjklmnnnoopsrqutrrrsrqppmmkmmlgjmfddbb]^\^]\^^aba^^_\Z_Z]YVYV]\YXVSWYVT[Z[\ZY]^^bacecccchffimlhkeefi[IWgjjlkiee_[IV[_a`dgf_fffgggcdcfdbcchryJ<;;>BMShnFLMLKYmgfcdcaD,aS@<99803eYaa\TOe2J@>><80*_[hL71)VokwsfL@`7KR>7798=EUzu.\gXZhjTI8cjf\YfpgLdjWEKIcblNRX^eZPhHCQMAH8RZKGCBED6NX`spUWWMMJMNMLI=_snsuusrwtsw~wd\em@Dagm4PPWYcgKB5b9KYSGJLKNCKelkkka24C?24;Bjex~uhO^ory\ru<7JjZC3 0BHlFGtn{<=95I9104Ggvyz\7XttbP98RQ:-#55./..148BNZUS4(/0,*)/.*;O_hknqqqopuvtsrrqqspi=00(!1)#-%(!#%&""#$$! *2C_DTS:3)#%%$##%'.-++=gW;,$#%%#  !5I;) +&9JK3&7=3+++"  ½¾¾ýÿȿǾÿÿƽ¼ŽdH^rfgabdfhhjjihgdedeeffegeiifhffggfgfghjklmnoopqqosstqrsuqspqqqoorpqqqprrmnnkokkkjkjkilpkikjhjjjgghffgfdfeefegfecaehhfdaedggffgffefg]ac^TKWhjiljggf_XJV```_dgh]ghiffhadcfgdedjsxO?:;?CMThmGNKLLVpihfdb]@+cS@=:98./f]rt]YRc3C??>=:/,lZgK=Ak\722]UFInlKMPdhB877866:>CT{v*]fXVZUOHi|vututsv~}nvu}u_]grCD]aw/MNLCUQ@@8`{=LWVLMLKNFLdlkkjd>CHoDEwsw;<9:V;106Ffszz|V6XqtbN>@LC>-"6419;2357?IQOG0*/0,*,00-=T`jmmqpoqtuurstsqsusj:-/)#jN4e)].'99S65074,!  *3;ZFXd>1( "%%$#$$(.197@[W3*&%%%! 
+   7NB) )@Z]9);B413/$ +¾½þ½¾¾úþþº¿ÿƿĿ¿Ľ¾ǿǽ}eNfmmommpsnnpmjjffehfcdcbaccb_]__`aa^^ba_dbcffghhgfhiijkfchhiffghiiikieiiljkhiiiiikklkjhjkkkmmnmnmllkmmlllkkklllnmopooqpopqstuqoqrpolknj_PYjkjkjhec^YHW_`__dei]hklhgi_cdhigdchq{I;9:=ANSfqHMLKKXpefhdb^C-eR@;:98-5h[uy[WMf5CA?>>:-(iUdI>Ip212`VFHmjFNNchA757D:Q[KECCFB6L[`}cTTSLNKNMMLK?k}xtrttst~{pjzqtu\SftCD`f/PFOizlF?6`;JYXTURRVCJakkjkd=RCDzJ 56bNfZXj1'XR7422017XW+a|zR9Q`bhjI.:CKO\MBBJnEAqc{x;=8;Q9/16Ifuyz|Y5Vqp`M;H]aoB $82.<>4148?JPME1*02.*.00.=Uaimopppprtvtssutttrj:.,(%f\Gxl1(D:rRNX_;2'"'&%%%%'.9YD>DE3*%$#$" ""=UA' .L_lC"(9>7672$ ý¿¾Ŀ¿½Ŀþ¾ÿ¿þĺž¾ý½ÿþƿ¿¿Ǿ|fKbzpqpoqttruzvrrstqoomoiilnljidfhfedgghhefeefeghgfcdfeeheffb__^`\bb_Z]`bYWX`_^`^aab`dbacdbeeggihkjiklljljjkkjmmklmnmlmnppqqrtrpppqprppppdNZjolkhgdc^\HW]_`_cfgakjjggf]ddhjfdciq|J:::>DNSfnGKLJJYnhdfeb_C-aP?;:;802jZxyY\Oa4FA><<8-)hR~cH=Go.25aVDDb`DNKbbA745;CLhXQ}m>*,GB?Y\98.+Wtk|ubID_.OS=8768=DTs,^g\_moWF=bjg^\emgEekX#JLU`hlRSU[dZTdKCVH>E7U\JDCEFA7JXe~aWVRLMIMMMKI@hvtstttv~vrkdx}sssustub[fmAA`ho1QJKLQJA?<`:MYWWTVXVFMgokknf=SFExs?49mda^_j1'[S<61-,2;>HŶmkqjK\jWpX%qu:HJA;4(KEMfsVN4+`B+BsuODIDDWfEB@Lqy<\X2b~wP:R_bkoN0:89900iXxz][Pc/DA?>=9*(kNygH;@eU213aUDFsuFPMdf?756:BLgWBnr>+2FCANG=:.+]qgprg`EDa0LP=9678>DSs,[d[`jcQD;dhhedikeJfkW!IGI_cjSVW]dXS`JDVF@@9TYKEEHID7NWZiZSXTMLHKKJKI;e{tuuttuw}ypkkwtkm{zxidioDB_jt1QJMNPE??>c7L\daa`]TEKilkloa;PCAexr6$2=\^gb`i/*fb=2../2=0.=LO=;2'F>MWYQL1'bG)CzzLDIEE]aF@@Jry;]S/axL;S^ejkK2::3:ULECMp?I~ysr}l8>:4H9027Jhuzz~V5Qgh[I75XY\3&430=6/17:BNUSL-+22-++/-*<6++mQ~hK=In126cUEGiqGPOdg?777;AIhUPo?)0HDGnmC9--^niine`DC`4MS>7878>DW}t'_gWbplTD:gkllnoncNdiWKPZbfiPX^ahXT`LGWE>C6TXKFFHIB4MYWd\WYVMMJLLKIL:446:BPYWW1)32,*-0,+=Tckjnrrpoqtuvwuvvwupe</*'#l8-pf<5>48$N#3V# + +4DXAX^B4'#'$&&'&(1DdURYG6+%#$#"  %7\;<77( +$6O\gE %3?63:1$ 
¼þÿĽüþýǽſþ¿ŻƻƾżĿſ¿ľſƽ|bG`tgfhpolkifgghfhhiddcebbbad`bcfbdfecbfgfeghiiehikmllnmjijmnlmmlnnpqpqqonpnooppqqmnnnmnooonnpopqpnomikllhggfeegiiijggfeeecccecdcdc`bb\[ZQS[ijgkhfca]SJUY]b_beh\eeefhh[cceddddjqxO;:<>BMVisCNMLGVmjegcaYD,dO=<::6,3k[||\YZa6HA@@@9-*pPhK=B~n513\VFJmgCPOdkH765;CIgRFcrf>(/HCDtt?9-+[pm{tbEAd1MQ:8768=BT|u&`g]ekcOE:egknqrmdHejSKK\ZgjSYakmWUdKFVF@F6[ZLGEFGA3LWZe\XYVLLKMLHHJ:kripssqqtxysru{|lek~}sbZhrAG_i0VKhe^hHB>b4Q`ttz~\JAJjnnlm_?Ujo^k&"28JU[9_e.*m`;-/7>7FGOklkhQ^mkxS'tt4:T^gJ+.H^UC=1(J;L\jXL3.cD+JvxIBFDJlgBA@Ou|<\~S3hywL9R^binG/F]S@TH@BKlDGkah9=7,*,.15Kluy{Q8K^]WJ59QTH,%75/CB4249BOVTV1+11+*..+,=Scklprqqpquvuvvuvwuqe:0,'$J.(U9H@&64!F#1I#  +4@G?Ma@3%"'$%%%')1GgMFaI6+%#%$! &5\>B@6( !/DVmA $7?2*2/$  ¿¾¾Ŀ¾ǿüžƿÿſƽ¾ƾǽſǽſȾľľ}aGbuiir{~xqrtppqrpnoqokklkjjjkhiihhebcddecbegd_`deddgfgdecegedcddafgfkifeciiklmlpkkjjjklnnmpoponnoomkkjlmlkkklopmnoolnpnnnllmommmlmmnhhh`W[jniiggea]WFSY^a_cdhaecehhh_bdecdechqzL:;<=CLQhqCNPLIWpgbgdb]C+dQ=;;;8/4lZxv^\W]3CABEB<.-tYgJ>Ji-03^UGN|lCOOjwJ465;CKiYINFGA(/IFHwu<910YmowcCCc0MO>9967=BV~u*ag^dmaPD@>@H9ZZLGEFE?7OX]reXVSKLIKLJJL;v}tktstsstwvsrxwibi|~}r^]gp=I`l~{/PDdolcAB>c7NVRLRMDFDKlonmoeDWop=e}h-"2=`goR^f,.gX2+-782=DS¶rqliV\qdR1wp57JRE@0+CXK?80)HANaiVM/)f@)J{wHAGCFlgDB@Pu~=_yo~O7gxN=S`aktI7TpCOB?BLtDKwpj<=7,(*.17Lmz|{~K7JY\YK4?ZPk6*63-CC3136>IOLG*+01,,-1..?Vdjlostuqqtwuvxuvvvsd61+'$ % ',"*4   *3>Q@O]=3&"%#%&'&*2JcJMeH6*&%'&# $7O9JK4' %7Kj<%4;1,00$  ¾¾¾û¿ÿ¿ÿþǿ¿¾¾ǿǾžƾȾ¹ƿƼ{cB]ogelyxxrlnonnnpnmllllnoonnomllkmmjkkkjmommnpmnmmklmmooonnplkiieeefggcdaZeacdc`^`caefgffhhd`bdfggfifefhihijihjkkklloooopppnlmllmmmnonnnfVZkledfgca^UFUY]_^ede`dddfgi_bacbdfcgoyM::<=ANUepGNONIVmfcgeb_B,bP><;9703gUnn_\W]6IDLWP:.-qtbnF9::9=AU|t,\f]bbcQC;ficZYfobKflK$PJR]jfN^jmhUWgC;;;@C;VWKEFGE@4NWawaUURKLHIIHIK>u|sqprtqqrnnltpjfjy||ta\gp>I]kz3WGbd[bBB>b5MM?255;CAJlooorb=Ttv=lx7"2=jYbb`d+-`C/++7508>ZµpskfV[uh{L7yp65<<4K;*>aaE8/*C=MgmRL0)aA,MvrIAHCKofCD?Qwy9^xnK5hyP>S]`hpG7Qn7GICCGs}AK}sg<<5.))+05Kjwyz~O4MWZYJ6?WJe4)96-@@5357?IMJC+*/0*)+0,-?Wbimoqstsrtuuwxvvvurf3/,'#  
*/4?@STA3'$$#"&(&)-JZI@YK7*&%&$! #=]DMR6' +#-=^<%5901_tqweBIc,NR=999:=>[|r,ag\bbdSA:efc[`jpfEekM JNebfeNbjgfZWgE<;r~vqprspqohgkt~me]hr}~racmq?L_kwm{0VGkfkmIC;e3KI:23370)E>QlsYO1.g=-MuyGAHCEQK@AAPyxB`{kM6j}~Q?V`cjnC9O{W:UQABLs{COf9<4,(),.6Njvz|~M3MY[WI75PZG&*851?=5356@LNKA-)//**,-*+@Taimnqststuwxwvvwyyuk11,(! +!+0./9SQ=3&$%#%&&').C^N9KE5*'%$$" !BeGF_9(!,:Z:$7@22:6% + +þĿ¿ľǿýʼÿýþ¼¾ľſſÿÿ~dFavommmqvstrrrrsrpspmnmmnnmqnkhhefighfgefjgghhjkjiljhkkjhjkjhghgfgmjjikjmmlkomnomnnnljmmilmlmnnknmlkjljklikihfggdeefhhijillifebadfddcdgf`LXjjdeeedb^VJWZX`^dfg_gdeehibdcddefbfozJ;:=?DNPdtGQPRKQo^`fca]?0aPBLM@8.0hVv{W^TT8EF`fV;.-uQR=Js/05`SEKte@RLhqE5569@LdRY|zl>)1DCFys;8.+]pj~rb?Ga*PQ<9878J_jtkt/TGank]@A?d4MJ<103;DCMkppptggpm_^b'+^B,(+3636AQ¶qlmjNau`|H:xn45AIA@-(;EF<71)I@Tgk[L..d;+JurF=IC?<=?>@Owy?]{k~L4jy{OATadkoD5FX9;J>>AHqz?Rj<=5,')//6Rmyz|P6N]^VG78GH8&)642653368CNPMC)*00,*+-*+CUbjnorsuttuwxxutvxxte0/-'" "*/-.7QQ=3&%%#%$()+.;`PH\B7+&%%%!#=VBGQ9* +!,=Y9$04,/97$  ¿½¿ý½¾¿ÿ¾ƿĿþ¿þĿĿÿĿɾſþ¼÷~cFXjjjggkpnlkkmkkllnllpjklnlllmmokmqnkklnmqnlnnmpopnmnlkkijkkkkmjjilgfhhikjhfiddehicefdgccihiijkkllikkllljhjkkmlkmkjkklllmmnmkifhhhhpqlkkdRZimfedecaZQIUZ\b^def[feeehhcdcddfgcho}N<OeROtWA%-CBGx=8-+`voqaCIb+RP=9989:AY}o)`f[_edSD3Ta__]`fYLMELN_iSH`qux/ZERNOB@@Be1OI;336;BCQjnqprd>]|rz};%2{r438:41)'=Z]G9.)H>QabSO1+f<)KuiC?HB==???@Q|t8a|m~J5nyL>SacipE6@A8CKsv;Pi:>7-)*..7NoyxzI5RkjWF6>X\T*(85/99416:DNRNA)+10,)*.,)DWcimorstttswyywvwxxwc0-+'! #+...1GJ80$&&$%&(',.5JABfK7*'%%&"#=S?7E8+! 
+&1@S6#('#.85$ +¼ù¼ý¼Žü¿þÿ¾ƿǿ»ƼĿľǿÿÿÿ»ķ|cH^iffddgghghhhhijjjigffhefebgihkhgggeegghjkkllkmnnmlmnnomnnnpommlnmjkmkllkmlkkkjjkjkihikilmjlklotnkmmljlgejjhjjihijhghklnnmkhfefhip~uuhSZjlfddfcb[SHVY]``dgg^hfdfggadccbceafovG:;?>ERQcrENPPNRqdcgca[B1fQIkjK:.4hWmua[ZQ;FEfoS<./z\fF=BjP0.8cTFIufGSQjkA867;@ObTY{b@&0CBD|;8-/]qlsb@Ic/MJ>9999:;Zzl&bf\fnhQB=hihfkol\LilH KE<=:>JmfKXY]cXY_C;;;AI6[XLGDDE>1Scnk_clZMMKMS`cQ.LobC>GE=>>?CAP~s<`ytI4q|KAV_ahsD5=>4>U?;BLovBSd9=4*)*,/8PnwxyF3UtrXI6BG2eNKstJ<-8e\wudaZV8FJikT<0.{[eI>Ja-/7bTGNiBSRhlF987=?PaQ^gQhE'1FC@z68-0^potc?Gf/ON>889:<<[|m*eeWaf_PA;eefehnn]IgnD"GF:;;QN;6-(J?RnmVN3.i;*KrcB?HF?@@BEDRqBc}zJ5pI?XbahsB4983>Q?:@IurBRb8<3-++.18Orwzz|H4Wrp[G8AB5M,)74/B=435Ij/07bRHOgAORie?787Df1QN<;:9:=>\{m+dcTQOPMB=hidZ^im_DfoD$GE<;<WbbiqB1765>T>9>EqpCS]9<3,**038PnwzyzH6XrnYF9BLG=&):5/C:426;IYXRH'*/.+,-1.-BXcklpsrsrtvxwwwxwxxud-5-'! + +,IMNE71/0-#$&'()*)-0('/LbK4*&%(%# #*=/7V?+ #6DLZ7!!'$  ¿Ŀ¾ýľÿǾ¿¾¾¼Ⱦ¿Ŀ¾ɽüþ|fDWea]]^^^^`d`a^_aa_bddfdefgegdccgddcbbcggefhffgffgfegghghhkkhghefhfegegklkkkiihkkkkijihhhkjkilfdgigedejjghgaa^_cbcgfhhfdfhikighjkjknljlkaO[knfdcdbc]RJWZ_e]degahecdgibdb`_cdcfmyN;;;:CUWizMeTTNSoc`jhd`@5eTIrsF9-7hZnc`_W;IN}Y<./r|eH=Jd316cOEH\JCMPhb=778;BRfKXz{s=(1FDIvf47.4\po}~ocBGd3RJ@?@;>B?^n,caRONNLB?gjcZ\ik^HhmD%KB::;?OlcOeqgcSW_C;;=@D>[WKEEFG=3MY\Z_\`YNNLPTadR;y|~spopqrqqzjbftzedhnrw|{uosqm?J^ru~1XFD@NFB@Dcs(PH:435;DDTmqoqv_DauO|o$%1:^n{Q^\!0c<755458=Jöso|~mfX`ybBJp/;LbG5(+=FA:4+$J=ThiUR00d5-NufBAJEABFFA=Sp=cB;qG?Xbfmo@/653;K:7>Glo?Q\:<4+**027Vmwz{F8YypXD6@blY*,841G>426;P]ZVL(+/0*+,0//E\ffkostustvxuwwwxxwwj.6.(! 
1_fpj=4461$ &'&'())-/*(+O]F3((''&$#!!%5W;* + +?KTd:$,+%  ¾¾ûþĽžľƿÿþýº¾žǾƽżȽſĿ¿Ǿ|cDYgffdbcccggeecdd`]gjhggiiiiedeegfefeddgebce_deaac^^`ababbcffgdfcaceeeegfdb^`fiefjiklkklnlnnlllpnprmoquupnpokjkhjkjhikkijhghgdcdd_`dgghdTJ[jljkfebb[SMX^ad_egjdhhffhibcceeddbfmxN<9:;CUTiz^nWNRobdtb^@3fLHfoL;.7g\xqc_bU;HKlwS;..~tgF;>J:307cMCHpZCQPga=778;BQdOYoj_=&1CDK|n88-3_xt}wl_?H_~0PQY_]]]U@]~l+fbRWWTL@DCTjpqqu_@^4'2>ZJLDd[#6hC776787HD=.+59653,&JVl?a<=uF@Vdhoo>05438A;9?Hki7P];?3*()/27Vlwyz?9YxqYF78Td<&-730G?636;S][UK)+/0+*-0./EXeiloqststtvvyywwxxsc,:/' + .WFNbPl`MftidM^^B;;<@A6bWGEDEF>5O`hmpv{ZIPNRQ\dM:v}~~~~qpqqppqtwlfjlb`dclqv{wha^el;K_tzw2[L^\_VGCAe-OH6156=CBSkprrx_E`;)5It\eX_U"6fB865778=\¶rnw|ofWaz`B:zn+5:CFH3+7ED63+&K~>GMLMN22i:.KncA?IHFHLLEBUk?c?8xE=Yekrp?1633>L>V];<2+()/08Woxy{>7WuoXE;EVbK(-613D?739>U^XTL()-0-,/1+/HYfkmoqsutstwyxxxvxzud+7.)! + )RnmY?SIpY( %'&&')-4750.492,('&&%"  8V:+ 5JTcn;QdbM;:7' +»¿¿ý¾ÿŽþ¼ÿ½ʽþſļ˿ɻ³|iI]wonmmmnmmlkjkkkjjjjhgefghdgfeeecbdeecddfffdacfeggfifdbaeffffdcbbad^`dc_bdbfhfggghkmkkjmmmllmlkomkjigikklikgeijjeedb^_adggfdecbeefhjhjiYMYllhhefec^VLX^_baeef]ipmehh`bdiigeaemzT>:9XN87+1_rn}vha>I]iyp.S_|t3_}m*baYfkdM@;hje]dmpYLir>&RB:;;;Pm_NdrieN\\B:8:?B8cWJECDG?6OY[]]`eVKORWU^dN:r~}|}~}~~~mmqqplorwjgkkdfgbhqvzy~d__fj=L]ulet1[b{xJ@dz.PJ:345Xekqq@/874AEgg7SZ:=3+(*01=Wnww|~@6XunXD87434"+643=7416=T[YXR&),/+,-/+-H[elmorrttttvxxyxwyytf- 9.(! .UmoL:Ucp]) %)''&)5DFF@3...*''')("  9J5' "=RZj|? 
GJSNNW_7 +¾ÿþżĿ¼ÿſ¾ľƼýþýĿĿ¿ǿǿʾĿĻǻ|hFYuonkkjhhijjighheghijhdeffdeccfedefdgggiggjghfhhiihiijiffiigfeeggiecbddabbbdgdcdeijllijkjkkllgdgedgiihijihlihnnmkjjghgfhhfhhee`babffbccRO\mngkhhfd^VKX]_cbdch]fjighiaachfec_emvQ?;;=ESTf~{_YLTsci^`96gIRx{J:/=gVqma]^P>KIuH;*2~gF96533.9cMELucHQTirE668QtoxzogT\j=@{k/69AGE/,DQP;3*(M?MNNNJ01i5+RpaECLQVWWZTGYf?i|};>z:BUckqq>0953C[B@?Cjj>UY:?4+))00;Wlvx}}?;ZwkWC4--+'.:44=3116451.+)(('" 2C,".L]^r> AjdJRae= +¿Ŀþ¿¿¿Ľ¾ŽĽýȿùžƽ¾Ŀ¾ƾȽ½ȽýĿüľĽý}iG\zpmmmjgdilmmkjjihhjkiihggdddcgfggeghghghggeeegeecehihgdcdefecdfljcdcdd_bbdfgddfiijkjhjkkmlkkmpnjhghjgdfccgfaighea`aefffgeegfigigghgfgdTL]lniokhec\SIY__dcedf_ihigii`cdeffdaelwS@>?CHQUe|{[ONRp]f`\:6dJRE9.9eMYYfc_RAJMyO;.5}gH87633-9cNBNvhGPQjuE899Lm^QljdMYZB<=?@@5_SHDBBE?2RZX]]^_VIORWYlgO8~~~~~}|{~}}zy~y~tqqpqporzfgpffz~movwty~vssrl@M]wyqu2[iO8gq/RF63353843?R@?>Dmf>VV:>3+))0/8Xlxz{y?9[vnTD3,**&+811E72.4:CHMOM'+.-*+.0,0J\flnqsttstuuwxyyx{xsf)#<2'! /VJS_=RTW]+''&&')>e[^l@:ABB7-)'&!"!!" "" ;cjk{w;@c\=;86/3`znoc;I\ipjpo-TZ|s/dk'icWcjaM?>kkhbfom[Gir>#NB:9;?NoXPpcbPXYE;;<>=7^THECDD>2P\bgffg[IPRVWokO>y}}|~~~}~}~~|~}{wronoqos{}`hrhktkotxuxvvvvl@L`zr[r3[mJ:i,SI:436;BDTloqrw^Ac}v.1HwkT!?V857644:@]ws|~keW[g9C}o*5j8<|:AUcjoo91922:I=2'" +/TjrW?ipsm+)'&'()9ffeb8IPZjI0*'&" !,.00(" @doxym7JJVGAGS; 
¿þþƿſƿĿſǾľƺļžžžſĿĿľĿǻþĿƿþ}gJRysqqpppnoosqoonnnmmlnjjlggiiiihefhffifddbadfheffecdffffggfdffgedgdddfgefghmihkkimljkkikmkjiijfgfjhfhhgedhihljkiihhgfgfdfegjjjigihiikfYO^jokmgedc[RLYdac^cdeciomggj_dejmefbelwVBDNPMUUg}{RtULSr_dY];9hLK|C;+8gWZ[icZPCMTS;-8}gA75641.=cUEFd[ERPktB9:;;@OeKgC%7F>;<:6605b{npc:K[irdjh*WY~q/`}j*gc[ehcO@PpULobbP\ZD::;??9_THEBDD=0Q\bhigmZHOSTZbXO:{}~~{}||}}|}~x}}}~yrnompqsyyaglikrhgkpu{twzvwvp>M^yxxs4Ykzsr|I=i}-RJ:416<@?Rjnoor\DcY&3JyiTA_8775459>\¸tridW\;Kn'49=?<*.Ic]>3)*Sv:IIKMQ/0o~6-Sna?BNXgncbWDQd;j7<~:DYbhop906329C:JSS;,8zgA855320AfkNSnDPPmsE;PI>AOdLN}l="4J@;;:7707bvkpc8K_kpflm0Z[n3`h+gd[a``N??hjdZ`lqZIhv?&OA;<9>OpQDleaQ]YB;;1S[_bddg[GMSWY\XU<|{~~}|{zw~~|u}~zspponnt{y_hjjpmedhirvyy{vuwr>N_y|tp4Yfirv}~I?k-RH8315;AASmnnpt^D_51GxhVBZ?87678;B]¸vuidX`9Oh(59?@=+-;C?:6+*RpfT;@OaM^|gw?!4D@;9::906f{qm_9G]kriqj1T[o5ai*jfWfhcN=;hjb[_klVHfv=*MA;<:=MnUC_e_fR]XB;<;?>9^VIEACE=4UYY[^[bTJMMSYYVN@B>,-;JG:6+,Pr@KMMNJ03r2.Xn_BCOYo^UDX^Ak4D4BYcjpn84950G\?=CU8% De_F@P02M]PU]\=   + ¿¾Ľ¾½ûý¾ſžżɿûŽƿľżƼŹdI5nmc`\VYT[\Y^]__]`^affffea^]`_ab`^`_\`cdgfedaacegfgikifighhdcdffgfedeghkhiijkkjjjllllkkonmommrpmnnnmmokihihggfihgegeb`edcabachfccaa_^\UEJbrqijhggd]SKY`adacdhajkkhhj`cdhige`ditVAHRTTZUe}qi]VQRoad^c:=bLPqmG;09iXZ\kb`FEKLzL9,5tfA955630=fwoxcDLQdV?AnT:?O`NiyB%3C@;<==9/7evsp`:J]irhqg0SWm2dg,ogWihcN?>jjb[`kjTJfv<*ND;<;>QqQHe~haP^XC<<7_WIDBAD<4WURUXX_THMPUUa_U?z~~{|~}~{{}y}yqllnolp{{lidefbjgb\nxyy}zwvuq=M^}u~k7^ivhgvKDh1OG83139>@SkolmrV=b2:KxeN;e>8888:>IcztibY_5Od)?BAA@1*JKLNO0.w11Wn]DCNYy|m\VC\^Dl5B4@Xcjol85:61BW=:@Ky^484AL?726:0**-22>Yledxx8FhxiRA1+)*%.50+*-3148;DGH<&).0-+-/,4M\honotvuuuvzyxwxyxxs^'&80(! 
+:PTPAA_:WS"!+'(*),;jipd0:jXZ\g[aG=IFTM;8.8maB:78720?gfqmxcCLScN>?mN:AQ`JI\ZW8#4F?:==;8-:fust^8M\lwx~c-WZm0gg.mjV_fYK=7`THDABD;3VUSUXY]SFPQUXrgS8~~}}~~~}~~~}}~}y{xx~~z~vpmmnnlowwkga`hgkld_p{vx~yvvtm?O[|yymiv.RF8321:=AVpnlmpSAa}m*,=IxgP>[::::89*+DVY]A,+Rk>KLLMP1/q2,Zn\EDO[x{mcUAZ`Fj3G5FWckno54:61EW;9AL|]561-+-00690*).20<^iacvs7HjwiQ?1,**%.80+*/2269?DEHJHR5 + ¿¾¿¾ÿ¾¿þȽǾ¿ʽ¼žȿ»ƾžǾǿȿ}iI2lwnoornqpprqqttqruuvruqljfgfiikihehhggfhjhefhffd\``ZZYYXWXZWV]]VTNMROTUXUTTPSUSUVZWZ_^_]]^`b^a_``_`edgkmigfeecdefedgikgfigggegghjmobIH^pmhnihed\SP^abdbccdbjnkiii]adiiigbffyYFLSUTYOguRjXOLna`kkf^88aMCB@?A2:lWY\dTcI@L[{M<+7kuc?98871->fimueENVfQ<@hL;>PdG@<<<4#3DA<=;::.7gzmnb8J]nww{e-WZm7eh-jfPPSNK?@jjc[^mjPGhv9+LB;9:?RiOKni_P][A<=HKLLJ00r50Zq^BBP^~}l`SAXXHn2H.FZckpn42942AO;;@N\761,--106:0**.21=^iXb}u`ijmP1++(" ++YOF^@?PQUP& Bcr|{7DJE@S\U4  ½þ¼ſþĽǿĿĽýľü¿¾ľƿľɾżȿȿhG*Hdf]^]VXWXX^cdceghhhmnjnjecaabeikknkngginnkjjkhjjkilmopompomlijkfhhkjnnhahfeabYYVV\[VRWY\ZZZ\_UYSIPQMMPPPNLPMWTSVZZYWXZVUVYXVWY^b]_`bVFG\roijgjfb\NI[``eacaccjlkijg_achihgbgcwWGOWTTUPe|tZ_cXPRqcfffe^<;fLCA??@/9iW[YdTbHEJ[L;-7bh`?76563-?iqhkgCNThR=BjR:@P`D>>;86#5H@;=:89/:gsnn_4KZkqnwc0UXi1de/khV[`TM>@kke]alkODgu5+TB::;;bXJFBCD<3VWXXZ\aVGNUZ_xsO<}~}~~~~~~|~~}|~~~|||{|z{y{zzz~}z|}{}~~{{~|nmlmlonnrpld`\ikhgdgu|vu~xwwtnATmonloM=a6.INy~~{eB@W:87655:CZ}yugaSb/Nz`(EDBD@,*GYdsE),Wh:JKKMJ-+q2.Yl^BBO\zzj_UBRUGq0F-DZcjqi418436:7:@I|V54/+,,0/570+*-219[pkmyt7FiwhR?.*+*"-50-233059?CFE7%(/.-/20,4Naimoqtvttttwxwyzyxyu[ +=.($ BcaTB87Hl]!"))')*,CQev]6]_hsS/***%  +&RRUaAIebe[+855631BffX]fAOVfQQcE=<;9:%4E>:<<;:.PoNFUY^_Q]XC==>?;5cWIEBCF<4VXWYWZ`TGOTY]qlK?}~}}~~~~~~}}~~}}|}~{{y}{zz~{y{~}zy|{zv|~{nnmllpnotoiba]abaddhw||wuywwunCQ]|syj2\wGFh0QG8467;?CVmponoOD_{,86788;AV~zxgaY_)Uv`)GEEB=)+?KKK:))We9IKMLI00s21]r]BBN_tujbUCZVBq0H0CWdkti338210359AKxP64.,--0.5:0-)+12<]tvm{t9KjygS>1-+*$1924:86459?DGE8$'..0672*4Pakmpstvuutvwxxy{xyzxY"':/)# KdceKFjhu\"#))(*)+Hb`aO8ZTM]M1+)(!  
+ Ibrj<PdE>=;;6#4D><=;<9*9iugkecb7I]lnese.U]d2da.hmUhlbI>CgkgdglhKEfx3*S?8:;@QnMDTZ^`P\U==<=@=8bXHDBDG:4XVWXTX[OCNTRVkbG9z~~~~~~~{{|~}}}~~|{|~{|}{{}|}{x}{{z|{xu|}~vsokjnoptpjeb``aabdjqu~vuv~xvwulAOX~ysh6[o~~GBl0QF8579=>DVkomllL@b{\:K_tmng>G]<;:8990,+)#/837;98479AEGE6#(..2893*6TbkopsuvvvuvxxxxxwxwwZ!$:1("!HWQ]LFehsQ#*'((*-Ff_TD4^hihN/+*& + &CO`sCGXZaP)"1Ygttm5%[^JDO\b6¿¾ÿļĿļŽſȿĿĿ¾Ļȿȼ¿ź÷jO.oxidfgefhfjihgefgffhdbcfefeehd`YZ]ZZ\ZU[^]ZXWYX[UPTXVWVTSSRJHSPNSROK=IIGKJNMMJMNJJPOLOSSTUUWVWY[]^\\^ZYWW[ZYXXZ\__`__^^`a__a`^]\^\ZWTJA=O^Idkljl`[VMTW^`a]bgbihfgieba_ceegfbfqXEGIGNVRjxJng^hghc<;aKCA@?>+?lY\_pWdIDIM`[F8.9\_]@:76641AfMKe`FLReS=>C=>@MbE?>==9#3C><<<<8-;gulpqfa3J_mvwf/WZi3gc4ikYkncJ@CjlebgnkPGhy6)M>::;@RmJFX[`_NaV==<z{|~}~~~}~~}}z}xy}~{w}}||}}yy||zz{|zy|}|zrnpmmnospifb^_`bbcdhn{zssvyvwvmAL[uhg6[ieRWsyCAj/PE7698;>BXonmlqM=e}.8IikmgrfABX?==;;:=9469@DDI3#(./2:82)5RcimpsvwvuuwxwyyyyyxvR';2'! DM464IffrI0/'**,-=_u}`4PgtuT2+)'  %4:iq>Z]\sW*!"4U\n~;'VWK?Pja5þÿľ¾¿¿¿ľƾ¿ƻżĿƿû¾ƾžǼĽžƾȾȼ½ĻŶhO0ovommnnmqqpoprooqpqsqqutwxustrrourqsmnponklommnoopmmmjjlh`]ZW``ccbcb_`\ZaVTOFFFGGLGDBDDGJPMEDGJCGIJHMKLPNHONMPQRRQUUUUTQNPSUVUTTTVQMGC<;NjOgcaX]]RLAOTYZ`[Z]Ghhccgb\ZUacacfceo\Y`RLRWTfoKnmcfd^Y8-Ak\_`r\fDBPaKB@>=:"5@>;;=<80>jwnmh:J_m{d0XWm1g`.ko`mheMC?8]VID@DI=8UXYZ]^^Q?MPOM_WLC}~yy}~z|~||}}|~~|~~}x~|||z|~z||{zyz{yuz|{s|vnjnmmlmnngdb^Z_afc`emwytsvyvvtn?LZ{nti8\hjmls{GFi/QG7687;?EYqnmmlNCd28Mrihszi?Js{{{T760,,-0/6:4CV71/;ah^cvl8MnufN=1-,*!/915>?904:?DDC0#(-.1882*8ScjoprvwvutwyyxxzyzxvR)80)"  LkklKHnmvX)C.(**,/TtvmX5?FgnP-+)%  (GZe].KI?TM+ !9bD@~7'PLGBM^[2 ¿ÿ¿ľľǽŽľĿǽĿǿƽǾɿ¾žȾƾ¿}lN.kqpooqopqrqpqtnrttstvxzzzyxywyyxyzy{yy|zz{{{{y{}|{||}{}}xywkkmptwx{|xzzz|vvwvmbhhcbffhfhgebac_^]WWURRNUVQONRPKLJHCLKJHFF?ABFINIIIM>BA:22M[Oa][LPVFF=MQSJWYQaRc_]Y_\TSPX_\]fh_cZbreUUSQZUeojjjUMe]fxbWQ5=D?<`WICCEK@8[Z[]]`gN@MQRT`YM@~~~~~~~z|}||~~{{}~~~|{{{~}}|}~}~}||}}|y}||z~ronlnnnmlnjhg`[dikgcfnuywss}xuxvoBO]}r~`8]k|y||>Eh0TI7486;ABWmllmiICaiq{i 
7NphRgqd:AZ<=:978=Hdxyg_V_&fu]#:>AA=)2SkH32)*_c:LONNS.5v/-]n\BGQ^f_PE_}MDs+M,G^elsi237624669@Nv~|{U75/---0/6:1FT611>`dIb~l4OovkQ=241+!0526><7149@CCA2%(..1891*:Wgkpstuvuvuvxz|zyxxyxO(=1)! "$$""   EXQlT\pntQ05/()+*1Sf_OE8Z`kvS/+*& + +(HUKJ9A\V`O,! 4_fn~3#J>2<[cd6ÿĿ¾ľĿĿſľżǾýƿȾƿĿȿüſǼȾÿż~mO/oqnoppopononmopqqprtvy|}}}{zxz{z{{|{zz|{y{{|}||}~~}zppoty}~~{sonostwxwx{wwzuuutwtronjfcbbfe`abac^cb__[]YWQVX^]^a^U`_\U[`ZV^`_\^]UXR[WTP^_U`Q^[ZLRZOT\XVVUgvXXRDLMNU]NGLJVVLTRO[VK[UNSHGTIAKJCC5GWYVdYSOBoO<4=80($&&7:C?/3*mZD@A=:90Ef[KOQKNPVbQFC@@CBRlUGCE@;%5A><=>=;,9p{`b`ae6L`n`.W\d3fY4ijYln`L;CorprrphIJex4-MA:;<;PmOK\dd_Q_X@;;?B=;`UIDDFI>6WWX[^_dJDNRPUodK<|}{|~~}~|~|}}}{z~~|~~|z~~~~|}zyyzz{|{zx}|}yyz{xz|~}{z|}{{~{mllkjmklllhhgcafhhd_dlrywrs}zwxvnLSZ_5\txizyCDi0TF5488<@@XjnmmlHC]Y0862 ;K}nppf<F^>:99:8=Fivwe\X]%iy^$63=:-"2925=;535;DMIE2&)/.187/*8Wdlqttutvvwvxyz{zwx{uJ&=1'%:D/4>,# + C_UeEXTYvP#+.((+**`slsX9dqyrM0+)&  +%?cmf?@XppX)# /_qx|0+\^XMZc\3 +ü¿¾ÿƿľľĿþľǿľÿĽſĽľƿƽſɿƿʾžżnN0jooppqqppnnllmnoopqtuz|}}||{z{}{{|{zyxz{zzz}}|~~zoqruz}}xsrrvwyyz{|}~}|}|yyzyusnmjkjikljhihjijllkputvtvuw|}|{|y{wtsrqvxz{zx{|tqrnhljhhijmgeiffejja`YV`XU]g[TNALPFK[SSN3EDGHQKRL;@=AMNPRF;KGIONsx`=EDC?60/;4BHHDFz~JACJB?A7H\VV^VQUPRYR[g\UUHMaUgkUUP(5@BEHGF?,?fmok^[]6HYgza6V`b6gV7ov`ll^K:DkonrqqgLHjz50PA;:@<)0LO:41'+d^D[kaRM*9x+3_pV@ET_}i^KA`}HJw+L+K[hosd02974BI;;@Jx{Q62/-*-..59.AH53.Fatux}f-YmxeP:3<:,!6;24;:515=7\\khC/-*' + +&PgnpB*#IhV)# 5crlkg+&RT_SKW^6ýľſ¿üÿĽþǾÿɼſûɿȾżƼĿĻjL1kmoprrpnnmlmlmlllmnrw{{|{|{|yz|yy{zzyxzyvwy||{}~|qrsty|}xrqrwxyz|{{|~||zyttojkllkijlllpoortwxz}|}~~~}~~~~}z}{yzx{~|}}{|||zx|ztyvx{xvtstuptspnib\_][T[SVTKPRJIHPULFIOMSNf{{y~WA<@GHFUOMUSRWSSXNLQNR_\S]VR\J[hTZa669?Pd\UC.ATLtr]SN5AMPX^`\F2OVgu|W8h~V4jvVYQSK9CkmgjklcLNiw.0OA;::?WmHJ_fn`K_W?<>?C:=_SIEEGM:7YWVX[]aNGQRQONOG=~}~~}}z~x|~|{~{w}|}}|~ox|w{~~{}|}}{|~{|~~{{}}zzz}{yz~}|x}~}yy~}|ymjkmnnmmsprbZ`aeccehlkort|yvvtjGQ^a<\w|tj|z8Bm|,ZK68:8<@CYklkkmED^V8XK0:Po_Tdue@5EhM:35>;16=ftzf]Qb iz\'9?MO<)0NjN82*/g_C_mbTQ*:u/1]mTBCR]lpY]KDbCJt+O'J[mvsd22767EI;;Oyz;+" 
%HIatCRa]kF'*()+,-bjW_E;oqwsG+,+' +bbSL1APVqZ+""9_jc?D'%O]WKDDN* ÿÿļſÿƿŽ¿þʿ¿ɾǺmK/kqooqsqpomnkinmnmnpsvyyy{{}yxxxvvwvvwxwyzyzz{|~~wupsw{~}}xqsuvxz{{{z}~|||}{yvrrqoponqprttx{{}}~~}~}|}~}}}xywusokmmkpnllknnlnkepxragenigmmnhjkkmgbTREIONGRRHTNHNJJSNIOGGOTNURLYB@DDGQC99>IMJD=9GULW_VOMFDCDGKMLA8GJPV\ZSGASU\o`W\C=]TPc[PM>E]^_de_ZJQaclonlo)9K@.!6;2386303>B?IJ=HJKILLH@>>ACGJDAFILMJKIL_^GWXVTMJXQHPSMPNMORW\`VSKR`XW_SKJ0=MELSTZio[RJM`VM`WSXURK?CXSSZLGD9;`]^`a`ePGQSPOQSNF}~|}w}|{{|}{y}~{|}~~|~~|}|{~}~}yzy|zz}zyz{{ywww{zyx}zz{ywwxxv|uljijijjlrtwpf_emrcUVT[kqwxwuunDO]^7^vnlk~{>Fl~.RH97;9<>@ZpnnojF@^T5SD2 "8T{vd;7IsK831B<..5Xsu}e\Varz]Z)BOl`=&1EOSX[[\HH]FOw,R)L^n{vf.3855BA9:=Oy}kO52-+,-1-450CG0/.Cgrqw].Vij^M93>;,!5922641/5>GNLI2&,//234/+;Zfknsvvvutvy|{zxyxyysM*:/'!Ex~~vh>" #:njM:I^ymA0/'(,,.]YH_V:j[lxK0+)( ,bQ=E/CU\fH-!!2f{['-Kgb:=Uf/ ýžüǿ¿ſĿƾĽýĿƾýĿžƾŽǾʿǿľŽoO0||{yyxz{{z||}||{zxwwwtvususrrpqssprsrtsrtsuxzzz}yuw{{vsuvy{{|~~~}~}zywvwusrtz|~~~{{|{|{|||}|{}{zwwxvwvsmlphdacaabbaaaca_[Y^]Y`b_dcdcac\]`]UHPSPNPTVIACB>HOMJFBBIDIRPUHCQUNQPNLDFMNLRSNLKPTTRWRQbVKUURQEFKBCD=FVf`ppf_TT\UUg[SOCK[TTfVMJ>ASQOXZWUJLSTZYSXXF}}}}~{~}~||zy}~~{|}~|y{}y|~|}|~~}||{x{|}yzy|yzzzy}z{{{uwzyxx~ufhjiklklruz{npvjZVUXamrwzxvwnEP`~]8bpdhgx7Lkz,TG67:8=@DYpoookGC_Q0UF3";Wzh<:OzN665DD007]rrvvh^Xblu]}W)=Emd?)2XXI73(2jZBalgQI/9z}-7cqT@ENQSSTVAGaCOy,O-L_oyte-3865@@:;?T~gN52.**-.-452@K11,Dfqwz{R3Wc`YL86>9-#690254218AJQMH4)03/2650,=Xflnrvwwwvxwyzz{{zzwpL*9/& PqplpB" %Kijb9;BLJ6)-,)++/Px|wB=dBPd?-*)) (`YFC-ZmosV."%Bjzo&)XiZCA[O) 
½¿¿¿ſĿĿľ¿ÿÿſĿĿŽȾ̿ľȺƹº½nO5~{yxtrqqoquvwwuwvtsuptsuuuvvx|}~{xy~{wusuvw{}~~~|}}}}{yyuux{~~~}~~~~~~}}|zyutqmjjikjljkkoopoqsqqpprrnrorpoqqrrqsqusrqssrpprnmmnprlrpjnnjffgcfdca[][]TXU[RD=DDJVRSL?GHILVQPE6;@AHOIKK<=?BCR[Wgsi\PUb^IIOSUNVm]M\USSLJMNKNOORFLXPW\TRFL{w|}}~{~|~}}~}}~~|~~}~~zy~|z{}|zyz}z||{ux||}wz|zz{}|{xzz{v{}{yxujjjhhknmruzsdSUUWX`nsu}xvvufBRaV:^kow4Gkq0RB5797<@B\qooniEI_R4H=0!8Yz{c79OgK::3<=.4=amkprh\VffmW~T);BokA.0>:660)3k[@Xn_LH,8z.0dkSBDMPPPNM:D^DL|'Q/L]nwtc+2863::8:?U~i~M50.,*,/.34.06/0-Ddptv{P4R_ZVH64>9,!480176416BLQLC2)1415860+=Yflosvvxwvvwyy{zywzyuI*3.& BgA" $HD4WF>>991(+)*)+5XPitA9\`]d=,,+&  &mpmyCR`[cT,"'Gowm) ^cN97<7" ½¾¾Ŀƾǽþſſƿ½ſžžĿɿ˼ȼýž½nO6~|{||~~~~|{zyywwuwyz|~}zz{|yuttsvy{~~~}{|{zzxxz}~~~}~~~}yxtpnqsqooqprrsttvwwuwyzwxxyyxyxwvwwxxz|{{zz{zzxyyvvuvwuvusuvxwvwwvtutvtqvpttronpolomknjifheaebedfd`ZXYYUY[S]ULPORLYYIDOSPJOQDCIJGKMW[PNURNNNLNLGJLNRKL[GNRTPQ^yz}~|~z~~~y~z{}}z}yz~{~{~~~}}~~~~{z}}}~~|~{|xz{zt{}{zx}}yuy|}zyz{xwy|xx|~woljjghjlptw}|i_`USWUXamtr}zwwugDSbT;]s{t8Jmq7QA78:9=?D^qpqqqFE]Q6VH0!:Zgp{d56JQ><4<92;Ikpqzxg^XcqpXW,RtJ61-+,-0-45.++.2/Ec]Xl}M3OZWSF54>7+! 462597527DNPKB2).21577/-A[flosvxxywwwyz|yxxzzvG+5/(!_G'! $JclmC^|vnD),)++,2`\nmO8EXq}C.++( &SOEc?US[sY)!%@kpv_&-XhS946,!¿¿¿ʿþǿĿƼǾÿ¾ĽȾŽĽƼþɽŽjM3~}{zww|{}zuz}~~{wrrsutx|{~~~~}~~}|{{}z{{|~~}}~~}~~~~~}|}}~~~~~~|~{{}yxunmnmnptvvvyxxyxyyz}{{z{~~{{||||{z{|{|}{}}}}}|{{||}zyz{{zzyyxyzyxxzz{zyzyvyyzyuvuxwuusstssrqppqonorqprppssrlillliikjecb^^]XXVQLPQTTSTYRMQQKNSUTKGKNKHSSLD:BKC=Uk}{~|~|~~}}}|~~~yzy}z{}}}||||y|}{wy{}zz}|{{{~{{z}}||~}}{||xw||{xyyxxwyxvvz}}wpmkjihkmpsuzyhWUZ^WTYZ[cnstywwvg@R`T=b{s8Jpq3ND67:8aO3gL1!:[lnzc<;I|L;83:73=Dlqw{cZXgwuYU*;DljO3/PTE74)4tZ?XfdOL)7{|)4jsQBFOQOPOM:EfAOy*R|-N^p{sY,4634>=88,**( $Vppj7ND7UN)!;e`f}V"0JT~x|G41/-+.0/54,*+/22Eiiet}J4P[ZWH7299-!#66.5:974;HR[TN/*01./14/*B\elprvwxwvxyxyyzyy{ywE.51&!AfY[QL:$ + %@Kby?\ssn?+)'))+1\to^<7RXXM5,+)& $SKii6CMN[P* %ChefoK!,UW[Jaoa. 
½Ŀÿý¿ĿſĽÿÿſ˿ƾȿʿȽĽǿlP2{{x{z{{{|~}|~}zw{{|~|{z|}|ysstsvwwxxz{x{|}{}}{|}}}}~}|}|}}|}{{|||{}|||}{||{|}}|||}|}~}|~}~}~~~~~|}~~}}}}~~|~}|~}}~~}}~}}~~~~~}|}}~}zvvuupnlmmorvwwwzz|}}}~~}~~~~~~~||}~~}}~~|}||}|||}|zz{z||{zz{{z|zy{z{{{|zyyxzzyyyxyzyxwxxyxvvxwwwwwvtsvvussrommnljhgfefghhijihiiikmmmonmmmmmnnnoqqssnnonompopnpooostqnfhllrxxwzww{|}}|||zytwonorutvwzyzz}||z}~||z~}|z{xvwyzyw{{{{z}{{yzxyzyxywyzyvuxzzuxwxvwywvuuxywz}vmkiijijjlwk^TTXVU[^cecckrtt~yusueCVbV?]s/Guk6RI73452=\xZ) %Dea`_=MfWH\]S& þſþýƿſ¾Ǿ¿ƾȾǿſʿɿ˾ÿɼʿ¸mN1jzytvusstvvwxvyyzz{{~}}~~~~~~|wvz}~zxx|}{wttrsvvwwxz{}||{}}}~}}|{{zzyxwwwwywwvyyyyxy{z{||||{~}}|{}~~~}|{z|||}{}~}~~}|}}|{}|}~~~}~~~~}~|||}~~}~~}}{z{{yuqnnmlkilnssuyy{z||}|}~|~~}|~~~~}}|}}~||~}|{{{{|||}zzz{{|}{yzzz{{|zzzzyzzyzy{zz{xxxwxvwwvuuuutstttrnlkhggklgfddfhgjklnmlmmoprqqstrrqrssttuttuvutusttuvvtvvvutuqmhnqrvxuwy{{z{||{{zzxvwpjikjklntuvvxyzxxyvvvvpuusrprsssswwwwwzyyyzy{zyyyxyzzwxyywvzwxtvvustuvut{ujmkiiikfhlg[PQWXY^cbbinqtsttrxqqbAT]mMA[ut5Hql6UH5448>@CasrproLMeJ553. ';`v`)TZ976888;@oj||b[Vc^~hY+NQSPB(+23583(5tUBbn_QK+;}r*;pwXBGV]^]^X8GiQvwvewF71,*-//-<6-**-24Ig\Uo|A9RbjXH419=. 
%7508<8539EQYUR2*00+,+1-,E]hostvwwvwxyzy{yxxywuD.70&!,I>LQMA" $AQeX6U?./(*)&'++1f]WmLAhspf=.*)' $LkjU;6FlnF##Aa\XT4"8X\MH/- þþ¿¾½ǿʿĽ¼ɽſǿÿǽɼɼǿƾƾɽmN0Gd`bcceefgechjllmpoopqrstsssuttsvvuuxwvyz{yz}{~~}~}}~~xqrv{}~{wvvy|~|xuqrtstwwwwz}}{||}|}~~|{{xwvssqppppqnmnoqrrstv{||{{y{|{}}{{|}~~}||~}{~|}}}}|{{{||{|}y||{}~}}~~~}}}}|}||{|||~{zz{{ywtnigijkmortxwwz{|}}~{|~|}~~}||}~|}|}~}~|}|~~}}~|}}z{z{|{|zyyyz{{}{zyyyzzz{yxyxywxyyzywxwxwwvsttssqqoooponkihffecilhghkklmnpqroqrqrqstvwxvwvvvwwvwwuuwvstuvuxvuwxvwussnkmqrruxvwy|{z{|{zzyxwuvqgeggikoruxwyyywvwuuttstsqnmpnpsqrsttuwuuvuvwuuvwttvuuvvustvtutttttsrrrrv{siilllklkjji\ZW_ca_`glponorqkgsqjldITVHTNRR1BU`gmpiQ8M[{Z6PE:>>=ADFWknlkpOGbM7462'9[q_+#j^:98;;bODL31]_?- ûĽÿ¾Ŀý¿ýȾľƽƿǽĿʾȽ¿ȽǼſlP*#*.9BKVYW\^[^^bcbedefdfhigihfhghjhijifjlkmmmpqoooorssrqrqpnklosuvwzyyz{zwxtqpruy|~~~}~}}}ywspnmpqrrsuwz}|}|}|~~~}{ytpnmmkjlkkljjkmmmnqrtyx~}}}zz|z{}|}|{}||||}{|~y{||}{{{{||{||y}}|||}|||}||}|~~zzz{z{{{yzzxupokfeglnqsuvz{y{|}~~~}~~|}}~~{}~||}}|{z{{}~~~|}||||}}}|z|||{z{{zzxyyyyzzzyvvvwyyyvvvuwxvwvtvuusrqnmkkiiiigfgfggffiimmmnmnpsrqrsttusttuuttvwxywwvvvwxwvvwxxxvvvvvxwxxwvvvtohinpprvyvuwy{|{}|{{{{xxxtkfffimqsvyxzzzywvstututrsqqrstsrstvuvwvuttvvutuutstsrqrssttttrqrqqnmolmmkljihffhiieff`^bdhdahljklnppmmhLTbjqohbFR\]VJIURQVSN?9QXSZUMU>?=ABEFGin}ro_ZXaz!W+C??CC*(/2373+9yU>RPOPN)=~r->qxVAE[dcdd]7HkUsqojm:2-.4<411::.+*-3/Kh_ZprA3PheWE65?=, #862787738M[a[R4,/-,*+1,,C^elpsuvwvwvxy{zzzzzxs@06/% ^^\_e`@  "+-.00000.&/*(**-:V[iv@Hksoe9+))' + 'Z[U_<094.2&#?bF6<0"D\F" + 
¼½ÿ¾þľ¿ýſǿɿȿĿʿȼƾǼǿmR,%.7KXWWXWYZ`fe`dfgfeghgfeedeeffddddffdfddeeceacdecebbabecggijkjkkkkghhggiikmoprqrrtsrtqstrppmnlihilmloqpoqvwvwyzz{{{{zzyyyywsokiffgfghjjhijklnpsuvxv|z{}~}}~}}}}~|{}{{{{||z|z{|y|~{{|||yzzz{{{||yyzyzxvuttssrnmjhghkpstvwyz|||~~}||}~~~~~~}}{y{||z{{|}|}~~||z|}}|{y{zz{xyyyyyyyxwxvxxrusuwwwutsqrsqqqrpqpokjgfgihffghdbegfgikimortsqqsstuvvwvvuvwxwwwxxzxyxwwvwxvvxyyyyyvvwwwyyxvwwtohinpqqsvvvvxzyy|{zz{|zyxvsnhedhnsuvvvyzxutstttuttuvttuttstwwuvwwutuvutsrstssrqqrsttrrrqqpmlijmmjhgggfedddfeacbbaba`aa`diilqspmmkkkjnopnl__cfb\Y_\OUWUMF[lRGSLJDL_eRNRROEM]SOJFDCIVSJIE@9*:@>EDDMUR,3dhTVSORVUWrqj~h]^]ZUU^oR,A:;;9('./230*?}PBPMMON*7l)?svV?G\dggh_4Ei6Us)\k1N^gnrY+68/++09;>Rronqq<300=K81-97,*)-2/MiX[rq>4XmlXE64<9+$63067760:RYZ[U1,1.)(*2--C]glqsuvwvxxz{|zzyzzxp=.4/'!F^jjkna8Jl9Yp+]h-OaiorX+67/,,/8:>Tqpqsq>500BJ21/95,+),01Qnijtt@;Xuy\E5299-'74/589639FKOPQ-*.,)(*2.,F\fipuuwwvwy{}{zzz{zys<-5/& #!$" &,-/0..00/%/+)),/6Wexc7@H3**,++($ "Jkpo2H`roS) 'GaD20( ÿþ¾ƿÿÿ½¾ſžɼÿ˿¼Ⱦ¾Ƽƻ˾nT9'$%4HBASZZY[\do}{{{|yzyz}peeceedbcbbbdecbedddbbbbbcbba_``__^\^^[_]^]]\]\]]^Y]ZZYZ][[XYZ[\\\\[\\[[ZY[XZ[ZXYYVYXVVXYY[WXXXYVYYYX[]]\^^]]___`^\\]]^`_Z`b`fefehjlloqprpqtuvvvtvxvx{wuyu|{uy|z{|xxx{{z|z{}z{{{zzz{zxwrmmkklkkjjhcgmnptxyxyz}}}}}}}}}}}||yz{xzzyyzz|{{z{|z{~|{|{|{zzzzyyzxyzwwxwyyxxwwwvxvvxvvspooljjkmjjkmonjjjhffhiijihgihhijkmoprtuvuuvuuvwwusuvvuuvtvvxyzyxwwvuwxxwxvvvxwvvwwwxxxwvustplijkknqruvwxyzzyyz{|{z{yxwtqkhgikprutssrsvvuttsttsstuvuuwvstuutsustusqqppoommomonppnoonmlkiffeaabb__`_\^`ca_b^_]]^]\\__fkqqqpooppoopqroopnommmliigdcghnkjplonkllkopmklkhihfb`ZZXVWVVWTPQUOQRKKOQF431CG@:5984=Z|c[VUSRMP][PHCGOMSVQE=4^02'"#%$'1>O|\;BHA:5)8/% 
þ¿¾ļ½¾ĿļƾŻ½ľ½Ŀ¿ǿȽǻ˻ºnU>2/.5VYHOZ^]]_fouvwvuuvvvxy|lcbefdbcbbccbcabbabbbcccbaba^^_`_^]]]]^^_\[[\[^_\]]]^^]]]\\]\[[Z[]\[]]\XZY\[ZXWWXZZZUSRVWWVVVWUVVWUUWXWWXXWWYWZYWXXWXXWWUVXVVUXUXXWVRMUYWWWUVWW\]YYY_[^^`abbddefhgiinljlkosotomrrqtqoljiihghehgfcgmortvwwux||~~}}}~}~}|{{{zzzyxyxxyy||{||{zy{zy{zxyxyzxzyyxyxwwyvxxxwwwywxwvyvvzxtrmkljgihjigghihfeghiijjjhijghhiihinpqtvwwvvwvvvvvvttuttuvuttuy{yyxwuvwxxwwwyzxxuvwvwvwxuvvtvurojihmnoqrwxwxxyxwx{{||{{zyzvpjfgikrrprsvvvttusqursuuvtvvutuvtsttsstrpoommnmkkkmjkmkkkhhgdccb`a`^_^_^[Z[]`_\_]^]]^]`a^_filmmmnooppqrrqpqrqqnnmoomkigfgilmnoppqpqopnnoqrqqqomjgeccdedeejlgefc`cabbcg[WQLHFEDCL^a^daggh[F>KPNMIFJ]VH?;=AHGII=99531>@CKA@:3?EC=?ISVZ]ZTBP_tAQisvxY/\a9O^gjpU19920/16;@Unmosp:51/JN612<9-*+014LbR`un89\vs[D3,*** &53./0241:GKJIH*)/.*(+1,/Gaklpuxyyvvy{{z{yxwxyj839/% +%+--**-00-$!,,(),,.-,++,*+*)***()% 3?bu:TL092 $CX(,,# ¿þýĿƾÿľ¿ǽÿ¿Ǽÿ;ɿȾɽûmYEBGE4RdXS]```bdmwvwwvttsquvx|ujcecdcaababbccccba```___`b`\\^[]]\ZYZ][ZZVYXWZYWYYY[ZZXZZ[[Z[[Z\]YZZY[ZZYY[YXVVYWXVTSTUTSSRQQQPQRONONMOSSRSSSTSSSUVWTVTUVWTSSTTUSUUUTSSTQNSSQOTTQRTSRONROOQROLPVSRONRPQPRQOXVWYXYVXXVZ[[\YYY]_``chjqpmlmnpqrutswwxzxyyyvvvvxyutxuvwyy{yyyzzxwywwwxvuwvvzvvuuvututtvvuuvzyxwwyxw|zvsnmmjkhhliffffceefgihhjhfhiiiihiikmpqrtvvuttvuutuuttussuxuttwy{zyxxwwxxxwvvwyyxywtuxttywvutwuturpmlllostuvvvvvxzzz||{{{{{xvunhhggjikrvvwutsqqurrtuvuttssuvusrssqqronommllljgigffggecc`]]`^^]\[]Z]]\\\]]]\[]]\]^]_cbehjklnlnoopooqrqqqrpqppnmklmmiheahjmoppqqqprppqqqtrrnmlkkigecbceijjlmkjjjjiileca`]\\]^agiilhgc_TNIQURNCCFMKEDA?:>;;@<8-@]}R51*'%)*09BIO\liEK=:9747>C?B>;=EOZPADR]ORQIG@MWgkd]TC<759>BThhhol;732<:224A<.,-/05Jc`dul69XuoUC2++*)(:5,+*043;MTTVU+.0-(),/,0H`hlquwwxxvxz|zzzxyyvm639-# %+,+*,,...%%/*(+,/.,+,--+++*)()**% "EPpe/A1 %HT!#" 
¿¾¾¾ļľÿƾƿ¿ɾžʽÿĿʽſƾȿʾmYGESU5>_[T[^^__bhu{{{zxvwvvw{~sgba_______abcaaa`_^_`_____\]\[\YXXXYYXWWUTUUVTSTTSTUUTVVWWXWXVUXWVZXY[WVXYWVWVWVSRSRQTPMNLGGGEAECCABCGJLMNNPPOPQRRRQTRTSRSQSQRRPSSSSSRQRQRSSQQPQPPPRPOOOONNPNONMLNMLMMMOOPOLOQPPNMPOROPQSQQRTVX[\X\[ZY[][]``bcebefedddccdfhglilhkprpmonolprormoornlmpnrooooommoqprstsuuvvtsussvurrnmjggggffedddccedefgghgfhggigiiiklmnpqrsstttstvtttttsspuusux{}{xvvwyxwvwwvvwywwwvvxxuxwvtuwvvwuwvsrooqppstttuwxxyzzz{|{||z|ytpkf``alquvtsqprsqrrstsrsrtussrronmlmljmlkkkihfeddba`_]^]^\\Z\^]]^_^^^^^`_a^^\]Y`_`cgjkjmllnjmmmnnoooopnppnnpnlkmnokkhec`ekmnppoqrppqpprppommmnlkhggc`_`fhillmjjmmmihhghggghknmnnniedcdbac`]WMHNQQUSRTQQQNJFFGCIPIDEDFGCHIKPRSQLGCA=F=6>?==>@@ADDB>>AIWdRJM[lLFIDADP^ZQMGDHITUNMG@>@KQNI>>37BINIF98NMPRKB@AEIWZXUT:;:=>:637B=741/05GWbgla7>NgbMA2,+*) +>7/-+132?==BFHHHJKKMLONOOMPNQOOQPPPOPPQQQQQNLQQPQOMNONONNNMNMMMNMMMMKLMMNLKKLNNMMKKMNLLLNLLKMNPPKMONNNNLNJLLOLHMPNOHMNMPKLNKPNNMOJIKNONQNORRQNOTQSSSYZXV[[][Z[]]]^\]`ba`bdijihgjilikkijhgfdbddcda_baabaccedeefhgeeddccdegghhkmmonprtussrsssssrorrrtwy|zwvvwuvuuwxxywvwwwxxxxwvuxvuxvvvwywwwutrrqsspmrstttvxwyz{{}||z{ytqga_dlttsqppqqqrqrqpopqrqmponjiihfhbdgggfbb`_``^^_[]]\]]__^^__a^^accbccddfdgfhhhjlkklnkklkopmmlloomnoponmoomlllonkgfc[]ejonmnnpoppppoonmonlmmlkjhfd_Z`__cfgfgilliiiiiijlllmmmlkeeimmlkkjhcPGPWZ^\\\[[[ZWPTWXZXSQTXWZYVVUUVRPNLILNPQPQPRRQRMPQNOKMOOSTLPPNLGGFFGJJIHGFCEIIMIAD???BEGB?=;;BEBDA:8=^eNMHB@AQ\RPIB@;=BDB>:5:E?BA6328DGLIAB3ENVSH>4.-+)!2>;642443;MX_VJ/0640/03.2L`hoqtvvwvwyz|{{{zzyxm/.4.$ &,,**++-.+#*2***++**-+-.,++()((+,$$SZ%1.#%! 
¾¾ľĽƿ½żÿſļƿǼ¿ƾŽ½ĿļɾmVHJV]P/Fc]W_`_`acdffcfeeegdfee`b^`^^^\]][]^\]\[\]]]^^]^^]][[[YZYYYWTUUTTTRSSTSQQNOPOOOLOPMMLMMMOPMKMLKONLNMONMNNLOMKKLKJLLLJIGHHGFFFGHGIJJIIJKJGJLMKLLMMLKMONMNNONNLNMNMMMNLLLLMKKLLKLJJKKLKLKJKJJKJJJKKMKKJJJKKKJKLKJLMMPKLMKJLKKLJKIKKJLKKLJJIKLKKJHIILKMLHGIHFIHHHGIJKLLJIJMNLMNOOOONNOORPNNPORNPQRNRTPRTRRPUWVUURVXVVVURWXY[Y[\`_^]`_`__`^^]_``_`aabceegimopnnnnppnnnpopruwzxustuvuuuvvwyvvvtvwtwwvxuwvuwwwvxzzxwuvvvvvvtrstqliopsvvxxz{|{{{zzwpe`elqpopoqprqqqnllkkkjeggfededdd_acdedb`bbacaaededefhhihgggghhihikilmllnnnmlmkllnmlkjlkpplklmnnnnpnmmmmmlkmlmljeeeb`bbikkmlnnqpnmkoononmmlklljhfd`b]]^]a`_bccbdegghiiihihediilprqpoonlmaRU]`a`a`b__\YTY][\]ZVV\]_^^^_^]^]\]Z\]^\]^`_abbc_`_``^b_^_\ZXWVRPQPPSOLLKKIIIIGEFE@DEE=CC@?CBBFCA@==ALPECB??ANXHF@=;;@DC?976;RE?=966=LHEC;:8`sfZA9652-*$(:@DJI=;65:AGID<05;<:87517Oainouwwwuvy{}{z{zzyxl1/1-# ',,*)+++..$.1+*,*+,,./130,*)))*,*")WU+2K5%*% ¿ÿþĿƿſƿſƼ½ǽ¿ſʾƾþʿƾſmVGKXb[A3[c[^c`bdegffefddccbcaaac`_aaa\`]_\^___^__`^^^][]\[ZZ\[[\]\[YYXXWVUUUUTSSQRPQPOMNNMLMLLJLNMMOMKMMLKMMLJLMLLJHIKJHJKLKKLLKKJKLMLJKLLLLLMKONMNJKLLLLLMMLIILLKKIKIKJJIIIHJJIFHHHHIFGFFFHHIGGGFGFGHGHIGHIGFFIIHIIHHIJIJHKHIHJHJJIIHKIGHIHJIGFHIIHIGFGIHJGGEGEDEDCCCDFFGIFFFIJIFIKJIIIHJHIGGIJHKKJHIIKLJJKLKIKJJLMLKMMLLLKNMNPOOQRRSRRUTTUYVVVVWVWZYXXWZ]^`acdfgfgfcdeefhiiloromllmnnpqqrqroqrqssqstrsrsssttvuvxxwwvxxxuuwwxuutrusussrstwwzz{z{zztnb]cghjkmmnljijhhdeecb`eecbbdefgfgggiljhkkkmllmnmomnnnponnnoopnmoonommlmnnnlknmmmmlljkionkjjlnkklnmmmlkkjiijljihggdc^Ycimkkllnoppmonjmnnmmmmllifc^_\[\[[\[][]]__aabbb`_cddeilpsssrqqpnkf\S\_caaaaaa\YW[^``_]WW]```bcbbbcb```_bbabacdddddcdeehfhgijfdba`]]_]]]\YYYZZXXWYWSTSRQQPOPPOQONOOKKJGGC>A<>ACA=??>@B?<=A?@>>:79;;<=>=>JFBA><=WhdX<89740.+5GWGIJ?;78???<84/BLDKC:74=<<;<==?ACDDDCGFFEEHGEEGGFEFFFGGGHFHIFIIGHGHIHHIHJIIJIIJIHIKKHIJHHJKKJJIKKLILLKNLLLLOMNONQRQPTTUSUVWVVUWXY[[Y[__^]^^``a_ceggfcefegfhhgfgjiklljmmnpoqqpsssrrruustqxwuxvvtsrtrstvyxwvtrbXY[]cdhigea]bgjgikkmkmlkmmopoooqnppppqrqpqqqprqqpqpoprqporrqporqqpooonmnnnlononmnlkjjmljkiijihgjkikjhhhijkljjh
ggdc][`gjjjkmmonnknmlommmmnmlkkgeb`\[Z[YY[]]\Y[[\][^_abdfgkoqrssttrqqokhcWY]aca`^__ZX^_bbbb`[XZ_abdecffccbcbbefcbdeffddeeeeefefghhgecddddcdddb`_`ba_a_`^^_\Z\[^[[]Z\Z[[\Z[WSQOLJIKKJLIHGJKKIIKKKJIJGD@CCFFGF>>EA@CDBCNNGDB><:887=;ddekkmnnppompqononopnmlnllmmmkjjjhgijiighgffeeddcb_aca_a`^^ZYXVVTRRMJJKFHJEDDFCHFFFACBECFIGHDFJKMMKLLLLMMOLKKILLLLLKJJKMKLMNMMLNNMMMPPOLLMLLLLLLLLLKJKKKJJJJIKJHIHIHIHHGGGGFDEFEDEEDDECDCCEDEFDCDBEDEDDDDCCCCCEECBCECDBBBDDBEDBA@A?@@@>>=<>>??>=>?>>><976434689:=>?A@A?ACBACDDCDDEEDCEEEFFFGDEFDEFFEFFEFIGFFFGFGEHGGFHHFFFGGGJHIJHGIHHIIIIJJHHHJIILIIHLKKHJLLKMLMLLNPRKNMLOPQOMQQQRTTTVUUUWSPUUXXXYZZYZY]_`_]_`bbbcegighklmpopropnmmjklnqqjgYPNNMNSZa]`aciilmqppootsqrrqpppqroqqorsqppqqrrrppnrppppqsprrrpprrqpooqnonmnmppnnmlkkklokikjgligfgihgfegghhgihgeie`][]dfiijkllplklmommmmmmnkjjkifb`\[[ZYYX\]\Z[]]^\_behilnpssrsrrpqpnmkif`ZXX[]\[[ZX[`abadbbYYZ_ccdeddddd`cehfefdeddcdefhgfhffffhfgdcdffddeddecbccdddabbbbbacbc`b``aaaab``^Z[WWYUUURPPQOQRNNPQRSTSSTWRTSSRQSSTQSRQRNOQRSQRPONLJGD?>CEFDDA;57;:==;<:77Njoqruwxxwvuz{{{|{yyzxk0'1+$ )./..-.00-#..))--+,-2DLE@6-+**++)! 
&('")WNCX[4&/00)!ÿÿžſƼÿȿƿʾż¿ȼƽž¿ʺɽkTLPankj[:\fadkmnpqqsqttroppqponopoppnomlmnlkkkijjkjjhhhhgfiedgdccaab`a`]_`_]`_]]\YY\YVVXUSTSQQOPOSQOOOMPQRPNPOOORRPQNMJLLNNMNMLONMNNNNOOPOKKLKMMMLKLMLKJKLLKKJJJIJJKKLLKKKHGFGIGGHIHGEEHFGEEDBDDC@BECEFEDDDDCDEDDCCDCBBCBCBBCBDCBCEEDCDB>>><<;;88889889:;<=<<;<;::98:;;;;;:::;==??>>@AACABAA@@ABDBCBBBCCCBCCBCCBCBBBCBCDABCAA>??@ABBCEEFFFEEGFFHHFFEHHHFHHIHHGGHHHHIIJIJJIHJJKJFJKIKJJKIIIIIJIKKHIKJIIJKIKLMJHKJLMMLLMLPOQPOPPOQQUTSRVTRTVYXZ[VZZ[ZUNG@94/.8?MOSY_dchejkkhjknkkjonmnlnonolooljllnononmlmmknnnnloonomonlonmoonlkmmnmjmlkkjkjnmikliiigefgedcdgdeeefcddd`^]`dihiiikllmlljkomknlkklljkihgca]\[YZXXZ[\\_`\`cceikklmorrqtqrnomkkjihda\\YZXXTZ[adbcacc`ZX[_acc``decfeeeggefeedeeggfhggggggfffhgffgfeefedeeedeefdbbdddceecedbcdccdccabd`^_^_]]]\Z[YXYVUWUUURNRRQSPSRQTTWTWTVWYYXWYZXY[Y\Y\WQQPOOOQRPOMLLLJJEDBBBA>@?@f~zuruxzxxwvzz{{|{{{zym.)' + )-00/0/00/" ,1,+.+*++/99=73-***+,*$ %.45( ([P9JN3*243/$½¿ĽǺɼļľʿƾǿþýmVLPalklg@Kd]\kloppqtrtssrrrqppppppopqpnllmklkhhhhhhgeddedcbabab`^]\\\]Z\]\^\^]\_][^][[[^[Y\\\\\\\]^\\_^\\\Z^]\ZZ[[X[[YXWWX[YWVWWVUXXVTVUTUTROQQONOOOPOOMMNNNMJKLJLJKKJKJHIIHHIHGHHIHFFFHEGFGEEDEDC@DBACDCCBDCBACECAA@@AB@BBBAABCDDBCCBA=9:;73110/.//002335799:;99:<;:<==>=<<>=>>?@???>@B@??@??AABABB?BA?A@??B@A@A@??@?>?>=>=;999:<==???@AABCBDCC@CDEFEFFFGEFFFFFFFHHGGGIHHFIIHFHIHGIHGHJIHHIIIIGEGHHFHGIGIIIFFGHGFGGDFFFEFFFEDFHFGGFEGHHHCHKKJMLHF?9/*%# "*39^wtuwzxxxvy{||}|{|zyh2 +./0021030#!.2-,-,+,,.++-,-,*))*,+#  "-4<:- 
&XP0@E-'154.$Ľ¾ÿÿþ¿»½ļþſɾĿƼƿ»mYLPclmkkP;e`Xilnoopsssqqrrqqooponllkmmkjhgfdecbbdcbec`__]^^\[[[XWVSSTUUUXVVTTVUSSTVWUXWWYYZYZZ\\[[^\\\]]]]\^\][\\[Z[Z\\ZYY\YYZXYZXYZYZYYXYZXV[WYUVWTUTVUUUTTTRSRSRRPPQOOMMPNMMNNLKLKIIJJIIKIHIGGGDCEDEDEDDBACBAAB?@A@?ABABA@@@AA@@@AA?><8973/,,+*))()*)*+,/23676668869:::;;<==>>?@@A@>@A@AB@@ACBBBBCBCA@@@??A@A@A@>?@@>??@?>===??>>?@AA@@AABAAACA@ABCBCDDDDEDCBFCDEEFGFHGFFFGGEFFEFHFEEFEFGEFHHGCEFGFHHGGGHGGFFFCCFFEDEBCCDECABCDDCEGHGCCEFCDCBA<::61,)*((').17:;;=>CCCDDFFECHIIJKHLIKKKNNMMNIJKOPMMOSRSSRUSRTTUVVYXYZ[]YZ\^ZY[]]]_ba`aa_adaaa`aca_a^^___``___]_`\\_bdfhkhiglmkjjkkijijjhhjkkihhiiggfhgc`__]ZZZ\]\[[^adhikliklkmnnnpqolljhhhecfdgdbddcgfdfbdcc_\XSX_abcdcededccdddddegefffhigiigiihghgggfgggedeeddfgeeefeccdeffeeghggfgghgfeddfcbccbb`babefaabba_`_[[[[]][WVVWXVUUTWUVXWVVVXWYVVVSUTUTRPSRPRTUVVZXVWVYURTUPK?Uxtsx{{zywxz}~|{z|{|f2%$#! &-101103230$(44011/-...+-.-.+++-+++# !15=50!(YS/9B-**.'! ýþ½ÿþÿſþǿƿƿſǿýǼƾþƾkXIO_gigdY:XbV\hjlomoqpoopnkljlkkljjklljiggd`a_`cdccdb`\YVVXXUUTTTRQOMOOOQPQPPPRQRTSSSWUTUUUUVWVWUUWWXXWXXWXXZYYYYVVWXWYWUUTTTTRQRQRQRQRUUTUURRRSRSTSSSOTSSRRSSUTSSTVRTRRPPPOOPQPPQQNNLLPOKMNJNNNMJLLMKMKIJJIIIIIHGIHGFFGFFDDCDB@@???>>>=<<:865531//--/--./126788688778::9;9;<<=>>=<>?>?@AAB@A@ABAB@BCBAAAAA@?AABB@@@ABA@=??=??@AA@???AAAACBBBBCBAAAACBBBAABDCCBF@ACCDEEDDCCCEDDBCBCDCDEDDCBCCEECBDCADEGGGFFEDCDDBEFDEABCDCCDDCCBDFEGIIGFEDEFGGEDCA@>:::9887:<<=?@>@@AAAABDBCBCCDDCCCCDCCDEDDECEGFEFFDFEHGHEFEGIKIIJIIJKLLKMKIKNQNPOPRRSSTSSTTTTSVSQRQPQRUX\[Y\]_`^`dcghljlljnlkfehigggfhhfgghhhfefhgeeea^[[ZYXX[ZXYZ_behfgiijkjllmnmmlljhiihebdaededbecbbecbbb][WVX_`abcceddcbbcdedddedefffeehgfhhgghfedcgffddeggeeffdddefdefgeggefghfgfhhededffccdacaabaabacdcbbb__]^^^][[ZZY[ZZ[\\\ZZ[]]^\]]][[[[Y[ZYXWVTTSRTTSRTUSTTRSWTT=Qwttvz{zxxzz}||yy{zze;:887665:9:99;=>@@<==;<=;<=;>??==<=<;<;==>>=>=<=>?>=>???>@@@@A@ABAAAABAA@@@A?>@?@A?@AAA@A=???@?@AA@?@@@@ABBACBCCBABABA?@@@@?@ACCCB@BAACBBAA@@ABCABBBBBECBBBAABCA??B@CAACCCDDCCCBAABBDBABDECBCB@AAABCEEFHEEFFFHGHGGEDECDEDCBBDCCBABA?@A?@A@AAABCCCCAC?AA@@A@@
@@A@CCBBCBCCEECCCCCDCDEBCCDDCCDDCDDEEEDBEFGFEFFDGIHGHJHHIIGFGILQRQRTUWXYZYaaefdegigf`_ccaba`bbbbbaadcaae_abd^[UUSTTWX\WWX\_bddffeghhkjlmkjiiiggggeeeeedcdcdc`ddaa`^[WUWY^`acddcddbcbbddcddeddggfefgfffeeeffddcgeceeeffdedddddegecccefedegfdfdeededeccdbbbbdbbcbaabbcabcaa__^]_\^`]^b`^_^``^^[]^_`__``_`_^`_^[[[ZXWVWVUSTTTSRPOORXDRxvuvyzzwx{{{z|{z{yyeDORJIA=AAB=<=15[keQLBGB:::9:99955388672  )%"!!!!"!  !D>-?fH'¾ÿ¼ÿſüĽĿľȿȿľȾ˾ÿǾ½ľ~lXJP]ffeffZ?`^NTdgiijjllnlllmmllkmljkiiiiiifdefgeeddcdfb`a_`]\\]\Z[WVZTUWVUUXUTYYVWVWYVXYXWUWVUTTSTVVUUYXXVUSWWVUTRTVTUTTQRRQRPQOQQRQPRRQRQPSQPOQQMONOQPNLMKLMNONMLLKLJJLIIJJGGHGGGGFFFDDCFDCCDDDCDCCCDAEDBBBBCBA@@ACBBDDB>:9550/.,,,,),..//0/26:>>?@BBDCCCCDDEDCEFDCBBBDEEDCBACBAAAACA??@BA@BBBEDCCDCBCCBBB@@CA@@@ABA@=??>=<@>=@AB@BB?@A@@@BBACDBBAA@?@?>?@?>CBAABAAA@@A@A??@BCB@AABBB@AAAA@@A@@?@?B@?@?@@BBB@>@A?@BAA@@@BA@?@@?@A??@@CCCCBBCBDDDCDCCCDDBBCECCCBBDCBACABAABBCCECBAB?@?>?A??>@@A@@ABBBCBCAB@@ABAAABB@@AA@ACCBBB@ACAACBAABBAABACBCCDDEEFDCCDCCEEGHEFIJJJHJMORQQUTQPRSUWUVZZWXXXXZ\[[ZYZXYSQOPNPPRSUUUVVW\aabbdcdfhhihhggjkgfeeedcefcbbbdb`ca_`^[VTUV[_^`ccbaabacbbcbbcdeedcffhgfhihffefedeefdccccdfddedbbcdeecbbeecdeffcdcacbbcdbabbbdccabbbaaa`bbbcaa`_``a`a_``ab```_``_^_^`a`aa`accbab`_^^]]\Z[ZYXXYXVVWPQOR@Uuvuvyzyxx{|{z|{zzzzg8S`bdgglkjhjdggbaa__``_]]WWYZ]bbcbbda`^`]^][Y]UQ_d[WVLUQHGFHHGHHFEGFHEJO+1C3-,*&$&$"%&%$#$%$" ,T{b0$" ¿¿Ŀżžſ¿ſĿžƾþȿʻȽĽſƿ¿ȼ̾ľ¿ɾƻ~nYGQ[ffhihcHRgUL]hjjkmnlnlmnnnlmlmmkllljiiihgffghggecdeedb`bb^^^]^]^Z[YYZZYXYXXUXYYWXZZZWXYVWWXWWWYZXXXYWZXYVXYYWXSUVUSQPNOKLNJIHJJKLKMLLNMOOMNMNNNNNNOMMNNLMMOONLLMMMKIMMKKKIIHHGIHGGGHGFFFGHGEEFEEEEEDFGDBBBCBA@AAA@A@@>=<987430.,,*+,++,+,,+/04668:=<;<<<=>=AADEFFFGFHGHGHIHGIHIIEHEFGFEEFGFHIIHGFGEGHFFFFEFEFFGFFEEDCACA=???@AA@@@@BABACDBACDCCAAB@@A?????AA??AAB@??@@?>@>@@A@@??????@A@??A@@AA?@?@AA??AA@?@?=>?@@@A???@@>???@@>=>??><>=<<====>==>?>>??=???@@@??>??>>@@?@@@A@@?>>?BA@@@AB@AACCBBABACA?BC@@ABA??A?>?B@>@??@@?@???>???@><>??AACBB?@@@AAAABAAAA@AAB@A@-&&1LYK@><71/113600,310-!?vzmD214.&" 
&%ÿþƾȽĽɿǼľǿſǾƿŻǿɿƾƿ¸mWIP\ggffggW@dYJQdiikmlknmmonlmmlmnlllljkliijjjgiiiiggghfefeedbbbc`^]^]\]\\][[[ZYWZYY[ZYXYYVTVXZXWYWWTVVYYXWUUWVXWRSPPKMMJFABCA@@@?A???>BFFDHIJJKKMMMOOMNPQPQNPPONNMMOMLMLKLLLKKIIHHHHIKIIHGGGGGIHFIIIFFHGEDEDEEFECEFDCCAABB?A@?=;:7556542113323567979999987788:<>=@AAAACBDCDGCDFFIFFGFGHGGHFGFHGHHFIKJIGGJKIJHHJIHHLGJIHIIIFFFGFFGEEEEFEHFFGFGFGGFFFFEEEBCCCCCBADDA@@B@@BB@A??>??@@??@@@AA@?ABABCB@@@>?@B@????@?>?@@@??>>??>>?@?=>>==>?>><=<<<<<;==;;9;;=<<=;<>=<;>=;;;;;;;;::<;;=;;:<<>==>=?@>>?>=??@@>>@>?@BBA??>??>==?A?>>>A???@A?><====<;<<<==>==:==<==>==>======<<=><<::;<;>>==>@?>?>@>=?@@@ACDBBCA?BCACEDCEEFHKKKOOPSTTVVXZZYY[WWWXXVWXYYY[[Y\\[YXYY[[UQPPTZ`]b_^`_`b`eb`_bcbca`adeceffffgfeeeccddbdddccccfdbbcddcbbddcccdcbbbcbbaa_abbaa`_^``___a``__``bacb````___^^]_````a__a^__``_`bb`aba_`a^_ab`^\\[]\_`]\]\]^[ZZRHX{wwx{zzxy{|~~|{{{zxc6Q_fiijkijjmkjljklijjijlmmmkmnoqqrrstsssqrsrsrppqmqpoommonomjnmhgfddbcb_[WQQUYXUONMICA@ACFEA?BEIG7 0Ke]QIBBOF<4/020/,*&%'74¾¼¾ÿþĽ¿ľżǽǿſlXMP[dddceb[CX_PJ^gijjlmnlknjklkklkikjjjjljijhhiihhgfhgfeegfeedccda_`a__```__^__^``]^_^]]^^\[\\\\[]][[[ZZYZYYXWRXXXVTSLPOJFDCDDA??>A@?BDFJLIJJIKILMMMOMMMQPONNPOOMNOOOOOOMLNONKNMJKKKJKMMKJJHGGHIHHIHGKHGFFGGFGGFFEEFEEEDDDDDFFDDBBBCA@@?;;:8;==>>=<=<>>==:;9:<:9;=>?@>>@??@>@@?@@B@AA@ACBADDBBBB@BCCDBBBCEEDCDGFFDCEEHHFGEGGIJLFHKHFGGHHIIIIJIIKJGGHHHHHGJHHIKHFIIGFFFFFDEDEDBDCBDECCDDAACAACFDDFFDDDCBBCBAA@@@AA@A@???@@A@>??>=>?>>>=??===>>>=<=>=;=;=;=;;==<=<<<>><;<;:9;9:78889:98::999::;:88;:9:;<;:;;;9:==;;<;<<;;:<><=<;==>==<:;999888:9878888777678886678998:99:88766789::;<<<<;9:;;;;;<>=>=>?=<<<>=<>?=;??????A@AA@ACAEFEFFDGIHIJHIJNMLNONNPPQQMMJJJLLONOSUVVTVXZ[YWXYYZ[[Y^]^`__`_ada^babbeadacaacaecbbbbba`_b`ddca^_ccbb`a``_baa_]^___]a`^\__]]\``b`^__^_^`^^^_^`_`__^``_^_`aa^__``_^`aab`aa``_]_]\[\[\]\][ZZWF[{xvwxy|zyzzzz{zy{{va3O^eggeffhiiihhhiiihhiijjmlmmmonoqprqrspqrqqtrqrrrqpoqqopooononnoommoomkjmmigebc`]]\VTUTNOQNLJIKFA76=>>>CDB?>AA?=;;<;>B4 
½¿ľǿ¿ŽĿǿƿ˾ûſȽľ˿ĻÿȽlUJOX_aabaa_ONhVLR`ehhhikjghikkjiijijgihiiiiefhfgfghfffecccecdbbba__aa_^^___^________``_^^^^^_^]^_^^\_^]]\]]]_]\\^Z[Z\WWVSNNNNNKLNPNNNQQQVUVTTRUUTUSVWUVUUSTPQOPQPPQQOPRPPONNONLKKLKLKLLLKLLJIKIKIIJIIJGHGHGHIHIHHFGGGFHHGGGGEHGFCEEEDDCCA@@>?@@?@@@@BBBAA??;==>=?@@?A@ABA@A@??@?>>==>>??A@AB@@@?=@??AAA==>>=>>=>?=>>@@?>A@@BDCA@BCAADEDDEEGHFGFFFFFGGGGHGHIJIHIHJIIIHHIHGGHIHGIJJIJGFHHFHHGGHHHIJIHHFFFGFEEGFDDECBDFBDCABBCB@@@?>>??>==?=<===<>;<=<:;<<;<==<<;::;<<<<<;;::<<;9::99:;;;99;;9:;:;;978878888;9779988:89999988887978999989778878886676544555456531123324454210/022257876776679:::9:;;<<=<;=<=;;;<;<;;;:8;;99;;;:::;<;??@A>?>>?>>=??BB?>=<:::<<<<=>?BB>.ÿƾ¿ľɿȿƽƽĿĻÿƽȽſnUGQW^`_a`a_YE_ZMH\cdghhiiihikijjiiijghiihghgfffgfeffffeddddddccb^``a___\\^__]^\]^^_]^^]\Z[[[[[\Z[[\ZYYXV[ZXYZ[Z[\Y[Z]YYYXVYVZZWW[WYYXXUVXWXXZUWYXUXXYZYYXUWWVVWWUTWUUVUSTTSSRRPORPPPONONONLOMNLLMKKIKKIMIJJKJHJKJHIIHIGHGGGGGHFGFFFEEEEECDCCCBBBCDEBBADDABA@A@@AAAACDCCBCBABAACA@?>>?>>??@AAAA@?>@>@AA@>====<<=?=;:;>==<>=????>=>?>??@@@@@@A?@@@@A@ABAAA@BCACDCBEFFEEEFEEEFGFGHJHHIFFGHGHGGHIHJHHIJIHHGGHJGHHEFFFGFFGIGEEEFEEFFDCDCACCABBB@CA@A@@??>??=;=>=<<=<:<;;;<;99:::;;;9999<:::9:==<<:;;:878887699:98998:;::98:767878776677767657766545466775443543321/.00/-//-.////01002432456543576666778;::::998878776757866565566787898887:998:999:98799989877887799789:9;:;;<<>=:<;<=<=>>>>?A>@B@BCBEHFDDFFHHHFILJMNNMOQOOPOPSRTVVVWUVWYVWWYVXXYXYZZZY\[Z\\[^]Z\Z[][\[[[[]\\]]]\\[\\[\[[^\\[\\\^][]]^[^_^\]^]^]\[[\[[[Z[YWUYUTTQBezxvxz{}|||}||{{{z{xc6Pagjihhihggijhhhhhiggfggfefgegghhjjijjjjkjkhikjghffghhikjhgggffiiihhhhgfedfeeefhhgfggffggeceaa`_^]]ZYXTPNMNMNIIIEEDC@@@=@B@;75/-)¾ÿþŽžĿſǿûʿǾŽɽǽɽþlULSW^``a`aa\LR_TEYceghghikhhkjjjjiljjiiiihhfdefeffdcdefffffdfbcda``a^^\\\]^_\^]^]]^]][\YYZZZZZ[ZZYXZXVXX\YXWXYZXWY[ZZ[XYXWZWTUWUVUSVUVSUUUTRSSUVUSUSVXWVUUUTTUUSTTUVUSRRSSSSQRQPQQSSTTPPPPQSPPPOMNNMNONONOMLMLMNMLKKLIKJHJJKJIHJJJJGHIIFGFFGFDDEEFEDEDEECCCCC@BBBBBCCCDBDDABCDBA@A@?AAB@ACA@BB@??AAABBAA@@A@>>>@@>=<=<==<===>=<>>><=?@@@>==>=???>?=>>>==?>>>>??=?@A@??
A@ABBBBCDDBBDCBCDDEFEGEEGFFHGFEGGGHGEEGIHEGHGFFFFHGEEFFGGGEDFFEEEGFDCGFEEHGFDDEDEEFDBBBBCBB@>@@>>>@>@???>>=>??>>>=>?=<:99:;;:887776888677888:;889:79:9:78778878655665434355552223310/..+))(&('%%),,-./0111223224542442355553555453442331101123200/.../1422343324444545543345443542364355564665887767:978669877::778:::9:;=<;;;==;:=<<=>==:??@>@B@@@@BCCCDEEGEEGGHJHIKNLLLNPOQRUUTVTRRSSTWWWWVWVXZZYXZZZXWWYYXVYYYXYZ[\[[Z\\[[XZYZ\\[\\[][ZZYXXWXWWUUOJ=m{xwy{{|{{|}}{zzyyy{`4Uagihhghhggijhhijiihghihhihhhiggfiiihjjjjikeghfcdca`baaeed_ddcdededccdfgddegefffffffgfdefadda^\`_`_^\\[[XYXVYTSTUTUTSPMNNLMOOPRRR¿¿þ¿¿Ľſ½ƾĿɼμǿƼſȿȿû̾ʾǺøkTLOV^``aaa^\UF\ZQVbeeeeiiggghjhilkkihigijhggggffgfeegdceeffedbdcbba`^_```_^`^^^_]]^]\\\\[\][ZZ\[ZYY[[XY[YVYXZ[XYWYYY[XXWUWVVVVTUVUSTTUOQQQRPONRQQORNMRRONMOLOJIMKHKNMKJLKJJIJNKKIHJJIIHJJJKPMMMNJNPMONLMNLLLLNNLMMMMOLLLKMKLLMKKKKJIJKKIIKHIIIKKHIGHGFFGEEDEECDDDCACBBDCBCBBAABCDAA@@ABABBACCBAAAAAADCCB@BCBAAA@EAA?@?????A@BAA??@>>B@@AA@@?>???>>>>>==>A>>@?@@??>???>???A?>>@>>==>>@@?@?@?BCBCCBCBBBEDEEBCBDEDEFFDDDCDDCBBCBDCDEADFCDDECCDDEEEECBCDFDDEEFDEECCDCECECEFBDBDBBBC@?ADCBA@?@@@@??@>>====<<:9::988778;;88::78888:98888656775656654454454334421/.-*&$$##"!#'*++,-/100./0//02222202422113333422103211212100-///0./020/22/012002100//000/.0033210/1211233325544445443533445556456764568867767777787697589999:86578879889:899:9;:89;;9;=>@@@>ABAEDCDEGHKJJGJIOOPOQPQQSSTRRSSUUTTVUVXTUVXYXXVYYYYXXXVVXXXXVUUVVVVUWPG<>@@?@AAAAABAB@@BBBA@?@@?@@?@@@?=>?=>@??AA@?@BACBABA?@??@A@@AA@@BCABB@?@?@?@@A?AA@@@A@@A@AA@AAACB?A@B@@@AB?>=<@>>==?==?@@??>><<>?>>@>?>===>>>=;=?=>>=??>>><<>?>=>>>=;<<<<==<;;;889998998865543234443432220.+))))))+-..-./0210101100102201320011112//01//12100110/0.//0010011001012210/000/./20/01010/12/00000123211022332///020001311133311035334315432344434454322343335323453444345412553344576557856665:;::;::=>==>ABA@=@CBEEEHGHGHHIONMNQRSQNPPQSRSUUTSSSUUSSSTUTSRPG@wywww{|||{||}{{}yz}{`4FU`dggghhffiffgggfffgghghffhgggdfffedggfgggfecedfccfdaaceefddcfddddcdefgeedeeccedbdfdbacdb`a_``_^__\^^\Y\\][[ZZ
Z[YVXYXYZYYYZ\[Z]Z¾½ºþſĿƾſƼľǿǾǽŻɿǿſƿÿ~lUNPTijjjjiiii\DZWJhpopprssrttustqrrrrrsrqqqqoopooonmmmlllkiiikieiggegfccccbb_a`]a_`b`aa^^^`{xwx{}|{{{{~~}}z{}z`9DGW\dcfffifgggggffeedfgfhhgfcghegffggfggefgeffdcefffeddddeddedfeddfdccedbcdbbacacdeeca_cccadcba`_`_]``^]^][[[YYYZXWXX[[[ZZY[][\][þÿ¾ýüǾƿÿüžžľǿýǻžünYPQVtokhhgffhcBLXKZmnpqprsrututttttttrttssrtrrrrrpsrrrppqrrpmnolmnmknnlklkkkikjgihiifegededefdedfdcbdbbaa_^`a`^^Z^^]]ZZ[YZ[Z[XX[ZZXYZWVWYYXUVUUWTUWVUVQUTQRQSRRSQQRPOOPOPPOONNOPMNNMKJLOMMNQMKNNPMNLNQOOOONONJKMLLMKKJLKJJJIIGHHGFGGFEEFFGDCDDFDCAAACDDABA@BABBCBAA@AA@CABCABFBBCDCDECCDCCDGCDDCCCBBCDDECBCBBCDBDEDCBEFDCCDFDEEDECDCBEEBBCBCDCCC@=?@AAABBAA@?A?>?BBBA@BB@@BA?@?@@ABB@@@??AAAAAA??????@?>??>>???>??>????@??<===<<>>>==>><;==<<<;;;999:99::99987987666643235555434334425525668765687777976676567655787787789865766655665566678878987766421/.---++,**)(*,1112324343333422344442343222322201/133311/01111121120022122101222011/00010/1/02100//00011131/0-/..-/.,-/..-/.,-..-..-,-..-../+,.--.-,,...-+---*((('&'')(())))&%%')***+,,--/../..0/0//./10121221222112133223222432554454667777:989:;;=@>><>>?@>@@?@AB@ACA@BCBA@ADEBBCA@A@?BB@ACBAA?ACBBBCBAABBAAABCA?BCDBBBBA@BBBB@?A@@@>BBAA?>???>?=>>>==>??;==>;<;<=<<:;:9:<;;<:;=;::<==<=;99;;<;979898875554321//.--..//12443535676577655676314223344466684445555444544543355445432222230/00./-././/023214555466555453365345544454433322233433425346755336665544443334234431111111021013322211001110//..0//..-/.-./....--,,,.,,+,,,+*,+*,*+++**+)))(%%$"!!"#$$&$%&&'&((*()*)++*+**+---,---,.//..0./001/000.0001000/000/2310123544465799889:?><>;8=FKUXfnzwyyz~~{{{}}z{}{z}vg6BJJJMQRVZYZ]^^a__^_^`ac`aa`cca`bfeefdddddcdddedbbbbdccdddddbadc`bbbbcbcbaaacda`ccdcdeccbdcbddddeddcccb_`a`a_```^[[[YZZYYYY[YYZZZZ¿ǽſľȽþƾƾƿƻülZOOU}kaddaa[WJATUF^ghjijjjjjjkkjilljkllkkklkjjkkjjjijjiihfheggghggllkieffffeffhfeggihhihjjjliiihighhhighhhhihhffehiggiighfeedfedfgfegdbbccbdababbacba``cfedefihhiihgehgfiihggffgdcegfddcbdddebcdfb`dedgfcbaaa`__`ab`^`]__aabaa`babbabc``aaaaaacacbcccccbbddbb`adccbdedfbbbabbbbecadd
edeccbbdaaab`_`da__^^]]\^\Z\[\Z[]Z[ZYXYVX[YXZ[WWXVUVUSSSSURTQOPPQPPPNLLKLLLLKKLLKHHIIIHFGHIFEFFEDDCDCDDAAABABC@>?>@@@@><>@@>@A?>?@@@?A?>A@>?>>=><<=<;:;::9985655530...//26568889<==>?>????AA@@A>>@>=<=?A@>>>?><=?A@A@>A?A@@@@AA@@@A@@A@??@ABA@?CCBB?@AAA@>?BCAA@??>@A??@?A?>=?>=?<=>==>><<;=<;=<<<:;;===<=<;9:9:99<;999967656521112336668989:::98998888899:867689778775786778765545230212444431210210210/0/./011002212331012110113331123221////1//11/0200001101/13322223333333554244454354555565764665556555554435534443323322101.01./0----00/-..-.,.-./....---,,**)''('&&%'&'('((''(*(%''&(&&&$%%$&%%&&%%%&&&&'''&'()((()+*)())*)(((((()*)*)**++++,--+,,,*&#*@JKHHT_dmwxwz{xwwz||||~}}|z{{{xe40011324454::899;<<:=???@CFHFGGKKJSTTVUUZ\[_`Y\\]__]_\[Z]^`]Z^_`a_```[\a_a```aeacacdcefgfhhghgfefdededddbddcb___^^]a`^_]^]\ZXXYYYWýĿ½¾ǿĿƾƿƽþ»ǿȿȿĺmYQQU~zoT8)D<:cghfgijihhhiihihhijhgggfeeefgeeddeecddadbcbbbabcbaba_a_`a````__a`_^\^_`_]]\[_]]\^]^__^\^\^]]_`]\]]]\\[\_[\^\^^\\\\[[]][]^\__^____^`a_```b`]^a^^_`bgrz}{}{zxx||}{y|}~~}|{{|xb801100//212132233343455665587799:9;;>AABB@DDEEJNPPRVYXXSUVY\[_]`][\_abdb`bbaabcacbddchghdgjffgffhffhheffceeeebb`]``]]]^^__^^\[Z[YZŽƾſǽſ½Ƚȿp[STR|sV7 
12,Unjimkklmoolmnmnnpmmljjkjjijjighhgggehdedccbbaabaaaa_```^`a``acbbccbaaaaa`__``_``_`aada`bmw}yzxw{|}{yx}}|}{yz|x_:3221020100110011122210212134402445444656677887<=;=<>==ABEECDHIOKJNRVYTVXZ[[YZ_a]a^c_baabcfbacddgdfcbaeba`b`__``^^`^^]^]^^]_^^^\^\½ÿſ¾Ž¿¿ƿȿȿſǹ»mUOTUu_?+(0)$Bcghkjkmllmmmmmooolonklnllkkkjkkihiihgihfefddcdccceadddecbcbdccdddccdcbcccdddcceddeecdcbaacba``a`__^]^[[\Y[\ZY[\\\\[]\[ZZ[[\Z[YYYZXZZYZYXZZYYZXXZVVVUVRVVVVVVVVWWUWWYXUVVXXXWWUVXWVWWWXXXXXZXXZ[YXWXXXXXVTUYWUTVUTQPQQPQQSPRPPPNORTQRSPPPQRTSURSPPPPRRQQQQQQQRRQTPNOPONOOOONMLLLLKJLNLJLKFIGGEEDCCEA@AACBBCCAABBBBBBA@?BAAA@?>>@?=>===??=>?@==>>==<<;:<<;=::<>>>=;:<=<<<<<>==?=>=;=>>==?>=>=;<=;:;;<<<<;;:;;<;;<;;:;<;<;;::;98:8879899999:=;::::9988;;:::<<<99:::;98889;98:9988:8987899997988878777878785798676876435455444433343233343223332222222122321232432111222001101101323124320/01/++-+-..-/10../,.-//..-,/.--...//../...../-,+,+,--././/.-+--,,,,--,,--+,----.-.----./.---.-./.,-.//////.......,..//...--.-,,-..,,,+*+,*****)))('(''(()'&&&&%##%##""""$#""""##"!"##" 
!###$$#"#!#4`effddhr{yzxw{|}{z{||z~|zy|v[942332323533322342133312421333234322134355664457556554675568767768;;;<<>ACDHJIKMNJMQRPTUUUV]ZZb_^__^ZZ[YYXXZYY[\[\^[\]^]b^][^^^_^^þƾĿý¾ſɾʿĿŽžŽlXQXZt_=*NP<>H\dijjkjlkkljjjkmlmnlkknlllkkjiihhgghgfdfeeccdcccccabcdccb``edabaceeeddcabbcebbcdccbceedeccddfdbda```bcghgghoyyyvw|}{zyy}}{~|zz|vY71101/0/021112223322434442244545542313456544434333220010//0//010.00//00110110025367788<9:<@@CDBDKCCEIJLMMKLLMNNTSRSTWWXX[Y\]\ZZYZZ¾ʽȿĽǾȿʿƿŻɿſƼlZVZXu`GWlkhjfdikkhikmidiijkjkolhjkijiiigiihgfhhghefccbcccccbcababccbbccbbcbdddeefdedbcedeedcdbcddbfdcdcbaaaabba`dgghhghiqz|~|ywwz|}|z{}}|{}|{z{xW50/.-,,-...-/.,/0.-.00//0//12432444222466654421221/////,)))()*+*(*+)*)***))**()*)**(('(&(),+,,.//1224445;=?A@@BCFHJLLLPVSSUSTUV¿ľüþžƿɿĽȾƿʾȿþǿľ˿ƽnWU^bx`Qyʼ~{x~vvtyulvpnohkpgimigfacfdca]\a\ad__`_]]_Zab``^[ac_aca__`cbcddbedcccdeccccbabbbaa`_b``_^ZZZXWWUSUTSVXXXX]\\YZ\[[]]\^YY[Z]\ZZ\\ZXZZZZYWWVVUVVTUUWVTUVWUUTUUQSRQRPQQRRRQRNPRSOPQSRQPPNRSSQPNNQPQRRRRPOSSRQQRQPRPPMLLKJLMMMOOPRTTSSSPPQNRNOMOOONNNNONKMRMNMPNNNONLLMOPRLPNMONNNNNNMNMLOPONLOLLLLKKLJIJKLLNKLJLLMMKJKKKIJJJKKLKJIKKJJIJIIILKJJJIJKKGJKKIHHHIHGFHJIHIIHIFGIGHHEGGDFEFEDEFDCCDDCCA@AAA@ABABA??A@@?@??>>=;=?@><======<<::;;9:;;::;:::<:98999999::8876778:9988876788765556766444345454435532113111014211334311232232324332324421532000011110101000101121./0/,/1//.,./-...-00.----,-,--,-,--,..--,--.,-+-,.,--.,--.--+++,-,,,,+,,+-,+++,++++,,+**++,*+)*)))(**''))'()***))*)**)**()**)()(()*''&'%&&%&%%&$$$##$"!!"#$""$####"#####$$$#!$#""###$#"$$%%&&'&%%$""""#%'*+,1Iehijiidhkwy}~~{zxz{||yz}}|}~{yz|vT5//-*,+,,----,+-,-/0/.-...0/..-./10/0/24333111/-,,,,,+*(%$#&$%''&&&&'''(('%''&%%#$%#$"#""#$$##$%&')),,-.12246655678;=::>>?==>===><<<=<<=<<=;<:;:9::99:9:9997777875679776576564775445543444543453333255323443432221113431113221100/0/1/0///0//1000.,.0.-.//.../-.----.--,-.-,.--.-.----../...-,--..-..--..,-...,++.++,--,++*,+,++++***(**(*)))(*(((''()(&')((*)))))()'(('((()))('('((&%%&&%%&&&$$%#"#"##"""##"#$$##"!!""" !!"! !!!!##$$##"! 
"#$" "##%'*...0/-/Fghjkhjdfnyz|}}zyyxz{|{y{|~}~{{}xS8--,,-,*,*+,---.,,---.----//.,-.,-./../00///.0/.,--,,+++))((*)*)*('&''&''&&&%%#$%$#$#""!#""!! "%%&%(())+,-/132223357656776667868:¾ÿ¾ľýÿĿſþžĽĿž̾ƽɿʾÿɿȿǿnXZ``y`Q}z{{{tsxkpaa`^WY[UWPLJPUSXVVVV[[WW]Z\YWY[[Z[\ZW[X]ZYYXYW[[WWVYYWYWZXYYTWXUWWUWWXYXWVWVWUUVWWVUWUUUTRSSSSSTSPPQRQQQRQSSSOQPPOOPPNPNMMLMNNLLKLLKIKKLKKLLJJKLJIJJKMIJKJJKIIIIJIJGEFDGHECEFEFHFA?>@BECEFFEBEDDHBHGGGIFEFHHFGGFGHHIJJKJNMLKMKKIIMLLLIHKLMLJKJJKKKKLKLLKJJKKJJJKJJIJJIJIHHHHJIIIHGHIEGGHGGGHHHHFGGGGGEEFHFFGHGFFFEEGEEDFFEFEEEEEDCDDDBDDDCDCDEEBBCB@@ABA@@@>>?AA?@@=>@?>??>=<>@?<<==:;<<;9:9:::::9878899975764455566655434323433443433322110233001000111000/0//00/1./21/0..00/0//1/00/.-,--/.-,,..,,-..--,,-.---.//.-./-..-,-....-/.//,--.,,--,-+*+-,..-++***()+***)(()''&&&&''%&'(((((((('(%%&&&%&''&'%%&&%$$#$%#$$#"#$"!!!!!!"!"""!"#"!#"!!!!! ! !"%$#$# !##%)./342/-,+*)+Gmiiiikfgoy{|}}zzxyz{zzyz{}|zzyvU2+-*++,+*+,+,,+***++++**,,..-,./.---,..-....-.--,---./00/.-,,-/-.-,+,,-++**(&''((&&'''%&&#"""""###%&'')''()*+----0/120111211/0/012¿ľ̿ͿȼɿƻoTYc^ybTĪ~tuxyprrrqjkhkggkgfacf``]_`a^UY[WW[PVW[XXRRRXQUTURRRTVUTWRPVUURTUWVTUTTRUTTQRRSRRRRRTSRRQPSQPOPNNOOOMNOLONLLNONMNNMKJLNIMLLNKJLLLKLKKIKKKMJIKLKMKJJKLLKKKLJIHGFFGIHIHHHGEDCEEDDDFFEECCDIGHJJKJJIIHJJJJJLMMOOKLMKLLKJMMLJIJIKKJKJKJJIIJJJHIIIJIJHIIHGIIIIGFHHGJHHGIHFHFGGHIIGGIHGGHFGIFGGGGFEEEDEFGFFEFFDDDDDCEEECDCBABCBBDCDBBBBACB@@AAAAA@@BBAAA?>@AA?>@@??<>?@?==<<==<>=;:;;9888:9788997655333332223344323132333221222322001/032000111///00000/00/01/0/./..,.--./.-------,+,--,-/.-,,,,+,*+*+,,,+,,-.-,...--...,,,-----,+-,-,+*,+,+))+**)(())()*(())'((&'&'''&'&&%%&%$$$%###""!"""" "! ! !"!!"   !   ##$"""! 
$*-/235310.*(')'(,Ejlkkmkhir{{~}~zz{zy|{{z}~}}~}xy{uR.,,*+**)()(*)**()()**)))**+++,-,++,+*,-,,-,,,-*+-,./......-,,-.//-..0/.-,-,,,.-+,,-,.-+**('(')((((()*)*)))(()*)**-,,,++*,,,,--,,-.¾ýý½¿þľǿǿɿſǾžȿǽúqWWa^zcR}xywy|xrxuvnmklkfjihecbeebb`ahljklkjjt{w}~|zz{yz||z{}}|}|{||vN.+*)(*))(')*)))('''((())('((()*)()++(*+*****++(*,)+--,,-,---,,---,.-----,,,-./,,-..-..-,,,,,,--,+,,--+,+*(***+)*,+,*)(**++*)+***,*½ÿžĿƿĿȼʾžǾʿƾƿ¾rVY`b|cS~}tywuynnhokmedfcc]_a`mkklkljjq|{|{xzzxz{{{{{}~|}|||{uL1*(')((()(()((''(((())))(''&'))(')()(())((()**(()*++)++,*+,+++)*+*++,*++,,,++,+*,,,,,,,,+*+---++++,-,,--,+,++,,,,++++()+*++,+****)¾¿ȾƿſľɻſȺ¿ļȼƽpXZa`|cR¼}}}y}t~uvsqnlkngghggf_dd_]_]]ZYUWYXXYUSONRJLNKMOOHMMGIJGJNJGLILMMMMKKIJJHIHGHGGGHHHJHFIIHGGHIHGFGHFGEEDEFFFGFEFGEDFGEFDCGFFEFEGDEEDDDDCDCEDBCCBAAAAAAAB@AAAA>>?@ABBABBECBDCBBDA=??>?@@A@A@ABDCCCCABBBA@@@?ABCCBA@@AB@@BBB@A@?>?@@@@=>??@?@??>><>=<<;<;:::8:878445421-*(''(()(('&&''&')(*/0134565555667877666756567668756656765455775657444444323331100/./-,-+*,+,---./-,--.-+*)'&&&)*(()))()))*)*()()*(*++,+***))))('))*)('())('()(('''''''''''((&'((&&('''&#$%&'%%$$###"!#$#! !     ! !! !$%%&'&'+*''&'''&&&'+.0/,,/Nkllklmjis{||{xzyy|||zyz}|||||{|xF.+))**(((('''(&'((*(((('&&'''((('('&'))(())('()(()**)***(*+*+*''*)'))'''))***((),+*++*((())*+))**)*++,,+****+,++*+**+++***+-,,+*()½¼ĿĿ½ǽſſüŽɾþ˾ŽȿŽ˿ȾùqYZba}dSw|}zx}{}ssnommgfjifccfcc`bf^Xa\\ZZPQRQROPMPQRNKLFGKGGIHHGDFFFDEGFFHIIHIJGFFGFEEEECCEEEFFDEDECDECBBBCBABBABACCBBBACCBDBAA?>@A@A@A?>>>?>;<<98777899::;;==>?>@@?>?@@>=>??@@AA@@>>@???@@=??==>>>?><>=<::;9998685555555856542332100.../010/./01100112355557789::8:9;:8:::;99898888769:88988777898998777788778877777655444111/0//0/.//00/..//,*-..-,,,+,+,+**+*)+***''()***(''(()('''(''(((('&''''%%%%%&&%$&'''%&'&&&&&&&%$$#%%####"""""""! !#" !  "%(+****)'%$!! !! 
!##$%&((('&&%&,Jinomkjkku|~|}yyyy{{{|zz}}~}}|{|pJ.**+*)(('(*(&''(((((()('&&'('''())('()(()))))())**+()*)****(**)))*)*)'('))***)))*)(('&&&&&&%&&&&%'&'())*)('('(((')**)*)((*++**(()*ýþĿľǻþ¾ĿŽǽ̿ɽǽ¿ǿʿɿʾ¿ſȿȾĹnX[`a|`R{x}|xxuyqtvppkpmheffdb`d^`\`[YTWXXXURLSKKNJMIIGEFCAEDEFDACA?@ACBAA@BACACABBBB@BCA>@?@A@?>AA?><96433131/.,.0.134569;;:<<<=???>===?>==?>?=<<>====<<>>==>==<><<<;;<:9987889976677688676652334543544322125567667777654366666768999:;;;9::;;::::99:;:::9::88999:99:98898798877777888898776544343223211134323342223200121///./0/....-,,,,++,,,,*++)*)(()()'''('(''%&%&#%%%%%&%$%%%%$$%$"#"""!!"#"!!  ""!!" !"'-145/+*'$#" !!"$&%&$###!!! !")Nkmnljigkt||}yyyyxy{{{z}|}~}}|z{sG-+**)'((')+(&'())(&'()'&()))('''(*))**)()(())))**+**+))*+*)(****)*'()())(()())('&&&&%%$$$"$%$%$%$#$%$$&&&%$$#$%&&&'&'&'&())'%&&&''¿ǿſɾ˸Ⱦƿƽȼ˾ɿ¿ĹoWYbb|bSȰz~wxxwwsqpuhhej`ab\`[[[[VTTRPNOLNJIKMJFGEBADA?@?@><<>=:666-00-/,-,02446889<::<:<<>=<==<>?@>?;=?>==>>>??=@>>>>===?><=?>><<<;9:<;::99;9:;:99988666877877755657767668887765455232134356466764688877988878887788788889898799888998899877898:9788678899975654556654553235643344444344311220////0022211220/00..-.-.---.-,**,+(())(('(''%&%%%$##""$"! !!! ! "! $(+.133540+)&%$"" "&)+)('$#"!!!!"$#!"(Rqonlkjhkt|~}}xzzz|}|{z}}|}}|{zzqF,''('%&'''(('((*('())('(()(')((&')))))())()'()++*)(())()))((())(()'))(()''('(('&%%&%%%%%$#$$""#$#"##$##$$$$#"$#$#"#$%%$$&''&$$$$##¾¿¼ƿνž¸ȽĿʼǾɼɽźüoY[b]}eTñ~}}y~x}wytxtqnjjdji`abba^\^\WTRPIKHJGHIJIIIDFFDBA?@B:=??=:>B?>=?><?==???>=<;=>=<<;:;<=<;==<>;=;:;=<9:;;:;:;99988:989:957865675423343330//0/.01001332456455556555555887677766877789878888898667566779:87766777755665434565455555344422221111023232132231110221210/2101111.//./--,---+,,,*)(&'(&$%$$$#"!! !!   
"$'+156542.+*'&&&%%%%%()+,*(&%$$$##"#%%$#%(Pmpommljkv}~|vyzz|}||x|~}|}|zzyqF+###"""#"#%$##"%%$%&'$$%%&&&&''('&&'))(((()'''()*(('()(&'&&&'&'''()''''(&&&&'(((&&'&%%&'%%%$#"$$$""$$###$#####"""###$#"""##%###$"!½Ļ¾ľŽþǿžǿļƿſȿʿľĿȼȻùq^_fc~eZ}~x}{yvxusropllegjj^^__^]Y\[UVSSSQINJJJFDGFHDBAB>@BAA>A>>>?@<:@=>?A@?@@@@@AA?A@@==>>>>=>>>=<<=;<<=;:<<99::;:8888765432110232343322345554554444455555454454445435676676755555446688787888777767676655444466553433233322321221000110321114422242222210101212010//00//.,-,++-+()*(&&&%#!!! !#""$###!"$*/123452/+*(&'&%%(''+***)(&&'(&&&&%%%&''''(-Rqppomklkv~~~yyzxy||{y}~~}}}{{znB*$#!!!!!!"!  !! !#" !""$#!#$$#""$%$$%$$%$%&'')'(&''(&%%%%&%'('&'&''((''&&'(''''(&&'&'&%$$$$#$%$%%&%%$$$#$$#"##$$$#"!""#"##!!#"!¾½þÿżȿľżǿǼ˿ǾȿǾżq[]g\{aY±~~~~~|~}}w{~t|{{qu|yspsspjnng`fff_`^_YXXZUVWWQOLGJGGDFFFCFDE@??A@@=?=::?>?=;?>;?@@>>?>=>>;;:::9:;9::889998887887789878888968767768655656455444454433332335434555343466654563322343242333543222331/00/.--,-..0...2553342222312221112212111001/.-..-,,*)((&&%%$"###"" "" !$(,/4552.++((%%%%''&*,+)+'''('%&('('(('''()()*)1Umpoolklnv}}}~zyywz|{z{}~~}~}zzysB(" !  ! !!!""!!!"! ! !""""""#""""#!"#$""$####$%%$%&'&&'''&&'&&'(('''&%%'&&&'&$%'&'&%&&$#$$%$##$##%$$##""###""! !"½ÿƿǿǿƿȿ˿ʿɿr]agazfY}|~~~~{~|~}z|{~~{xy{x~z}yxz}{w}{yt{{|hyywrqptonlqkihce`]\]XYYZVVQPOMMLGIGGIEABCE>C><=?=<=?>=>@><;=====>><;9>><=<;;;:89:98898898887867677666557444544553334544423421012301//1112111//01/-//.,++*+,*-+,,.232111243221223222222220./0/.--,,--**'&&%%&&&&%$! !$*01211/,*)(%$%$&'(,,)*)(&'''&('''&('((('&'**)(**2Xnqpnllkkv~yxzzyz||{{}|}|}}{{yr@#! !#" !""! !!! "#" ! !   !!  ! 
!"###"$'&$$$%''''&&%$$&%&'(('''&&%&&'%%%%''&%%&%%%%%%#"####"#"""¾ü¼üſžſ¿¿ɿƿʾŽ̿ǿĹpV`fc|`Q~{}}~}}}~~{~~}~|~{|z|p}}xyy|yyy|zw~zwuvv{svwzutr}orowmnnoriiqifcicZ^b_WWZYRSVTNMMIDGILFCB@FD?=?:=A=<<==:<<8;=:99<=;;:98889:787866569765676754444312343322112100110000/00.//.///.-.//.-.10//0121021121133201120..0//0-+***)(&',,))(''&&#!&+.0/.--+)((&%&$&()**)(('&''&(((()))''(('(('&&((((*,3Zrrpmlklmw~xxzyy{{|zz}~||}|z{{v>#!! ! !!"! !"! !#""#"!! !! !"   !!!"""" #"#$###"!##"$&&%%$#$$$%$&&%%&&&&$%'&&&&%$$%%$%%$#"""½¾ÿþýʿľƿƾǾɿɽȿżqZbl`aT|~~y~}{|~{zzy}zzxyx{{{v|rxw~vzv|uwwwxsswtsvtvqyxypvuxmstwnmu{rqpskilmi`fkd^adcY\]_UTWTRPPPLMMMHCDB?A?@>?>?===::9:9<:98698767489898765565454553466544333321233321112/032221123220011101101/.//.-+)'%&&()1=H;31011/'!((,00.-)'&&%$$%%%&(*+*(''&&''&''')*)**))*)(()**))***),.2\oqpplknpx}zxzzxy|||z}}}|{|{z{x>$   """#####$$$""$$#!"!!"###!"#" !"###" !!"!"!!"""" !#"#$#!!!!! "#"$%###"#""!#$$$%%%$$$#$$$#"##¿ÿ¿ǽɽýÿǿʸ̼νɼºrY_jY~~`Oõ}|}w{{|{|~|~~~~}}z~}~{}{v}{{{yzzyz|xzx|x{v|wxsxxwstvytuuupquuootwqktvultuvkpptinqugkosifhljhgkd`ab\][[YWRUTLOLKKILHBBDA@>@>;;9:8778799857457477575465556654344555443446643434332200//-'%%##)28;AM[E;;;<=<1*244100.'!!&')((((&%%)&&&&&'''''((())*)'()**))),+*,-3\qsoolmonxzwz{wx|}|{{|~}{||{ys=(" !"#"#$$$%$"$#$%$$$%%$$####$$%%$##" "###$##!""!#""!!"! ""!"" !! !""" !!!""#""#"½½ýþÿýĿǿžƾʷ̿üs\bk]|_L}|}{|{}z|~z~|zzz{}~{z}|z{}}{y{wwy{yxy{xxwzxuv{xvu{yutuuuwtxsrrvttorqrpsssklpsmkmtllqqmmkqnlqtjforicjmg_hniegij`^]e`\YYTSSRNRKKGHGECC??>>@=>;96;:769:8:68:;:99:967655542/-'$$)5DIHGHQ^H>?AAA?319::8::<.!##$$$%%$##&&%%%''&&'%&((()+)())))***++*+-1aqrrqonooy{wz{yx{|}{{}~~~||{wq@,(%$#$"!!"!"!""!  ! !"#$$#$$#"%$$%$$$%%&%$%%#$&$$#%$"$$#$#####""" !!!! !"! !"   !  !!!¾¿ý½Ŀļ¿ÿžƽ˿ƺȽȺͿļpXdla}]O~|}}{~~w|{||z~{y{yvz}|zz|~rx}}xx{}wwx~xvy}yux|xtu{xtyy{tuxztuwvuvvzuuqtttpootkqtrnpstnorqjlnrmilnmipnkflrndjnphkllehjnjkjneegjhgcjf^]]`^W_\WTRRRMOLNEJLHEEE?@>><9<:8985.0=MONKMT`I@?@AA?32;<=?@A?2 !  #%##""$##$""%&'(()(())))+***++/5bossomnnoy}vyyxxz{}{{|}}~|}zxn@.*)(&&((''&%$%&%$$#$$"!"""! !! 
#"#$#"#$$#####$$%$#$&$$$%%$$%$&%%%$%$%%#####"!"###!"""##"! ! !!!  ¿¿ľǿ̿ƿ̿ſȻpU^h^}]Utqurbfdckdo{ƿ~~||~{~~|}}~u}z}|}y{zyw|}yyz|wux{uv|{ytv}zrwxwtwz|svwwuttwwsryussvvusxuqprsqprrqpnpnkiprmiqqnhmojfkmmfjmreilohhlngijpkkhoigfleigneifmdeeb]`_^][[\][VROB0BUSRLOV\I@BCB@=12>?>?@?=2#!! !   !$$$&'$%'((()((**,.5gpsrpopoox|xyyzy{}}{{|}|}|}{zn<,)(''&)*(')(''(()''((&&''(&&&$%#$%$$%$$$""!   !!"##"#$##$##%$&%&%%&&%%&&%%$%&$$$$$$%$$$&&$##$$%%$$$%&%$#"$#""""##"""! ¾Ŀ¼½¾ĿĿÿ¿ƾǿȾɻ̽ǿúoV_i^}_Tqylojggd`TOQTPPPOPYbs|}~}~~~}~|}}|~z}|}{}}zx{yxx{|{syxvuwuxn{ywrstvrswztstxsrrwspuvsopwtprvrmotwqopuromonkkoonhmnqemnogkmnkmnoejlnjikodilkfhkofhhlddfiddekcd_\XQBIXVQPQW^G@BDB?>2 2>@A?4($$""##"!! "!"!!"##!!""#%&%$%&$%&''&('''((+4eqsrqpqppy{zyzzxz|}zzz{{}|}{zo=-**+**)*)&())(())(()'()*))()(((''))(()('&''&&%%'%%%$#$##"$#$$#%&&$%&%$&&''%%%$%%%&&&%#&&%$$##"###%&%%%$##%%$%%%%%$$#%&%##$%$$%$$##¾ĿþĿľþþſǽĿľȿǾɾʾĿĺǿʿɽŻp]bh_|~`OötQg}}uokhea\XQNPPPMOQVY[}~}}{~|}~|||~{|}~|z}z{zz}~xxz}{yxxvuxwxtyzxvwvwuuswutvusttvunttrptyqksvtnoprmpsspqrvprqroqnqqoorqqmpnnknnngkkkjlnmgkkkdegdbfif`c`^ZVTX\[VSQV[IBCC@>@46>???AB@;>C>>@A8541../+*++))'*)****))+**)(((('))))+))))+7irsrqpmsrzyzzyzy{z||{|}|~~}{ym=/---,---,,++**)*++)))))**+****)))*++'()(')))*(()(&&'&'&('%&))$&'&&'(()((('&''%&%%$$%%$%%$#$#####$%$$%%%$$$$#$$$&'$$&&%%%$%&%$$$$$$þÿƿſþÿžƾǿȽĿ;ƿƷžoU]h[|{`Pļºz|w}t}zunwm{vxrttGgxpmhb_[WQOONMMMNTUT~zx}{|{w|y{|~~y|~~~~{}~~}zzz~y{~xy}yz|~yxyz|y~z{z~xxwyw{y{xwwzwvvtutusvwrpstsorrqjqtqkrtslrqsnqtvnrstprqtopqwqpopnoqqpolonjjkjihmkhega_]YWZZYVSSZ^HCDCB?>3 
5=>@ABCBANSPVXRUPPNQLLLHDDCBDEA=;<875613421/.---/-,,./,-3isttqoorq{zxy{{z{}|{z{}}|||{{k=0,+-,,./..+,+++++**,++)*+++)()***)+,*)*+)*))(())'&&('(((('(((%'(''(''()(()'''&''%%&&''&&&&'&%&&$#$%$$%%%%$%%$#%&$%&'%$$$$$%#"###$$ĿȿȾ¾ʽ˹ĽrY`h[z_Qlcvupl`UOG@C840./(**)&((-*-007Jb9fzqkgb_XVSRPNNLJKTVU~|zwuspmmligdb[[[^[[[j}}~~|~||}{}z}vz{}rxz}zz{zx|}wxy}xvx|wswzxvxyxtwxvuuuwvuptvsrwsqmvtvqsttmttrkuttopsupqrrmnrqmmnpmmpokkppigmkd_][\\[YXUU[bICCBAB@3"6?ABBDEB@QSSVUNTTQMRNPNMMMLRPRLILMIJPLHFGII@CD@@ED>@B>;=Cirutqpqttz{y{||y{||{||~}||zz|j:0-.-,,,-.++,,,--+*,-*)**)*+)()+)***+**+*)((()**('(())((())'()()*(''''')()(('&%&'&%&&'&&&%&'&'((&&$&&&&&%&&&&%%&%$&''%%###""""#$$%%»¿¿ſľÿ¾ȾſǿȾȿȾǾŻ˼únW`f[w\U}kTYkolf^ULE;730-)'#$#$$$#"!!'\xphgc`XUTRPNNLKNTTPÙ~|zxuqqomjhfc_[[WURKIVz~~~~~|~~~y~x||v{}{|}}{{|}yyyzyww}yq{|yuwzuryywswzyvwwvvvwzvuv{vxswtttwrsozwvrvvvrrqqoopplqqokoqoikolgjqlc__\ZYZ[XWU[^GDDDDBB2#9@ABDEFDAUWZYVSVVTQVQQOQPMLSMKIKNKKMNLJIKJJJKKJJNNNONKLQTmssqpooqsx{z{|{yz||{{{~~{|{|{g<00/0./0./--//.--+--,*+,,+-,,**+()+))'(**(''(('&&&(((&&&&''%()((('&'('')'('((''(((&('&&&&%&&&'''&%%&&&&&&'''%%%&%%%'&%%$%&$$"%%%$%%»ÿÿ¿¼ȿȽ˿þĿ˾ȿȿż̽ĽļoY`iZuYNpicRUkpke^ULD;52-*($#"!""""!Yyrkhd`[WTPLMOMMOTVT{Ę}{yxusrmmjhfb`\ZWUQKK[}|~~~~~||~{}~~}}}}}~~}~{{|}{z|{v|z{{~zxwtw}zvtxxxuwxurwwxpvyysvx{styyruz}uvvywvuvqsvvrtsurqptrphnpomooofa`]]]\[XUS\ZFEDEHEC2#:ACCCFFF?T\\ZZZZVVX\USRVTQSWRNNQOMPPPLKLJJLNNKKLNNOONLNQWlvtrppqrsz}||{zyz||zy{}}|||{}gH<:9:878544442220221/-//.//0/..,+,,,*)**+*())&%&&&&&&%%%#$#$%%%%%''&&'&(()()()*(()(()'''(()(&(&'('''(&&((''&$%''&&%%%$%&&%%%%''&&%'¿¿þɿǾĽſ̽żſʽþͽȿžqWaiUv\QƴscgcUXjmie^UJB<80-+'&%$#"!  
Yxqjgb]YWSPPPOMLLQTVzĖ~{yxtqqmkigec^[ZWTRKE[}}~~xxty{}z|~~|}{{}|z|x~}{x||zwz~|xxwyyzyyvwtzzztyyvozyynuzyrvy~sswzuuvxstwvruwxrrvwqurqnoqsppha^\]_][YVU[YEECCGCB0"8ACCCFGE?YZYYXWXSVXWRUUTMSUSPPQRMSSRPLMNNMPQONOOPRVTOORSXlttqpnrrw||yzzzzz||zy{}|}|zywiYVWXYRNMPIHKKKGGCB???=7776653122232/010/..--,+)+,++*)'))''&'&%''&&'''&'(()()())''&&&''(')*))''%'((''(('((&&'&&''''&&&''&%&&%&&%%%$$ÿ¾þžÿſ˿˾ƾrXaeVu[OojigeTVhkif^VLB;71,)'&$#"" !"  Vysmkc\[RPQNMMLLLPW[Œzyę~|zxuqrnjjhec_][XTPKEX~wzy~||~~}}yy~yz|{|~|{{zy{z|z~vzxx{}xxzxv{{zwxz|wz|||~{}yxx}zxu|zxs{xzsvxyswwuruutquyxrrwynsvqjnsolngf_]\]]\[ZU^ZIGEDCCB0$9ACEEFFDB]]\ZZ[[YXXWPVWVNUXSMTRNMTROMMLLPOTQQQPKQTWTSWUTXmutrpoqpv~zz|{{{||}{z}~||{zyvh[TUXVSSVYQRTTRRXTUXTPRTNMKKOIHPLD@CA?>;:889767401330//,/0-,+,,,+++,++)*,*)*,**()('''(''&'''''%''('%&'&&''''')''')(&('(%$&'''(''$$%%þ¾¿¿ſ¿ÿ¿ȾſɿȿȿÿȿsX^fT~uZRygjjidUYkkhe^TM?94/,(&&""! !"  Wxsnhea`lwUKKMQVRȋjmwƛ{zwtrpkkifdc^[ZYTPMDTtvtphgd`bn}~~}{||z~}~{~~}{~}~|}z}}|zz~x{}~{s{||z{{~ywz{xtwyusz|wu|zyvxz{wvy}wxy|uwu{vwyzuttwurtxwwvutyqpsrmtunkofe^]^_]\ZUU`ZJFEDCEA/%:@CBCEGDE^]__]\\[\YYXXWVTXVTTVSPQSQLMNOPRPPORRPLTSRSTVRU]oturpnopw}z{|zz|}|{{}~|||{yuh]U[VWUWWVVUWVTWVQSSTRWWTQTURRQURPRSRQRVQKQPIHIHJHEHGDBB<;7:976743232/.0..-..-.,+++***)+''&''&'('''%&''&'&'&'((''(('(''&&''(('''%%&'ĿʿǿȾûoY^gQuUQþlilkjdXVjlid]TL@:60,'%&#"! !"! 
Vxsmhfa~ɸ]JMSUSƅȞ{ę{zxvsqkljgdb^\[XTRJES{ff_^]^gbkk|ynIr{unhc]W[}|~|~z|~x~y}y~x|}z{~{z{z{}~xxx}z{~{zwy{yxyx{yvzzyv}yvu{yzvuwwrwxyuxz}ww{{uvxxuuxytwxwtvutrssvrppphe`^__^]WWTXVIFDCDDB0&8?CEDEHEE\]_]\Y\[YWZZXUVWYRUXXTRYURTVSTUWSQSVSSUWVUTYYUYcottsrnmrz{y{{{{|}{{}|}}}|zzuj`]a][\_[Y]^\XYZUVZXUTYXWVXTRTWSMWVRQUQOPPTSQQUQRQQTTONQONMNLIIPHIJHBAB??;;::78643322300-+-,,,+--*)),,**))*)()*(())(**()'(''('&&&'%&¾Ŀ¿¾ƿÿȾſɿǿɽȸȾſoV`iQ{qNP~´mjkmnkcTUkkie[QJC=5/+(&$#"""    +Zzrmifc`Q^x}FLRTSȃ_wƙ}zvtromljeca_]YVSOIA72KA61.22.+*-.175+._}uiaa^TX~}~~}~|}~~}~x~~|{~~v|v{}yz{~x{}~xw~vu{~ywy~zw~~yvz{z{~{{xvwyxxuvwxtzzywxz|vwwwtvwvtuxvsvxurttrotvqmprnhba__`_]WXTZZGEC@CD@.%8@DEFFGFI_^b`^Y_][Z]ZVSVXVSYVSRRUURTURSQSPLTTPRWWWYXYWW]epuusoklqz{{z{}{||||{}}}~}zwti`_b\]^`WY]^\[_ZZ\`YZZ_ZVYZWX\ZXU\WRUXWVTUWSQSUQURSTOPSTQQRUQRRTOPPQNSUMMONMKMJIHGIF?BB@?<>988967444533210000/.--,,,-.,,+)))*)'(((&%½ſſÿ¿üĿ˽ǾŽpT`gU}}rSM}vhkknomeWTljhd\RH?:4/+)&#" ""   +QzsmkgeUNPHNarPMQTQȴþxȜ~ywutqonjeba`^ZWUOH?$'PH?886213330/,-+%K|sf^]]VV|~}||zz|z}}||{}~|}~{}}~~~x~}y~}}|}|{x|}rw}|yvy|zx{|wt{|xxz|ztvzztuyysxy}wxxyvzx|vvwyvvvvwvtvxuruurqsuroprleabaa^_[YYY]VIFDBCD?-';BEFGFHCH[bf___b]]]^[ZWYXRUZXTSSWTTVTSSOSQTVRNRUVTYZXXY[bquvsleiqx}{|~}{}~~|{{z|}{{yvjcabbba_\]`^^^]Z]_^X\^^Y[_[WZ\X[XZXYZZVTYWWVXXWUXUVWSSTTUXWYSTVURTTRPOONPPPOQPNOMPMOMNLLLKMLLKIHJHFFGBBBB?>>?>;:8797652101110/..--,+ĿÿþǿǾǾƿʿ̽ÿ¿˾źľqZ^jRv~|ztOBzhnmmoqoeURjkgb\QG?83/)('%#!!! ! +Mzungff|XZXPQQ^QIPUSÒ|uwǛ|xwvspmkgacb`^[VSNGB$'\IA:75132430--+)&Bwnd_ZXUT~~~~w{~{z{v}~}z|v|x|~|{}{{|{{{{z{|vv~{yyz|yyzzwxzwsvxxvtx|zvwywov{yuzywvxy{suyuqvxxtwvustuvssssqqpqrmeb`_`_^\XUV_WDCBAAC?*'9CFFFGGEF\``Z_`]V[\\W[[ZVUXVVWVVVUXVSSTTTTVRTUUUVXZ\[Z[[bquusiVfnx~z{}}y{||{{|{||{{ytlfceea^_`c__`^^_b`]\^^\\``[V[[Y][\YXZZWXZWXZZVWZWSWYXUTVXZYYVXWWVXVTRRRQRQRQRPNRSQNNONONONOPPMLKKKMMMKNMJHIIIKLHGHHEDEFDB@?<>;:9:632Ŀ¾ƿͿȾûo[_lOs}vkKDy´ŷuonnqrndSSslhd]UI>;5/+)'%#""!!! !  
MxupigjylLNXkOLPXS̑_vʙ}{ywusnjid_cb`\\XWQK@',HA88634222/,-.+%C~tlda\URO}}}}|~~}~}~}}~|z~}z}{~t~~|x{}yz~yz|xvz}{xz|yx|~xvx||xw{|xvzyxzyxuytxyxwyxyvwxxrvyurvuvswxvrsuuoosskprpnlge`^`_^][TXaZCDABBC>*';CGFGGHGKeb^_a^^Z]Z[Y[ZWTTXWWVUUTSTSRRRQSSTPRVVTW\\[\\\[eruusiTfpx||}~}{||}{{{|{{|{ysjheddc`bbeabc_b`c]^bc]``a`^___^a`^^\]^\Y[Z]\ZWY[XVXXXYWYZ[XZXUVZ[YWXWUWUWWTUWONSSTRTRQRSSSRRONSONMOPNMOMKNNMIKKKLKKIILKKIIJLIGHILKFF¾¾ĿþſžĽ½ȿǿĿƿƾ˽ǽnZ^eRo}{vhMFusrqstqgXOkkhe]SG=:61+)&##"!  + +H{tnjedЀE^JLQVMNJĵtɚ~zyvrrYMPN\cb^]ZWTOJ?%'hH?62543321/-.+(#=~}xrkf`[SPL|}}}z{}|~~~}x|~}~~}~}~}}~{|z}|z|||wz}xx{}xwvzwsy}vuxyuty}xvw|ysy{xqwyzvx{yvxyzwuvwwuxysttvvupttuqorrqqqpomgda__^]][TX`WCCCEEC?*),hJB96552331/..,(%+(9BHHGHHDK^\___YZ^]Y[[XTXVVVWZUYWXXSTWUVSZVTUUTUV[[Z[\[Y^hqsutplnrx{z|~}}}~|{{{}~~|xtjfgfcahhdfhefdeeebcaa`ccb_a``beb\`cc]`a___c_]^_[^__^_^]`]^\][\^^[[\Z\XXXWUXWXXWVTUUWTTVVTUVURUURTUQTUVQPQQQRTPNQSRRRTRQQQSQQPLMNMJLKƾ¿̿ʾȼ˽̼ɾŽrV\hWv|zs<&':l²ztwuvseRRgkibZPGA960*)%$"! ! Fsngba[vVHNMNMRVRÑrpr˜~zxurpnjkfcdc_]ZXTOG@+bKA9788313010/,*":ZSP><=52,023DPOoyxuoffh\X[WRRQPMBH^zzxurrpnljfdaa_^ZXYURVUSUR}~}}~~|~|~x~}~|u||~v~~}x|}xz{{w|}|yuy|zxz}wxy{zxwzyvuy{zvuvxvtyyysuwsuyyxryzxptxvmswvptxsotvsntuojrtqnprmddc`^]]^ZWX`V?BCEDB>+(9BFHIIICF_\ba_\\]]Y\YXVYVVVWXSTVUSSTTRTUYTSVUTUWY\Z][\[]gquvussqty{x{{zz}}y|}{~~}{yshdgfabee_dfeadea]beabed`ccadcdd`_daa^a`__``__ba^``\\_^_`X^_`Z^__[_`][]_VXWZXYYYWWZUV[ZVUYWWWUTUSUVRQSTQSUTSRUNOSROQRTSURQSROLPQONOPM¿Ŀ¿ſȾƽǿɾǾɾ˿ͿɽþȻſrZ^dXy|{yf0/5((;BGJJHJDC__ca_`][Z][XXY]XTUVZUWWVSUSSSSRRSUUTQTVUYZYYZ[]dquuvsrqpx}z||{|~}|{|}|}~~}zsmggfeeegaefecfjdbeb`be`^cea`dga_cebabc_`ba`_aab`a____^`_[a`^\ba[Y\[\Z\[WZ][YXXXXY]VU[WTVYXXVYWWYY[YXWUWWUUVSRSVVTSTTSVVTQTTSORSPPPQP¿¾ȿɻǿÿžȿſɹ;ͼȽ;Ľr^]aR~{zzvc77=Fl·}{{wrePGhnmeZSJB930+'%#"""!!  
+9}rofceKflZJMMOPUJƭp_0~z̟~zxusqfeY\add_[ZWSNJ?)'nOA:?hp30-,-$/z~wgouj`ZOVQwgtuphlacffefXG:.m}}|zuqnsupprqiRH]~|~}}~}w{}|{|}w|~z|}{~{{{{{w~~|yzzz~~xzzvyy{zyw|~zvwwxtvxyxvvvrsxxtovvsntxuswzxtyxunrxvquwtrtxsosurqprrnpqpoiebaa^\]\XUW`R=@BDCBB)+*#aE>;Bm400.,&4||yrV;8437MWSwjax}RH68155625(167CG:/rkMMICA0+)*,,),)++,*-,00@RHR}}|~}yuiky}~|~~~~~}~}~~{~zzz|x||}w}~|yxz~wy|uzy~sv{}yx{~yxwxyzxxxwx{vvttvutwwvtpxwwvvvutzxuqsvupsspptvsorurooqmlmqomieca`_]]]WVWbR=>>CCC<'*71+.448:A<9::9V4-/-+%1z{wxmM>:9>U]Xxada~vprhb`][ZQYS=/xxwukljfgeaaa`\YLISEN~}|||~{~~}~pl^XZg}y~{}{{~}~y||~~}{|~z}z|z{x|w|u|{~wz{{r|}|tx|}ttwzwwxxutxyttxywvvywruxxvvtttuwyuustrrptrmoutpqsrponqmmmpmlidca``_^]ZXW`M?;2|xvsnnlhjjjkgffdekjk{WLN~z~~~}vpmhRNPPVf~x}x|}xy~{y}~u|x|y{}x|~~{~}}z|}~zy|{}z}zyywwwz{zxxxzwxyxutvwvuuwtsvwvpyxtsuuvvxwwsuwwssrtsrvuqpvsonqoqqnnnmmlfcb`a^]^[XZ^L>;;AED;&-?DDGJKJBI_\\\[X\\\YXZZVXZZSVWVWXWTUVWUVVWUTUYVUVXZ[\][\^gsttuvvutz~||{~}|}~{z{|~}z{yriggegghfcdgecbffcadebcb_acbd_cca_dc`]ba_abb``cd`_`_`baae`a`ba_aa_^^__^_[\Z[[[[\[VZ\ZYY\ZZZYX[[Z[YZVUY\XYXYYUVXYYXXUXZYXZZYWWYXXWTTTUÿÿýǿÿƽ˿ȽƽÿŽue?AOXge^TI>:94131-++**)((%''$$""$&,}ytdLCeole\TIA:3/+(&$$#!!  + -rlgccuWOMNNNVTtwҟ}{yvtokpjhedbb_\YVOI@,YG@==O~12/-+%/tfFG>6667@Z_^]``}hg[Xswnkgc_[YVSRT[\>1ytyspnjhfdba`_`[YYYWRTXqZLK~||~|~~vcVNOMIJLNWgn{~{~}|}~||z|z}|x{x{|{w|~|w{}{rz{}yyz}vuw{vwxzxxy|wwy{xuvwvuuwuuutuusrxupsuvvtwuvpvxtrtvooruqmsuqmoqooqpommqkedcaa__\ZZY_O<9:@EC:&+?BEHIIHCO`[\]]Z\\[XYYXWXXVTUVTSXUOTWUQSUUSSUVVUWXX[][Z[^guuuuuvuuw~|{{}}y{||{{}~z|zqgdeddgga`efccfdb`edb`dcbcdbcbfbaada_`ba_acb``cb`_a][_a_a]__^]_`_]\^]]^^[]^]][\^ZZ\[Z[[[XYXXYYYZZ[ZWWZYUZYVXWWVWYWTUZYWZ[YZXVVVWSWYVTþÿſƽǿȿʾǼȿͽſʿǿug6':UhhcSG?<9111/.+*(())(&''%%#"!#/~}xtcIHdomg\SI@;3/,)(%$"!  ! 
,rlfbdywNWmaLOMKNWPqʿ{ќ~zyvslWSMQZdbb_[XXPKA.]H@@Ab0//..%.rz}ml{}\[cccc^My[LĶtvnjga^YYXVQRYZA8}wzyupmjgedc`^__\[[YVXYxZLM|}||}zvusbPGIGGGGJJKYfó}tw~|}~}}|~~}|}}||}z{y~{|z}|yv{{zzy}uwyzxxxwvxyzrrvxstxxsptytsuxusyxwsq{xwuvxtvuxustvvrstspqttqoopmlqqjjkmgbcbc`_`_ZWY`P><;>CC<&-=BFIGGGAI_^_\\_`][[]XUWZYVWXXUYYWUVVUTUUUTUTTTWWVX[[XX^]grtuuvvtsw|{{z}}z|~~{{|~~}}{qhfhcfgfbcggccfeaabcabbabeebccc``ce`_b`_`a``acb`ca`^bbb_b__^__a`]\]^\\]ZWY\\ZZZ[WY[YXY[XWYY\[\ZX[Z\Y[[YZ[ZWXVXXXXZYW[XW[[X\\XXYXTWXSSþ¾ǽľƾ¿ȾžŽſȾѿɼsc4.VhibSJ@:511//,+*(')**((($##" ,|yt^GGellg]RI@842/+)'$"" ! )tlea_odNJbaNNMLQVRtƬk]Yb|њ~zzwtmefa`beb`_[YWRKC/aH@@@Ge_j420..#-uuPMI?Vghgf`1lP?µsuojgba^ZYVSRZ[B4z{xupojigd`]^][[\ZYXYaxZMS~~{|y|r[KKABFGGGEDGJLYh´zs{~}{z~zwzv~{y~y{|z{~}vx|~xz}~ww{}xxz~uww}xyxywwvwwwrtuxtswwqqssqqtvrqyvtrsyuqtwvsvxwqsvuusttpotvsrqonoooqonmkfaccb____ZXZ_N???=BC:%.ADGFII>N_^\Z]\Y\_ZWRXWTSUWSUXWTUTTSQSUUSUWTTUUTWZZW[_\\gssvuvvusz~||{}|{}~{{}}~{yyphdcedcbddbbfe`beeabeedceeacfeddecaba`aba`]ba`]dc`^_`ad_`aa\`aa``b````][]^[Y[\^^\[Z\\[[[[Z[]YY\\[ZZZYUXYYUXXTSX[VXZXYZXVZ[YWZ\YVXXWXV¾Ϳ̿ýŽƿƾĽue4-Rnl_RJF>643122-*+*,,($" (}{xpY5>bkmg]QH?9518=3032  +!xoiccfyGJMNLQWRmȗy՞~ywwuqkXS_geeb_[XWSKA/]JABF`653/+#+s}khjjiifa=o[Brvokgda[\ZVWS^]H<}sxvrnmligecb_^]]][XXWZ~WNLlw~{}{||z}|~|~~}}}{}um^QCBBBBA>BECAA@?@@EMaŌ|}~}||}|}~|zy~|~}~~|{z{~|zw{~zuzzzwzwxuzyxs|{zpty{ttwxsvxxtquxusuytruvtttttsuxtstwxsquwusrsttrqpnoqtpnqumkpqmlnmfkgbec_^]`_ZWV\L@CC@AC:#,>DFHIKI@Q^]]_]ZY]^YXXXWVUVWTVWTQSTSRSTRRQTTTTRQQVYYW\]Y]hrsvvuvuty}|z{|}{}|{|}}}|{wofdcdeacecbccc`ddcbbdbadfeaaeacfdabbb`bc_abc_`ab_^_aa_bba``\`dbaa`^]`_\]\ZWZ\\]][ZZ[[\Z[XX[^[Y\[Y[[ZYZZXYXXVVVWYYYYX\ZZV\ZYYYXUUYWVVV¾½ýǿʾýƿug3-_qqcXQOC852012/,--,)&"! !"|zyvmS3:clng\QI?878fr! "!  
"umgc_hzf_jcLONLOWUnȲnei~՟{ywtp`]cbcfec`\YWSKC/$eGBABU642/,$,sm[VPLMKFGcllkgc?l\Cpsokggd^\[UXSc]E<{xuuqonkklfcb^\]\\[ZXY\zYO7Nżt~~~~{{{z~|~~vz}~xnUF;DEBELCIDBEDEA>=;9>G^|~{y{|z}{v{~zx~~|~~zx{zyy~|z{{x|yyuzy}xvw|ywszxyuvwwqsvwstuurptvqntxsqsvtquyqqtxtqrxwqsuurttppnrqrpopsnonnmmnlhjgbb`__^^][WZ^L@EFB@@;$.>CGHIJGCT\[[_XUZ[[WX[VUUUVWXVUSTVUTVWTRTTUUTVUSTW[[Z[[Z]hrvvtstutx{z{{||~~~|{|}}}}{wndfieedggdegdechbfcbcbabecbab`cd`]aa`_a_Z`ac^_caZ_a_a`bcb_```````_^_``^`[ZY__\[Z[ZZZWV[\[Y[ZY\]ZX]^YY[YWX[XVXYWWWYZ[\XWX^WZ\[YWZXWZXW¾Ŀÿſƾǿν̿ʿ¾ƾtf4-btpi\SOG8642/20/./-*)&$"#pboka^NJADCJLFEBCEFDECAIGEHJMNLA*4aonh^QH@;87I_UZSA#!!!  #xkga`¹~OOMKOUXkɧpxw}֢~{xuupdimg^eea`]YWSKD/ iH??E^62/.,$,vdURKLKHEEepmjhcCk`Htsoiecb`]ZWWUc[G;x{vuromjjiecca_]ZY[YZZ\rUR3MŽ`{~~}~~}~xwz~}|~~~~~|}}~shLAHUYblkxxz}xqmhW?9658BV|}{}}{|}}zx{}zy}wz|yw{zuyz|uwyyvyxuqvy{puy{vyx{tvuxuusvuutzwvqtuutstwtruusnsunptwsmqvrmtvsorsonosopssomosokmpljijh`bbb^a^[ZWZZHBEE>;?:".=AFHHIH?SZ\\]XVZ[XWYWTSTTRQWWSOUUTRTVUTUTSUUTUUXYXY[[ZZ_jrvwvtstrx|||{{{~~}{{|}}~~~wmggfdefgdbffaddc_beaabccccdccbebb`ba`aabca`a^aca_a`[^]adb[a`_]_a][^^^_``ZZ\^]_^YZ\\YWX[^[[\YU[[[[\YWXZXWXYUTVWVXWXWZYWWZ[SY[ZWXWTW[WT¾ʽȿϿǿͽȾɼǻƿɾse2,\tnfZPJB953441.,..*+,,'%$#!BAjpjcZPIECBC?;?;:<88:845135422/%*]ooh`SI@;664-+9%$ "   yoe]_lk]ZmgGRONRUSeɮ}֡~{yvvphgg_[geb`]YWSKC1eGBAF_h410/,%+qzbT_fqokhfAlcDrulgeb`__ZWWXd]H;{uxwsnkjiifccba_`\\[[ZZqTQ1Mľa|~}~~||~~|zy~~~|~~|y~y|{z~hVDDIg|wx\40028Nz|~}||}~|z{|~|x|zzy{{zxzyyxyyww{ywvzwzqyxynyuvrtuuotuxoquwustwrtwvrstutssqrppsturorsrrwvrprrnmoqlmrqllnpmjmnegjle^ab^[_^][WYZFBCEA>>:"0>EGHJIF8L]]^`_[ZXXZXVUTUVWWWTRSVTSRRRRTTRPTUSSTVUUYYXXX_kstvussssy}}}{{{}}~|}}}~~}}xnihdcddfcedebddc^aeb`cfc`dfbbbdabdd``babddb`adcbbba]`]ab_\ba^```^]^_^^`\YY\[ZZZYZ^\UZYY[[[[[Z\^Y[[YXYXXZYWWXXXXYXXXZYXZY[[[YXUXWUWVUTÿ¾ÿ¿ĿŽż˺˿ɾȽλƼsf3+VqmdZPF>966885.*-1-+,,((&%$!BkpjcUMIFEBA@?A===;;9798653322(&_nmi`TGB;67\p$!"!   
xmf]ifUNRo{KONLPWRe¿ա}zxvrl_HOcgeb`][WSMC2fGDDBJyJ-2/0-$*r}aIYisnjjd@ohFwsnida__]YXZUb]F9zu|ytqlkjhfdc]```Y\[XZYkVN-Nǽ_y~w~w~}|~x{}|{~v{~}{}{~~wSEb~}yk2+))-B}x|~w{||{|zy|~zy|}x{|zy{|{yzz{zz|}xz{}v{x{wzw|tvtyvto{tvosusnqttprusnpsrporrpnssqrrstsstsrprrpnonoosqnmonomkkmghimfbcaZS[]]YVWXH>BDB?<8!0?FFGHHF;J\_]Z]\ZW[[VUXWVVWYXTUUVUUTSRQUSRQTTRTUVUWYYXXX^ktvvsstsry{}}|~{|}~~||}|}|~zmghccebdcfbdeibdcfdbccecded`accaadb_ab``bc`b`_^bca`^bdb_`aa``a_^__^a__^\\Z\ZZ[ZXZ\WVZZYZYZWXZYZ\ZZVX[TZ[XTWYVWYYWXYYZZXWZ\ZYXZVVWWVVVſĽľľ¾žſǾȽŻǿƼͿ˿ʾƾ̽¿ǻth4/Somc]WG<77::971.173++,((&%$AimicWQJFFABAA@><=:;9566542222&$Yqni^SFA:68Sxo[#"!   vlhndJPLLPXRbɵnZavۡ}zxvrlnjhceca_\ZVQME2$hHCAB\}F;40,#*u{yyysmlihbBljBzqkhaa^^d_^e\C4u|{xtsnmiifhggac`__ahYQ)Q`s~y~~y~}~~}~}~}y~~y~~~{~~}}}}}|x|t{~~p.$""':|y}xs{|~|xyxz{zvx}|yvyzuuzzwtvxzwy|zry{xqxyysvz|ruw}tuu~suswurquurottrpssqmqtqnorrqqssrpqsqonqpompnmnsqmnronomlljkjigdcdTHZ\[YXWYI@CBBB<6 -@DEHIHF;9832696,()''$#"Aklg`WPKCDCCAA><<<:;9766544340'&[oni^QHD;72Kl&$#    yxmgqs|lMNNOPVVfƫksx١~}{yvq_NPQWfd`_\YTPJA1#dD@BHkQrH0.0'(uxumegegb]dgfbAlo=|wmghed`\[[[WYVNF:r~}~zvtroljlhffhfgdbbabaaVP,Ueq|{w{||{{{~{{~z}~~}y~}|}~}||wzz|}{z|}nrzyf%"7~{yy||||y|zzywv{{y{ywzyxzxwquwyuxywswzupwxupvvymrtwlsturrutsqtvrprwroswqnputpnrssqrsurrsqsqopnonnlmmqomnpjmpligkkihd`a^IA[][VVX[E@ABCC=5 -=CEHHIG;P_]\[]ZZ[[WVUWUTUWVSTTTQSTURRTSSQRSRQTSTWXXYZYU_jsusuurttz||||}}|}~}|}~~}||ykedcdcbced^eeb_ccaa`ecaad_b_dabbc`aabaa``bb`^_cc`a`aba_]aa_^`c__ab_`^ZY]\ZY\]XWY[ZX[ZZ\[WX[[ZYY[]ZYY[[[ZXYWWXXVXYXYYXZWXXXXXXXXXVXVVU¿žȽǿÿɿļȾoe3,_rg]ZSMLE@A<:847995.*(**$!Aike_YQKEEDCA?=<;;:9978654222/%#Yqoj^QHB<83WBj4#" ! !! {vohqv{txqJNONPVTdyhj}إ}{xupiikc_fc`_]\VRKC2dFEA?^zuI-//&)zvPD>58:57448TebEmm;{tpnlhfc^^\YVUQQNH9p~tyupomlkjifeba``aa^\ZVWVVK(Qkr{{~z{z}}}~}}~}{yu}{y}vx}}~z~tx~z|y~|z|}{kpvuJ 4}}yy{x{{~xwz}tsy}yy||xz{zyyyxzxzz{xxxxwtv}wutxwzquuvrustsqsroortmqqroptulkptpoqspqqsqrvxtqstqqosppnonqpnmmfmnkiikihie_`_G<\^^ZU[]E?@@BC=4+;8676532/,.1.%@ekd^WPIDECA@?=;;<:9777543212.# "\tri]OK@;84CdK|1"#"!"!!! 
}xohpcJONLQWVbɽۡ}{yuuqlkm`]ecb`[\YTKC1iJE>ANuB900/&*vulpqnm`VY[UPPdcGn¿l=wrnjfc`]YXVUUSOG2x~zvtojjgffdca_\]]][XYVUUG'N¿¿ew}||}|z~||{~~}}}|}|{}}~{|}v{~}{}z~xy||}}}{~}}qgqtrH1~yxyy|xzw|wwx|st||vvy~wx|{yvz{xty{yvuzytty{qtuwtttxuxwvststpooqmrospqmqptlnrtnnornnqrqmornmpsoopsmmlonnpqlkkokiikhihje`b\D5\^]XSXXD@A?AB>3+=BEFGHD9RYZ[XWWZZYYZXUUXVTWVZTUVTQUVVTTUTRRWWTSWWYZZY\\`hqttuuutt{|{}||~}~~{|}}}~}yiadfcbde_dgd]abb^bdaacgcbcfdbaccae_b__a`_`_^]_`^`^`^^]`a^[aa^[\`]\^_Y]`][\^ZZXZZZZ\]X\[[[\YYZZ]WXYZYY[XY]XVYYXUXYXWXXXZ[YWYWWWVXVWWVT¿þǿžĽʾ·ȽŻȿǿνĽth4,_kf\UNHA><:;:8810/0-,110( Afha\WPKFDBAA@??;9;:7753333210$ %Yvpj_PH?<86J7"#"!  w{ngdudKQapQOONSWWbڣ}|yuusm[MP_fb``\ZYTNE4cKC>BPL/3221$*p}{mbhig`YbaHln>yrlid`]ZVVUUVSNF3z~ztqmiifb`_]][YWXXVWUTTSD+Rſb}}|~{~z|y|xxy{~z|||~|~~{}{|xy{}{urluwehswxB.|w}y|zyz{w{yzyzx|z{yzzysx|yywzxsq{zvtuvurtvvmvutttssmttrnsstoprtmqtunppuouqsqroqpqqqrrooonpoqronooikllkmonghllbfijfgfidab]MH\[XWUYUC?BABD?2 -?BFGHHF9O[^]ZWZZXYYXWUWWVSXUVRWVTQTUSRTPQRRSQQSWUXYXWZ\_jqsssuustz}|||~}|}}}}z|~~~~~wibcdcecd`ced^dcc`ac]_ad``bc^`bbaae_a^a_a_^__`a`^b_b_``a____^]^aa\^_^[]]WXZ[WZYZXY[XWXZYWZ\YVZXZY[Z[^[ZX[[XWYXYXXWXZWWWY\XWYYYZZYVVXWW˼ɼɿȾ̿ʻǽοſtg3+Nee^VNGBA@;6644.-/,*(-,*$ @fld\UQLIEBBAA@?<::9863233321.# !YspkaNG@;76?e^G!""!   t}oga\RNTPPNNNPPQWWaĥڤ~{yvusqheehec`_^ZWUQF5mPEBAY65243('qjID=9::33348M``GjŻn;{uoida\ZWSQRQPNMG7}{xuqkhggb_^^[ZYWUVWTSSRQQE'Pſ~e{z}w{{}~}}}~|~~~}}x}}w~y{~u~{{}|y}{}~z~zz~{{~y~{~}}~yxvwxxs|raivy|=-}uyy|ww{{vx{{vxx~yzx{zww|ywzzyutxyvuvwtuvwtwtztssusspwpogsqpmrrrjqrthmnohmorlooqmnotpnpuoosponoqpnklmlikolilnkdhhjhhgjb_`][[[Z[XVWUC=@@BDA1 /=CFGGGE;O[]XWX\ZWVWXUXYWUWYVUVZWUWVTRTTQPQQRSTUWUYXYXZ[]irttturttz~z|{~~}~}~}{}~}xkfeeeeccaddbcedccbb_edccaab`_`]^a`\_]_]`a`^^aa[_f\__aab^^_^\^a^^__^][^]Y[^ZY]XZXZZWV[[WXYZXYZW[[[WY]ZTVYXWYXTXWWVXZWXYYZYYXYZZXWYWXXXĿ¾ſɿʾȼǿοƻse0,RljbYSNHFE<41.0.00+*),)%##=flbYSPLIFCC@?=<<;88853333220.$ Zqoj`PFB;78K.!#" !!!  
j{nf^[YXUSNOQOPNRWX[{bf}ܣ|zvuriTPRcddb`_\WSND4]OD@BO:6575+)tpfc\^]SNJEK[aaHkƸo;rrqjea_XWVTSQPOMJ:vwvssoikkedb`]\[XVWYVRSRRPG*Mf|z}y~~z~{|}y~wxy~~|}~~~~z|{~{||}}{|z}xy{|}z~}zy||||~~y~{wrvy|ndlvy~8+{}v{y}ww||rzzytxx}vvuxvvvytty{vttxusvxvqvyvqxyyopsumruymqpuqsouqpmtnpjrmpmmmqnommkmpsnkoumooojmpqlllnljkqnlkrjgkkhjlmiic``a]\\[YVVVRC=@AAC@0.=CFGGGD;S`\YWXTVVXXRUWVTTUVTPUTUTVRPSTUSRSRSUXXWWYWZ[][]kstttsqru{~|}|~}|||}||}}~}{zykdbbec`a`cacecadcaa`e`bcb_ba__adc`_ba_]ab`^a^]]`b_\`baaZ^_]\__[^`\Z\\[\[[ZXZ_[[Y[Y[]\[Y\ZYW][W[ZYWYZYWY[YZXYVWTSTXYVVY[WYYWWYYWVXWWXZ¾ſ¿ľƿɿʿʾȿǹte3)]spfWRQH>>;40+..12/-..*&## >glc[USLHE@A@=<<<;:7655414220.' Vqpl`SJB;57H6$#!!!  +i{le`\dq]UOTS[^NPXY]ʾvuܦ|ywvpdbnfaeeca^\XSKB6SLC?CVJ26567+*vy~znm`T\_bcMlq;wuojda__a\ZWRK?x{vsnlhhfcdcdfbbbaUJ-Pdt}~~~y~{y~v}~~~v~y{|}{}|}}~y}{|}|{|}~}}}{|~|}{}|~}~~|}}x~rouyz}nZlvzv4)kzz|v{{zxxw|yvzxxwywxzyuwwwywsuvxvuvwtpzvrowusorssotvwkqrsoprtnnotnnpvmtoonppqomlssrooopkponimooikjmjklmllkohgjlhijjehd_^`]\^]ZVUWRC=>>AD?1.>DFGFFC:V_ZYYZX\WYWWXWWTUUXRPVTTUSOORSQQRQOPQQSTVVTX[ZY]nqsrrqrrsz|}~|||~~}}|||}~||jdbbgd`cbd`dc`bba```c[bba^acb]ab__`e_^`b__`a`^_`__``ba_\_\\```Z`_\Z\]\\\ZXX[[X[Z[XX\YZ\]VVWZYX[ZXYZZX[[XYYWXWZVUUWWWXZZXYYWYWWWYYWVXYÿ¿ǿſ̿Ŀ̿˿̿˿νʼue4._qmbTMI?88940,,-022//.*%""@clc[XTLHFACC?==:;:8654323232.' Uork`TJA<87;\VyI#%"   m}oea]uzIOWWZɿnܦ|zyvqijmibdeb_][XRKD5TNBACMu<7210%(sgHJ;:;902769G^dLhÿtAysojddabc`caYK?z|xttrnkhgdc_`^__^ddfxYJ&Nbs{~~{yv{~}}|~|~{}~~~z~{{~}yx~xy|~}v{y|}x{{x}x}}y~x|~~|~~x|~{}~~}z}~{~~x}}}{|rty|{~jUju}q;'f}y|zuxxyuux~uuvztwy}vsz{utxywttvwstwxtsx|ssuvqsstrtrvtqntsrmpqrklpsknprkpoolnqrmknvnonommornkmpnllojiimjjkmkkghikggiibid_\]]\[\YVVWRC>?>AB?1.>CDEEEC=XZTYYWVYYWWXXWVTYXVUTVUTWSRRSTRRTSPSTSTSVXTV[YW^lqtrrpsqqy{{~}}}|}~~|z{}~|ylbcfeccfbccfbbddaabaa]dc`^ca_^cd``be^\_`]\_a^]^___`^`^_`a^^_``^d]^b^\^]]ZZ]Z[Y\[ZWXZYZ\ZUYYXXY[UYYYYZZZWVXVVXYUVVVXZ[ZY[YXXYWVVXWWXXXý¿ĿƿǿɿʿɽȾοǿ¿tf7-]ni^QF?:6521.,,*,//.,+%"!!>blb[XTNHFDDA@=<::88764332331/( UrqkaUJ@:56F?%%"!!!"  
pqgcasyKNUWWňrަ}|xvrka[SZfeb_]\WRKE7QLBBGUG;41/'&rqcb[^_[RMJKJTbbQjrBowyykede`]]XS[ZH?AAA@0.=BDEEDB@WYUYYSVXXUUVVTSUVRTVVSRRSQSSSTRSSRRSTUUTUXV[YXY_krsnoppnry||}||~}}}~{{}~}~}xi`ac_bba^^_cbbd_^bda_acca`ca`adb`ba`[^``^^^`\^^Z^]\[]\]`^X^b^\^_XZ`][^\YX\]XZ[[ZXXZZZ[ZXX[ZYY[[VZYXYYXYWWVVZZVTWVVYXXWZ[WUWYWUUWTVYSTǾ½ĽþǾĿǿ˾žȿϾʿſȾɼĿvc6-Zmh\OD?;63//.-,)(()(('#!"!>^jc[YSOJGDC??=:;987643431211.) WtqlbUIA<67D}{pCH&#" ! "" mphdaefDk^]^ONWXWȽަ~|zwurneXXdffbb_\XRKD9UHBCCNyzW27520''k{|yrgkqpiffcRkvAswupojjecZU^_M>zxxvsqpu{ytjfdoi[ZY[YZ`UG%Gi~~{~}}~~~~|}~{w~y}zw}z~{~y|}}ysz~y~~}y|xx||w{~zyyy~}|}|{yz}||}|y~~~x}}~{|}}}}y~z{{~yx{|zw|~{wzxz{}~u[fpv}l< %`stxvvvxuxtxwzuvvzvwxxrzxwpswvtvxvutuvqqtvqnturmprqhusrlpqpkoqojnponoppjmnplmmqnlmmlnknnnllnoilnlijmkhjkmejllfgkhcffidfff^`_\[\ZZWSPVNB?>BBB>..>BCEFEA9QWZZYUWXWVVVTSVUTSVUTSUTRMRSSPPPOQRPPRUTRWYZVYY^ioqpoprpsy~}}}{}}}~|{|}~~~widcb_cba]^_ba_a__ac]^^```_a_^``Z_b_^^b_]]_^`_^_^^\\\^`^_]^a_]]__[`^[Y`]YW[[VY\[XXZXY][[Y[[Y[\ZY[[YYYZXWZXXX[YXXWXY\XWXZZXYYXXWWWWY[UV¿þþǿȿ̿ʿʽ̾Ƽ˾¾ue5.]oh]QF@?:5455/,*(&'**($!""">aib_ZRPKFEC@>;:;;:8444520121-& VpqlcUJB<84AtpmIB&"" ! !  e~rhe__kogbYQSXXVMGbک|xvtqkbd`cfga``\YTLD6XNEDDSo6>:2-$%j~ZG;:971@_qplibRiƶz=puyuvnkkcVWa`N=ytwutqp{~yj|~h[ZYXY^{\K%Gh{}|z~vz|zz~}|}~}}}|}{x|{~{~w}{}z{yu|}}x{{x~~tw{~xu{||xx{w{}}vw}}xy|}}u}~y{|~z}}{x}~|z{y~~y{|~xxz~|{z{|}{y{xxz||~}xbejrzo9 %_~~tsurtwyovxytuuwtxxxuvwyuuvxwuvxwuvwwtqvyyurqtrlssqkysrnqppmonnimnlhmmnemmmhkmokhmmgjnojkonmlmonjlolhhkkkhmljhjhgdffieded\]_][][ZWTQUNA?@>>B<--=CDBCC@9OXYXUZYXVXXTSUWTVWXUSSWUSSTSQURQQQSRQSUUVZXVTYX_iononprpszz|}~}~|{~}{{}~~wkfdcdc`_`cabaa`_aabWb`b__^]\_`^\`^\]_`\[]]\]\[\_^\\^``_\\b_Z]_\]_b]\]_[[\\ZZ[ZYZ\[X[[[Y\ZYWY[\ZYYVZYYZ[\YXYXUWYYXYYTWYVUZZXVXXYXYXXYZ¿ÿÿý¿ʿʿʿϿӿƻtd6-^pj_UMGEE>:::3/*&&&%&%#$&'& =ekaaZUPLHEA><<;:;97653320021/$ YqpmdVLB:81CD $# !!! 
+cqga_iϥXFevNTZZU̿mߩ}xwuqgVOPVdgd`^\ZUNG6YOFA@[H=81.$"hxdfie\UVUYptokfQf?ptppomkdde^YW`cN;svvvurrq~wqeqrc\ZZZZ]v[L'Ce䟎iJA@FCHTelmsuteYbqXYnnm{~}u~zx{{{zwuk~}||vyx|}}y~~yzz{z{|{w{y|y|~vyxzy{x~zzwzzzxt|}|z{{~vy|~z||~xw{~zw{}|yz|{yyz}uwy|ywz}zxyz|xy{{|{|~{hgbn~vo: "\|}{rmqntuwouturuuvqtwxnrvxqquwsotxusvxvrqttpqtqnqqrkprvmnprnpnolkknnljnjkgnkjgljkhjkjehlmihkmjjlljjmphhijgiinihimhfhgeefdbb_\]^\]YWWSTUL?>?=?A<,.94.'&&&$"#%&)%=cia_ZWQMHCAA?><;976654232022/" XtsndYMD<738^UR!$# ""! + _nib[nƒTNRVkqOTZ[Zvnpx᨝}yvutl]ZZ_hhdb_\XUQG9fQC>ATI752.$%l}onnhdtvtqiRg¿Auuqpmifd`YZYV]_K?yrttuvwujhgjfb]]^]\ZY\`x\J(FeEPE3=+#!" !%&"+CCFP@Bƿļÿe[}}{ywell^/)*%!.96.956DO@=DXupu{rr}~ur}~uw|}w|yyz|t{~y{||y~|}~}}z|~}|}}{z|y{y{z~xwzuy|{tvxzxuyzzwyxz||~zz{mkjoxi9 + !Uqvwmqtvxwusvuvwvuutvuvnxvxsstvsqqvrquuqpquplqvjfprpjqpmfnqnjlpqkjnojmmnhjirmkmnikikjhhlkkkhjlghjihjjkfgiigijjfghfbdgebddc^c`]\][XXWUTSVK@>>??@<,/=A@?@C<:T\XVXYXVYXWTUTTSXTQPTSQQSSUPSSPPQRQQSSTXYYVUWXV`jonklmpnow|}}}~~|{z}|y|~}}ujb^_^_\[WVVZ]^\_a\bba]``^_aa_^ab^[^`]]^^\]_]^\]^_\^_`]Z\][\\\ZX[\]][[XZ[ZWUXXYZXUXZYVXZXVVWWYZZWW[ZWWZYWYYVUUVWVXVWYZYWWYWWYWVXVUUWXVýĿſĿÿɿ̾ͿĿſѾǾvj9-Knni\SNLJFBA@<4+((&&$"%'''#:akc`[YSMGCA@??<<976643332121.# WqsndXLB:5:J?$%#   Wogb_kHEPRRQYSUZ[[Ɛߧ}zwvtg_i\eiidc`\ZUPF9mJA>@Dqo<552.''qwKEC78839]yvsohPf|Atvkkmnd_][W`^J=y{vuwvrrleabb^`]XY[_pYK*E¿jEIB:q]1!"!"!$)/4/+,;DIF67ſg,~}~~~~{wk$$=<$#9wyd>5<@;?JC;58=FGKSVWZZc\LV;44=JEDCIRj~~vwzxs]jieonmqtsw{rwwytux~~~~yty|lmmsyc8 "Sys{}ulstwxrtuuquxupruuqrvxqrrrpuorqtrssqlsutyvtvppyutosrmjqnjjmnk_lnlgjllijjlihmnfjlnihiljjklikknljkljjijhigijhgffdacedacdd^d^[ZYXWWYWSQPG==?>?@;*-:?A@?A\nmXVRRRNNRQY]]e}qmo~znmqsxf7 
#Pyqzwsktrtuusrqiqstiptsmtuvpnrsjnosomptpoq}bifhhhknmmhpsjgmuoshsllikjiefggdgjj]fjkgghkdhnobikkgilkghjidfhifghheedhfeddegel_\ZXWXVWURPRE9<>=>?;)-:@??@A=?SWTUUSTVVTUVXTSRSRRSQMRRRSTSPPQRRRSRSRSUWWW[WYX_hmljjinmos}}}|~}}||~zy|~|pZ/%)-:MJ;43/1,2=V]b^^`c[__`_^^_\V`^_\\\]]]^Z\Y[Z_]][[ZZ][YY]][]^[[][Z[]ZXZZVUVXXXWZY\\WX\]Y\]YYXYYYZXX\[WWVWXXVWWVVWWWZXUWWVVYWUSVVOSĿ¿ɿ¿ƼȻŻƾwd5,PkjcYTPR[SFB@;99854424.,*)&:^haa_WVKJFEB@=;;:765443222120% Smtmg[PD==YZjeB"##"!! ! PpicdnYZ__XͺjtYݭ~|xvspqolligfd`\ZWOG8cLHHCZ?950,"$ltf]UYssnkhSeArszytqjfhgg\X`_M?srxvw~{zvpnkklkaWXW^qWJ+B¿hKKA,)9xx+"'8@<7>E9AJID5>d4}}|{wh :(%  ]- &#  &--'Cm!'&.Ib(fIk^XWXW]]`XY[SFhlknrwvf3 %Pvn{w{gltkild]YYUY[Z[[[]bYZ_`_Z]agilprrv͜S]^]XZ\XSTYpfY]\XX_`g\``e`_`jihjljicjilhhilgjkkfjihdhifaggfcfgedfdfbdfe^dfdcfgd]\[YXWVTSPPRF;;>=<<9).8<=?A@;>SVSRVUWTVTWVPRSRPQUSQMTRNRWQNLPPPSQNMQRPQUWWSZZ^gkkjihklmt{|}{z||{z{|{z|~}{rZ%'5NVDB<9:;;BT]^[]]`W^a][_^WZ`b]\[\Y[^_]\_`_^`^][\[[\^]`_]Z]\[]\[[ZXWZZYUZYWXZWXY\ZXYXYVY[XXYYXYZXVZXVVYXVVVUVXXUVYXVWWWXYZXTUXWSVÿþ˿ÿƻƿɿſ¿ƾʿvi7,MghaWSRcraH\AMTI=EE<9?.,,'!:aka_^WWNJFDB@==;;756544321020% OotmeZOE>>NwmX#""""!!!  NrhdcehfkaRR]]Z\_`V̪Wcsݬ}{wvsnnnjigfgd^[YVOG8_FBCCQ;782,$"is^D;45KppnmkTdDxvsjeea^Z[ZYXcbO=xy{||unnnlmkb`[_]VXZ[Y\qXL'@gLKE./OwT'"*=91-.>ABMIC46ĸùa5}zyi9()$ O' &$ %3,)Bx& %##9_'p3^`]\\[[^[YYVO5jnlpswyp7  H{mrwrhosBLQmdAFE@BA>?B??>ABAB?>@@=BVsslxzțS]]][Z[XXXI)$@"!$!"#!!&%%#1Zglhdghkgiijgjkkfghljhhljhgjkjelhjgiggeigd_edccdd`[ZYWWUUXRNNMD<;>:;<9' .8;>BB@;Cf.4Z2(&:Zc`^_YUPLFBBB?=;:65544332110-& Vssme[OD=:>]R#I;$$"!"! ! +FrjcaaŴV^_]^ac\Ȉᬜ|{xwsiWNPYffdb`]ZUOH9VI?@A@FKJ|=763.' 
efK<6CYelkpqnmlQc¿@}wumjdeedie]QAq~zxvrnjjgecccdiab`aoWL(D¿kKMG>><===>???@>??@?>=:S`PTVb\qˠT\ZZYYXVXWE:&Gff^]ihhbX[iiaec\\bfabhun`[_`badkmgnkjfkejicbc`]WVXUSTQROMMPB<<=;==9&0:==@BB=8RWUPRSSQRWYSUVSOSVQQPSQQSUSQPQPOOSSSTVVUWYTUWWU[chkif`fjnry}}|{{{{|{|}z{|~}ytU"+:WYHEDAAA@?ZZ___]]^^]^ab]]_]\[Z\]]_^[\\ZTZ\][\^^]]Y]]YZ]][[]\ZZXWWWYTY[YVWYWY[[WXYXXWYWXYYZ\ZZWUZXUTWXTUVUTUVVWWXUUWVSRVUURWUTTVǾǿ̽üϽƿɾuf9,Tfid\RNgxuB4ewj@he@Al*2cR-)9Wd``_ZTPLGDB@><<:76433222201.% QmqoeYNE=820L2():)$" "!  + Dtkdfnlxa`a_add]̦ᬞ|yxwvp]Q[`ggfda][TQG9!NJBAGZ>453.(_yohkhgkprnmhSgDqxridbbc^]]\Z^`OCsuuvvrpnjigeb`_^^Z][ZYZs[J+=iLOF4P.$"!#+?EL9+9>DPJE7<Žĸ`8}yzl <%$# +`! '#&\yZ/ "'1f(%HP!82#%(.+*]+s=aa^\jy|TYWP1flmqqxz~j6 KwopbtvtqkBFLUH@>=;<=<==;;:<><=?>==96&1HKLLTLYm͛FQNNNONLKJ7 2+1%.8>+''+/1)#(+**,,1;PSCMJJLIJMJFLMHMJOWbbc^_dX\ZVORUVRFKG==>BAA=:)!#1<<:??B;:TWMQUPOPUTUQXUTSZTQSSQNQROKORQMNQQNPRQQSUSMQURPUagiha[dijox}~~~{y{||}~}~{z~}{wQ!):]QCBECDBBBZY\^^Y[]][[]YW\\YX\]Z[^^\]]\ZZ][\]^a`\]Z\YW]^][]]\]^ZYXVYXZZYWXXWUZWPWWWWWYWWYXUY[ZWWYVUVXWUWVUTVVUVXVXXXVUXVSSTUTVWVÿŽÿȾɾξȿƻƿɿ̽̾ѿʿžte7,PkjfcYRlaG2ggiJe^;::87542122322/$ MqrpgZNF<96BpLV7$"#!  + ?rldfo^c{|f``_bff_ˀcg᫝}|{wvjVeg`ffeb^]ZWSG8UOFEF]@4562'`yi_ZjnokhQ`ƴEjyzrjica\WZbbO;tvvvuqvtnpledc`__^][YW[|YK);nIOG<`cd_]J."'4@E3/54BPLD66ǾQ÷c8}{xh C)$" @ $ 5N(SpI$"%1Ƽ&'HM /o"-*&Y$&-u=f`\]xZYWO0clmrtx~zyi7 !Oxqqm~|uriBDIVE<=;<;;<;=:;;<<;;;<<;87#=DJISKS@`ф &&()'%$## + 7,/! !!$'*KMOOMNOOOOMPONNMON`I0-305>/+,+..,()$$'.78>4*# "&'((*+*'*;748524N\]jllegehd`^WUXYWTTYYVQSTRMNQNIKMG??=62<\ehk_Rafhjuy{}}||{{z{y{}}xz|~}{tF+=\VGJGJICDHZZZ[\Z]^[Z\[Z[^\WY\]YZ]YY[\YXYZYZZWZYZ[[YYY][\[ZWYYXYZ\WYYZYYX\YYYZXVXXUUYXXWWVTVWUVWWVWVVSSRTVVWUWWVSVWVUTURQUTUVUVUſ¾ȿǿнν˿Ҽ̿sd;-Wnkhd[SaldCz8`k_Cad@Hv9;M,E6 9Zi`^`[TPHHF@@>;;976651132210-% MpsofZNG=875Jg&""!! !  
+ +;smfih~pnb_``adg^ɂlyᯞ~|xvtlhnkhhgdc`_[YSI;SSICBVG267/%]~}wrklmkkhT_Gs|xxl^ma[Z``O?vywuv~zfkrjqmnb\ZY]xXL(<}mIOIHx{<#)7BIF@>7BPLE53ž=1ĸ`9~|zi <&&& : )#%S0_M&3qXZ]__bs#'@E+tx|t",'(X$^iN  +v7gb__dqvp]YWQ*^lotst~|wq8  R{ruo|{urgBDH^D;::<::;<;;<<<;:9::8::3#5IMNOPQOf'% 1 /1!#-HNPQQPPOQPMNOPONOOW, 4buy`\^bg`]`d_VSH;20-+)# &*>Yqutsh]ikcjswv{|zzy{{{z{|{x{}~{yvG-;jTDUa^JABGW\^\]\^^\ZZ[]\\YY_^\\\[\[Z[XZZZZZ\ZYXYYZXY[\ZZ[\WVYUWYXUX\YWZXYWVYWUWYUUXYVVWWVWWXRWUWXZXUTWTUVUVSUVVVUVRTTTTSTSSTTRV¾ƾ˿ƽǿͿʾн̾ѾǺºsh:-WnpjaULKF>55;218;6B>33>984)(  6Sc^^^[UOMGFB?;;:9754532321110& +MrupeZPF>653^[#$   + +9{okkhti`ck\_b^_eg`γþட~{wxtlaejjhhfb^]\ZSI:ROEEETwpi{<251,& \~z{vrpnkhgX`Cr|p||qk`r]YYY_N>ptvsr{uzkornuqnbZY[]zYK*7iKNHBu) $1r7"*>CIJEEAESND50ŽF&úd;~|yxh!8!)) > *#BB))M?$1}z{uio$&=D !8ut2"&'(Y#bSB*u8d\ZX[UW[XUWWO-^onqtxsxo8 !Rvlsm~|sqg;::8665221/023/( MrrogZPC>649\MJSO%" !!! 9wqnnvklabdfZͱ⮠zwtqol\U`gigc`_]XSJ=eSDEBQB342.& Zxjj_^^a^]VTVQ`h\a=lz|inllhcaXZV__ODrvwtsx~}spjjiffhi`[\\\zZK+9lLNHD};#(:{5",8:=C6@?AVQH94ü}-(¹^;}zzj <#&- L"4) ) Ht=5C2G .vzʽ$'CM! 5|rt9',&Z)tMrnnnkohhfecV(dsprtwxys; "Nvhuquoe>@HVF8:9989:>??<:;;;98:9764!6IMPPQZSeщ%[+Q# / )/ $-FPRKmPPQPPPNNPW%  ! "YkIUVUTVTUVUUT6.3:ManPd{zzzxz{~}||}|yx|~|{xB0EoYOz~cF=CKW\\[XZ[[[ZXZ\\XYX[YYYYXWZYZ\][XZXW]]]^YZ][VXZ\[YY][YZXXWX\YW]ZXXYZVVVXXXYVUYYTVWUUVWVYXWRUVSPTTTRUUUVWWVUUTVWTSTTSSTTľ¿ƾ̽žŽƿ˽Ǿ¼tj:0[mldZOE=6213/++)/00)$$(*)($ 8Wda^^]VRNHFC>?;::88543211120-" +NssqgYNC@78E?!"!!   +:{plnndlcacg^Ȃqm௜~zyutn]UQbghfca^[WQH<dLBCBT=2532*Zv@B7<240.0,2:[gYb>~onieadb_\c]M?pvupqolikjgeba^[][YZZXYr\M,6¿kHMI6vg-/Ui+"&317B369CTLG;1»b|º]<~zyh>$&* L(J: 1 1fb?(6K%1lp('LI 1v,#"%]%O[=+sy3crmrrw~z{r< !Tznso|spi:BGSC87779:DKKQ`OODF=:97653# 4FnSMuqR=8;;<86643121100.# Hosqi[OF?969o]F?/'#!  !  
+ 9voomzhc`qfbfbabeZʎaxⳛ}{wvtj[jlljhfca_\WQJ?YO?<>O\=1212*[|xvhbX`ed]V_f__@n|kvmmchdshh\ND#/05:245CPLD8/üa8zyh 6#& R'0# -$)L_e3$1ͦ#(G>!3t$!$W,ha@.q}3dklutw~uxi5 !X}rrryzsng<@EX=77668K ,/!(('"#)JRObqqujklifb]MQZ#ac& -9>DHMLUN2Y_LR[SzXSTUUUVVS\nprmlnlpv1[vwxzzz{||{}}}{{}~}|yA$6FnC7KI>96;@DQV]QEHMKJSgVNT\XYYTSWWUUUVW[_ddc\ec`a\``][]_]Y[\XV[\TWXXWVUUWYWRUTTSWVUUVUUVWWSTTSVWYUTUTTVWUUSTTTWXUUVVVSSRQSUTSVWSʾɾ̽Ͻȿvh:*L^f^WOIB731221/,+,,)')**$! 9Ugb_]_YRMGEB@><:896633233111.% Nstqh]RG>98:wiT $" ! ! + 4vppoǽsbcb_deZäƾᲘ~zvutmklgehhea_^\XQJ@@C;54?4.100(Xp}iPFZhf_N^f[]Gp{inuinkahtn]S=rwyuurpmiffdc`^\Z[YYYZ`zYM+3ĿlJJE.(,;@* "0# !S% )pǿ4ajmtvysyr6 &^vqqkkrqoi:=BQ>776563]vmtadg@986530"1/0-67Ae΂"^l =$hS)>T (*'7@>4$*KRRWı\N]#Kh_`YV_" + 7LKJFB=7/$]Yw]SqVSTTQVTWTZhjkgfknmnw/Wxxyyy{{||{|{|{z}}yv:&*8EuI>DB>=:>9.))* %")E-$%052.//6?@A@GK_t{y{zzwsoqlmoldb_`^Z[ZWVWVWSWWVXXVTXVVVYUPTUSSVUTRSSQTWXUSTTSTTTUWTSUWWVVUVVTTTTVTTUVSUĿʿѾнϾug:*O`f`UJE@74124210.,,(""$$!7^kba_^\UOHFD@>;:888635332121.' +Nptoj_TH>97@e %#" !!! 4womm~uyi`_]^cdY¾䱞{xureLONWegedb_\YQI=79/+)+Aqv)--,/+R|LOX`onglgXLE\fZ_¿Fz|mjjgcd^^`g]LBkzxvwspmjeddd`^\[YXYYY^q]L+9nFIE@nuhb]\1 #+5EKADAANNI<.ƾÿ`7}{yh7 C(5* (KU@ #0/%2f"*SL /{"rC$$!W V`<(u4gmotuxw}q6 $]wrspnppnf><@Q@86877::05:A=64<656872/"%,_·%cAS$ >%@0 *+,?MI@"%0NTROIINU[Z^_eilrOPU!9u .<=;5110- brJnZQXSTSRRTUUNKWVWUUZY`emv¶`AM7auyzyzz}}}}}{z{{~~}zvD63!N{{{wrikrmkidef][¿Hltohge\]\[ZWW^\PBlsuvtrpnkgecc`^][ZYXZZ`m]J.6oLNE3_op="!(0AMCC>CQQK81ǿd<}~{yh%B#  +;,P= )b^!$%0ɺ!-WN7|fl]>"""*U#h}G  ,vǿ1hmnvvw~r4 axtrnprpni<;>L<6JWLGRPFG<=D>BJMIDJD40#*aχ%NC%  +<9,-? +,-@LK>!%1JQSKs\;NPQS ) )7;;5+ !dnclVNmYUSMGHIQQD6ILNKKKNKEIH[xĶgWd9auwxxyz{||{||{{{{~~|xoH;4=E~=AHGFCAA:)@7&-)&9_DJJHFIGKHBDIJKK~gTSQTYcc[[[XSTUNMRRPPMOKOONPRTQSY[WZWVTRRQSQSSTRSTTU¾ƽƿ˿ǿɽǿvi:*Nce`UHB<9:50..//-/,%"" 5[m___\YURHEDA>><:86544431211.& +Gqspj]RG?;55er~g $!"! "$! 
++{spmv]^]\ad[㵟|xuspmnkihgfca^]WTMA!ILSZJZƾL1.&/3N|hfc^SPQXYkmigibVÿGsuomica\[[XVXa^P?iuttrqoolifeb_^^[ZZWXZ[s[I,/vKMH.'8{m3y8"$/>BBCB5APOJ;3Ⱦ½^8~}|yh(V"%<0'%.-/CPB;QLMLKHMPNOQMrŷr[X:euxxy{z{}|{}}{yz|}~zoL<3=I=DGFFC?@9;5:(%MWTD#9TGHGGECDE?=@EDACmNHJMVel]]\VRUN94346::99::=>@CDQdom^XUQMLLLMNPQTNTXZ¼ÿĿļ˾ξǿɿɽuk=,P_f_UJE@<<60-,0200-'#!!!4Zm`b_\ZURJEEA=;:996444332121/' +Dnsoi^SGA;6;kn  !"  +xrojq\_\]\bd[䲠~{wwsm`abafhgbb_\XUJ@'bij\RSq6!'*'JvIE@BAA;7EinkjjdSCrsokfb`\\ZWWW_]O>husrsponligba]]]\YYXXWXp\L++¿IMH5Tj'"'7=.-.,*APNK=4Ž]@{zyg&H '|u'7.3994!"# F.<' .!+;Y' !"#1.O=6km'$""X!FP0)z1nonrtz}y6 + g{vqlosqrk;:@G71g|mqzxeijqvwwqwsx8. %'cщ}s  +1#lz{) ,/!,>A3 $,IQQcKPNPW )lwruyY .6.=?2%hK<>OZlWURF7/2BPDHnJQ\WKOSMLPPrŶyKG@auxxxyz||zz{zyzy~{mJ;6>L}7BFGFC?>6!]/S?!QUZO$>VJIIHCACD??CBCBDsUPPPSU\VXWTRO>  !#&(FyeTVPCCCDEFKLOHYnxĿþĽʽʿ˾ϻvi8,Vdh`WKFC@>20,,.111-)#  3Vnbb_^[VNKHE@AT\9977557C6012/' Boqqi`SG@;68nteML( ! !!"! +'{splgcd^_^\\\\beZ¿崞|xusgX[XZgihda_\XTJ@"^odbVULKe[p)&0/$Jx~||vfUinmjjdSDqrnifda^\\XVV_cP}{yf!=#/$l>Nf"=<N60:<:9;:653113;>F<7::2- #&'g҇%h7C%  +5BAV1 ,1!-BH9!$.HPSXvzIRQQW8DM! +3+7<1#h|yMWkWUQG736DTC>VFhzdv[NQQrõ5exyxy{{{}|z{|||z}{xmJ<5=H<878891e̿ʾϿпʺvi=*T`g_VKGEEA4/-,*-10-)"  8Rjab_crgWOJ:Je;68877XG.210' BsurjaPHA;67u7! !!!  +'}tnjhec`][Z[]^ae^¿⶜~zyvsmlpokiihe`][YTNA&ehZ]RPRd=$#+21Inplja\WQHelljgaRCnqmjgeb_\WVXW]cP>hwuusponliiec^\\][WVXX[p\H0+¾ONH37'%($"" (5DM@50ASMG71Żc>}{{ye!N#%--(@!-3-"#,N\" +D*6'# 7"0Q5  %1y .UR %%"!)')R8A,*w2hpmtwy}|v6 dyvuqsspmf>49K4./2-//+//.001000101/-*+857:)gъ$spm)  +2$pm' .. ,@G8"$6DRRM_XV\[\^][OPSSRS/[A:4>S + .6787610-(mJ[hVTQ>327LV@EnWSPOPOOPQRTTRB3|aXTM?2('''(()$b¿¾ɽ;ƾ̾wg@*U`e`VKHFEA5/.)+-10,&""6Rnccezיkzyk678:63LI/321& +Bltui]OHB;9:n`J%$! !   
$~vpkheb_]\\\[\^b\~⶜}zxutrnllmihgea_]YTMC%Z\OTQOQbts}B #), I~~RLEE:9>;FhlkiieT¿Cmtolgec`\ZWYW^fO>fvvtsrpoljjea``^]\[XXZ[s]J/-¾KNJGz~r8 !,AXKK7@VNH86¹d=~|zzi?!(QWW=!',.+*(U^J!&& 4" 49%2jmkm{ 1SV+6)'%* (''T#?4*y1iqswxy~||r5 + cyvurrqomc=59G70/.......-.//..,-..+,'Rk+dҍ$cg`&  +2&hT/20)<@8) $7CPOVXPT!^8 !6ADCEFAD?$(pI=RGaiWTPFADAMOB:JNh^Z^qWGQP{Ķ1dvwxyyz{|z{{|}|z}~|zjK?0@O|!2~eVSMB-$&'&'&($eƽĿ̽ɿ̾̿̿üwf?+R_fbXOIFF?4/-+,230)" !5Rjcde^_Ty[L^FC34:7EJ0100' +>istj]QJA;88h~eV"%#"!  +$uplgec`_[[\[]`e]x⹜~{wtsjXPPZgihdba^ZTMB T\QMNPqY(%&%'I{xxpklmkihfU¿Gutoicf^^]ZXXY_`MAjuuttqooligdcaa_\]ZXXY[oZI..½~MNIElsp> !&1CREB4>SNG:5ǿa@|yxd#E 6i}""369<6)KL f!& +$#YLW- $43LL"67H6-----+,,,++,+*,++++++&*-MPH*eӋ K9N  +/#\]U' .+ -BGD9$#3JOQO~zx~|PPU#!,D?# ,3-060')$#tQQUIbhWTQ?/-7FO>8JNfq?aoxXIOQ¶.duvyzyyzz{yy|}|{{~~{xkE<2?R}UAHILPQD3%/''Y`R6WtahlqXKHIIDBCDEEDC@;Dm\WQQOPMMPSUST<5~aSPMD2&((''(("dʾ˿xj>/Tbhc[QJGE>4/,+/42,&"  6Phadgc~e_feQ^ck>-10/' +>orvj_SJ@888}u[%%$#" !"! $vqlfcd_\WXY[]ad^u湜{zwtonnlhjjhda_]XSLC%YcUPOR}E;74/)Npkd^ZYW^jnkikhSFosmied^^[YYZV[`NAjzustrpnkhfeca`_][ZXZZYr_I.+KNJF~0Ue'r:$.=E?3*+>PPM89÷wechd@|zxh-b/M^ls"!-('$">@ R"1%# /$ ]?d3"%82NL 3lzxs%$!"W 8-)uɿ/frqoru{}tzq3' %ozwwpsrongB47G4*-+*,+++++****,++**+)%02PP)_ЈGi(  +*&hr_$ +0+ .FF?/!#3IPRXCGLTS/`NHGJP 3:5>F7*q~}{DajUTO62>AJT>8ILcY*EgtUJPR.dvvxzzyz|{{z{}}{|~{lI;4APe?]b]TRN4'+*!'^fk@\aKSHPYLHFEDCCDEDCCA?Lp\YTSRQQQRQVUY?;_TQMH6('($%''$gºĽƿĿϿͽȿxl=.Rcid[QJFC<4.+*-.+&"! 2NnacfaꕤY2|tQnE,21.' +&ZcTNKIt^B==8/"OTFJG@:75AcmmjnkUGrrojea_^XZXYX]^M?l{ttrsqnmigec_]\\ZYYYXYk\L.,ÿIMGAi1JM*f4 "/3./*(-?QPK<7ŷmNx^=}zvd#^%'ZV"16662$!% +D,?.! 6 >8!!54KN '#4W(!"!R;D,-v2htrrrw}}wzt>qM&n|vuqssnnhA26F2*))()**))*))))*)***))$KjKSB&bԎLR  )"^UQ! 2, ,@F6!&2BQNTwxu{VIRQRKA +  4=BA?7(zIcfWSP67FJNRA:IMeY9D_vTMQKµ,jwuwyyz{{{z{}}|||}~{fC81>NaEfjv]DO1&)*"&[Q8)bpn|THIGGA==@CCCBA?Ot`ZZZROOPPQUVU;:^TROI4)&&$$''$iļſͿ˿ɾ·wk=)Rdha[QJD@:3/,)(&&$#! 
3Pieeg__GX.j|I;<:88RX[`\sĿ㻠~yvtmXQLUejgea`\YVMC$J^TQKQqeC>>93!P|qspXclnnmiODqspkgca][\YWX`bO?m{uvtsrnmkgeb_^\[YZZXVYi_N,/ĿMNG/@-)1/.! $1=;@?;5?PNI:4ƹwǿaC}{v`#W$B/ *.-1>+ G '3%" 6$ IiX0#!$'8j489D4QJ :olYU #W ]X*z.iutvuw}z{u?I%i}usrsrpmkB14C3('(('(()*)(**((')''(($FsgJ@&i֌`gQ#  +#$dgs' 4* *-.9INA;JM_d[UVlXNP^µ,lyvyzyz{{{|z|||{{}yf@94?YeLePekOJ2%**#,`_R3eGg6BOEKGEAABEEBBAA=Qn_]baUONNMQSUV7:_VURM8(%%&%&)&jп̿ɿʿɿwm?*Tdka[RJGC:2/-*(&#"##" 1Ofdegi^kTibvesE5g`' ';B9A?<4=MMI:1øĽaA|zye.Y)}x%.649?, I"*8$! 8#N)i[I"#'9wi4WE #=&""T=E2 *v½1krsvwx|||qB~4&gtqosspnk@02C0((&'%%(-0+,1-*'''&&('#D\U3''hԋMii#  +' 9$H5+ ;/-AD2&7ENRYIPQSK3G3N=0= +9::9'&~PG?dfWVOF:4:JQ>:KKJEEDCJKNTmw.fvuwzz{z{{{z|}|{{|~}we>86@UsB{dbdZM2#)'-`kh:lDH}EOGHECBJPLEBAAABCA:!GgYTOGCCBA>wdlTH $!Q/v.jnmpuy~{|oEw+'lvsnrtome9.3@0''%%$%):noojk;'&&&%&%"?^uF%)n׍aG   -%su% :/ -DG6%5EPOTHORTIA 8EFFB1-aDx>ebWVO=45:KR?:GJIFBAEHMNPow2jwvxyzzz{z{{{}}|~~{e;76G]{Cq|\[YS.!&#*VPC+qNALMJKGEDJ_l^MEAA@=Yi[ZbdZSNOPRUVX6E_XXUI8)&'((&'(sĽ˾źwi>)[gmdYQLHF;0-+)'&$#""" !  2PkadcbXnBNqzE8Yga/6TaM031) 8qwvpdUKB<67GPMLG)#""#"!! +xoh^VF-6HMNOQVZSh͹彡}zvvogcejjhhca`]YSKA(N^SOOTYgrd@BC@6!Ey}}zsgknnmibOAprpkgd_^_ZWUX^dN=iwtutspomjhdb`^\[YYX[ZYj[N,(ÿ…IKHBl|R-~7 &32(+((*>PJE;-¸Pe_C~}zxf-R&>%% +0/39, Y&-;) 5#HkG2XJ%65RI/j\4"$N,u1hihjov~z|mGt"$o{prnqsone;-0>-&&&%%%(FQ&%&%%$%!!12'&qԋ +'[pW&% ?0*3+YD_7jeVSL1-55EQ;9GJIEAADHLMPnǿt5kuuxxyz{yzyz{~{{~~}wf:54DYyGnpEWUU-&+%-Y\`4udUpPKIGFL]LSRD@AA;]`V[_]TUSROQSVV2 F_XXVF6*')+*)))s¿ĿǿǿxkB)agmg]RKGE;1.+)(%##$"!!  4Ooadb`dyqwHKtKPtgY|B-0+ 2mvwqdXKB<643&#%&'$"!!!!!  
+wnfV2,CQNLAKOSWQi×佞}zxvqnZQ[iigeca]YVMB(Y`TSTTf=EC?5">qcbfca]Zionoi`NArspjgc_^\ZZVX`eM?i{vvsqppmhgeca`_^ZZXYYZh^N.-ƒIKF/ThQ'+I'&399=873>MIE;/ĶgTf^C}zyf;j+v|U"6955- +R#*:* 9"B6^+(0'<3ULF-$O%OZ<'vſ/hhilnw{}oHw%'3*'o~utrttpoh:-/<,%%%$$"%EJ\R%&%#$%%"*("(-'oڋD  +$+laR  =.):@/$>FQPOFDLQVTPYXNMQRTJ++2 0ABB@/*3ldVSL6:D9HP>:GHECAADGKMQqʿo4kuwxwyxzyyyzz{{{~}}vf:46G^m}OikNQXO,Ig:*QL]6|ZMCFFGEKZFOVEBAB8\^W^[VX_e]USSSS2&82%KbZYUH9('****)*zʽƾȾĸzlA+cipi_WMHB91/,)'&$"#"!#  2Nhbbba`PC|o<'n/-il$>`-1) +5hwvodWJB:63E[ONO7"#""!""" +vnfQ,MVRONIKPQUTfθ|zwupXPZgjjhea`\XUND'VfRMQPn>HA<0!?~S@>LIE6.Ķ½`E~|zzg3W(D+Hx!&%%% $E"'9)  9!KD>3!$&:6RM=wuv:"#$Q&H7)*xſ1kpnrsz~{znJt)Qwk?iwzuuuoni8),<,#$%##$%GFTS#%$""!!!Q~xi'iԋ&+  !&(66R C/):B4!QS3Pa[ZWI7*(),*++*{ü˿ŷ|nC'dipkcZRIC91-*''&%$$###" 0Qmddc`^]t{b?tx}?5b~|'7E/1( +4fwwreXJA967`{M""""! !! +wqgYJUPONONLMQUPdϾŸzzvtrjookhiheba]YTOD&YhUOPMrBCD?3$?hqsqstpqhromkg`KCprnigda__XZYV\eKCfytwtppomkhfebba\[\[[YZl^O1,IIHGyA;u/"*85+-1?AAMHE92·`C~{yye.L0yx}n /:<>. #A"";+!: +f99!%?mRPU6VR-l:" "R#)y/kstvvx~}{nKr)_wsIgvzotvonq:),<*!##"!"$EAHEQ""""!"" QV:IF(oَ  %$8Rc D/&5<6" #AKNLLLKNOOOOONPNORWM'),`i 9CHIG8-[Jd5jaTTQG76FTR;;GDCBBFEEFMUwr/ktvuxyyzxyzz{zz|}~~vf+)5Fa\vKqwMFJR+ AS1'8,'~>A-cAKKGGM^b\MCB??;b[XZTU[ZP\ZQQQQ*>*)6 U`\ZUI;.-.-+++*Ŀ˿ɿоȿ¿zkD)_ispdZRKD:3.,*((&%$#"#! /Rndbcaf£uzd=jvcVJtpa+/1' +/gxupeYLB968B/%((5$!! !" xqg^H313FNOMNRVUfμ{ywuqjmjcfhgdca\ZUOB'QlZUTW}pS@BE>3";jmjc`cca\imlge^REvsnhfga]\ZZ[X_aM@m~txupponjjgeb`_^]^\[YXi_N/-IIFH5RH:ms+&L#TZ;(z(ovxvvx|~{mNs&bwwHg|tvlquplk8')5(!"!"!"$@6"3B!!!"!!!DB%)u֋:55 +#&eaA/'HC35=I?43MYO4-10( /nwxrf[NC:64AQ+(K=&!" !"! +yqi]OMQFGOPMNQWU_Ý|zxum\TPUehgeda^ZUNF$KoZTST^A@?=5$>SC>;7;;6@bljfd^QGmtnjiib_]\VXX\`PŸ6NBB~x6!!L"*,z(nwwtuw|~}oOo$cywDd{trprsolk:&(4'! !!!!#,?'%):+""!IdB.&(y׌q)  + OM) B-$3>8# #G—AOOMKGINMIMJHLNQQSA,qC 3 7@BDA/0~3m`TSN8+29NM;?JKLKLOQONQNwh5ltuuxyyxyzyzz|}~}|}w_)#8AaPqUnoMLLM*"AK28_fb/!?nFHHEEJQ\TGA@A@9i\WUQRUXY^ZURRR*...0TaZZTNXFD:1-,+2ÿսʾ˿ϾvoI-iktneZPJE=3/+)&%##$"!""!  
0Qqbbbb^\SKGC>=::9764320.031/.*0fxwrgZNC<8594=__b*$&#"!"!  xnh]MNRDFPSSQSZW_}zwurpqoijihedb][WNC%OtZORMqp=CB?7$;~wwtvplmopng^]baOFzxtmjjgb__^_a_WM?`{zvrrtpnjgfgddd`cbdeWP0%KMG6-""$%!$.9@9.)84PpIS)&%"!!!! +xpi`Q617NRTUUV][]Ϳơ|{vup^[Y]figddc_ZUNE'QwOHIKWr{eg;DA=6#=UGA>:<69<_V7Ka_MGv}okohddfc_[[XWWSOJ=Yusnpmmnjjkigfbaa`]\ZXXZXUO1"LMI shC,-$&@ƽ:]B7}m: !E$")uɾ,irtttyz}}{gWe"cvs?jzrtqssole2&(.%"DA!  /\hf[5('uؑN2!  2\ 6&%4<4$MȀJOGmfguyyOT;5;@@?>?A7!5Lw1n\RTN?12DON;=HLLMMMMNPPK\ʾ]:lxsvuwxzyxxzyy}~|}}xW$'6@jQkFfp]OMN)"\^1<__Z/(=<+jJIGFFOdd`H@CBA7oc_XNOYdkhYPMPO*2FE,YyZZZXOH@=70-./:ſɿyoK/nnvof\SKF=3-*(&'&'*****&#!  0Ipdbcc_YWMGDA==<9853432320231( 2evurhZOF?92Hui_,&$"!!"! +xqibZWYPPRTWWZ``^ƹš}}yvqgf`bghfed`a[VNG(AtTKOTnuDFC>8#8|snolii`YXc`OYd`JMt}wqolhgeb_^\YWVUQL>X}wtrqpojiheeb``^]\\[YXVVO1$~JLH5Yifjrb0#.7:2(#(;JLF:8ȽY8W#!%$"! !!""" "! ! !D;M\z&+=5! #@$)6;Kd^LdS& F"Lk^bHH"(?|9YB7uddC, !"J%WV>*uɾ*durwxy|zfX`!fut@kytwotsqom8&(/%$FC/1+! (N`Xr:'.{׌S}!  +]MMUH 4&0FED,%K~FOLqN|NS9/3,"?CEDCEHHA(:jDYz/u\UUNB;>EPM9muqtvwyzzyy{{z}~{|}zS%&6AlVcMrkRJML)'OU2;ZKJ.)f0PoHJGEDLeheNBBBB8qaYTORYagdYQQPQ'CNN0 \ya^[XQTWV<.-11>ƽ˾ȿɿǿ;zqM,nqyqeYOIC;2.,)&'*036<95/)$  2Ikhcb_^YVMFBB?<:985555321011.( 1cuvsi[NF>82Ir\Z)&$ ! "!!  zska76:.9RWXT[b_[͹¿ƞ}zxustpmlkigfe__YXOE';sSKOWrAFA?@*2vz~tfeffa`a[LIzvqljeb`]ZYXXVUQK:Wytqppjhfcab_]\[ZYYXWTTVQ.%}FII01$(\pJ!!(3FL>5-;KIE84ȼĿÿ_9V#!" !! !#"!  0L5feI1DC6( +#;%3aR-aX##F#EI.c^&)?bhjcm8`J 37"#!H'inJ *wÿ(`oluvxyg]d!dwtBjuuworrnno;().#!G12Uba5)*}֎pqK  +)N56 0& 1HCB3 #L~ENLX|JqcKW9)ee  7<9<<9:;8"72v\STH.,1:KM7:61*"1Ilgeec`\UOKDA=;:876654321110.) +-guvuj\QF>65AVOd-%$!#"!!!  
+ū~uodVUSMFVYZY]baaǑȡ~zxuoWT\Xejhfd`^[WOD)6oSNOWitotBCCC>(3{pZj}v[9ASefca`[NKxzrnihb_[[XUTUSRPJ;W~usqnjjgdd`^]\\ZXWVVVTRSQL0'|PNI7jbOwb?!!)FRHI8, !QɁEOLSqcdggfgjeMGMOS5LdPKNY= /BHHHFHB3=3p\TRH0=DHNO6;GC;9557@MPU`N9lvvsvxx{{z{}}|~~}vN&(3=yOENynKMMQ&"\\0"=NIO*,dMEisEJHDBKPAHABBCA5yRMMMR^llmXRSOM&$I]T7!bt_^_\SSYN5/.0-:ǽо;{tK)hkythZMB<81.-,/37<=@@;<:4/&/Ikhefd`[TNIEA>;97544433111010) )evwui]QG=75Aeip3$##"""""" +êxqlife\AUYZZ\bb`Đϑ¿ɠ{yvnbjjceijgdb_[VOH(6tOORWfyt}j?CCGB+<Q?Yxqcdcbb^J¿Lx{oomfcb_\[WVSOMNNJ;:;BLOR`˿]kK;muvvwyx{|zz{||}~}}~wM$'4@~FDKhtUMLK'>T+!B\f['-ClvDupJHFD@OlfgJBACB7}SPMLT^ijoVQPPP%CXO7 bta_]ZRKGH7-,.+>̿ɿϾԽ}sM,hkusi\OC=7/--044698=A?>=71,  0Goiefdb[VLJFA?999653433223210* +*fzwtjcSG=75Ea`T$%#"! !!#" űyonmgbWGWX[Z[be`ǕǛ¿ʡ{xwpipk`fihgcb^[UNE-4tSX[WvDECGE.9sLD_xmY_fgec_BNy~{volke`_^]ZYYRK>Z~wusttolhfdeehkojn]N4'NKH0&Aiyzi(#.?C' &J}FNPSbecj|{]JQPQ3F!ZE 4BCFA+@Kbi}3t\WUH95:CNK87/-.(C¿ȿ˿̿ϻ}qN+gkrog[OD>60/0553444;ABA@:2-& -Jniggd_[ULIFA?<::65312323200/) +cxvulaSI@70;TnU0(%#"" !! vȴwFB4:;,4Z[\Z[acZͽƺɝ}{xurh]V^jjffcb_[UPE-7z[XNHxBFA>=)6kSUr}lWWghigc^L¿O~zsslkgdgfhf\M;X|zvutqmigihijlnpaS5&|IHF0&&)/2$ "-1,'"#)>QJG?6Ƚ`mqü`5S!#! #)+&(*+&')'$ ?P,!% )H%)H&'֏k[e"  ,(yzE 5$!6HD/ !QvDONLIBERfMROR3$CR&O( !789B?0>}w8s_VQH4(%7KL8>IHB>61DB?<3-) 1Hphhfd_ZWOJD@><;975532321222.)+]zvtl_TIA93CkcA-+&"!"!"! w²yhpadbXOZ]]Z[be[ǝɢ~zwvqokelkifdba_\VRE2=QGKOzGIB=<*6zyyljjje`UG{zprmkgedfie]P?X|ywusqomkjjklkllcV3$|¿zMHF0#"###" !)*&&!"'=QNF=/e6U###"%?a\KTeYScW0#*[Fxxr'+&584" (C%)046203RC! B"?BO8\mZ'Jº|;QK -f?# K 5=6&{F^``a___]]^_N#aprquy~|ea^"dyw=gwuuqsuuli5$&0#C>EW[ce*ٓ cRf.  .;f@D A(!1FA+"%QzENNJGCKG[ekmbCJQOT4' 021CB+C]WYw:u]UPOK>9AKJ6>GIB6).?JOOLdȻeh|H6dwvrwyy|}|||~~~~|}|rD%6IAoTe[SONH$%\H"%>W[M#9gtl`FFGFDLdd[B@BC@7}WRNNP]ghbPQURK$&EPV1%qP1/-..CLK3)(("HɿȾ}mL(lpwm`UNC=62016411205:DA@=3,&.Fohgee_ZVOHC@=;:9755422223221) +&bvvtmbVJ@:1CsmppO&"!  
s{wsimjeTW]\[]cfY̻|ÿȢ~{{voa_\^ehfcba^\XRF.9wMKOXudHGDB?,1|}aTOPMGFENjiijfQMzvsmkhfeiii_O>X|xxvtoqnjjjjllmncO5#yxJHG9KNC?A9%"-10-*-+;PKF;3̿qzqb5R$#"#'F{GjemlsnP# #:hU;)507*.& +#4#&OhehgfaaI"!Q oZ_)22(M¿WWXV\\Kw};XO5.&&!!N HYI (|J\`_^\[[[]^^O$_nqwx{}{f`Y"czy>gwstnsrqmi9#%0"'JNLPN*Ts[P-ّUii$  *'/H 4&,@6#"%UwEMMozZNT1_thcai?+.,41'##Hs7uWTRRPHHKPM4>HI=/.9@JORO`ǻ~F6Xhouwxy}~}}}}}~}|||v@'7KAcuYc\QNG!&aU# 1@^aL#5[CEGF?EUNMC?@@>8XSMJKOQUUPRTQG#!()1&%pI*)*'.?4;/1/0$Kþ̿Ϳʽξķ~oM)dozkaULC>73224356547=:876642231221.) +#^xywncWJ@:7@eGQ;.$#" !" + e{wqW@IDBZ]_\Zdg]ͣˤ~zzwpc[X\dhfd`a^YVRG,5zUNNT|lDJI?<*/zf[]YRLHHKH\eknlRN~}xsnjifgiig`P?W{zxwtonlhhmllnnnbQ8$w{MJGAj<%6ADJHJ:>RLI=8˾sž`5S&#%'+Ezv_rknktkL$" (D4!!45,! +.;&$UmgjkimqO!U!297*&'M¿{<]]"'"% %J(|K]__ZKORR[^ZO!bopvyz}|h]X"buw  *)`WaRD 5"1GB.""VsGMRd\QW0mX .369;=AF5Em\cj6xZSRSSQQTRM4?LH<:996542221010/( 'XwwvncWJ@:6CiIHM5#"!!!!!" o~zqhL@G^]^_\]df]ǥ˵ɢ}{zwsmngggigec`]ZVPH0WF2)*",R1!%%# 5C"'UeCJgXJcN  B Meh4#"$%Ru@[X"-:9=9) &"K,rzF&x K^__XIMPHZ^ZO"`mkpvy|}g\[#avuCe}utosrllk9!#-# =8.@i:'+Ւ1  (.ztvM -" 7GE<* &WuKLHLk}~LBjrsVCROOV/'O16 /9?DDIJH5EIJI8,),2FRMTgɻerD6]mtswz{|}}}}~||~zu@+=HQyysmYZPC6V^I$;_CFCD@XojfFA@?<5zTPKHMXjaZTUVTG(FK<+&rp``^I>QdqkmkrBTȿɽ̾þ}rO%jmwg`VLB;622447:;=<=GS[aVE1$0Hsjhhha^YRIC@>;::855321442200( 'XzyulbXKA:6CytpM0##!""!!! + ezsndRYc```_^cd\Ѱʡ}|xwsnpoolhigea^]XQI1=IEP^IEDD>-4{}zurrojNP~xsnjgihihf`Q@WuvrnoolhghhkmlmonaV2$~wLHE1&&EhV8 '695Fizutpusmmm;!$-"!# A>J{_2'(֔q, #6pE7 /#"2IF@0'UiILL{~XNRQW/T{s~?":;vURNJMNQQZUTUTH+%+((vqc]B)%"/YglmsFT̽̽ʿ˻Ž|nK#dbmf^TLC:655458<>ABBFPY`UB0% -Fnjilgc_XPIDA@>;:85431032120/) $Y|yuldWK@<:DfNP''#!!#!!!! 
a{uP/@?Q^aa_adc]̱ͣ|zzwqij[Zbhgfc__[VQI/6xHJPYp|~oBCBD?..y~|wtqniOL}}oibbghhge]PVaRPSdI.$E#"'|K___YSWZW\]]P&`lljsx}|j^S#_sq9exurorsqml7 %.=}}~=%)S5&(ٍ>@\- $)l]2 4%!1C=/  %ZfIJPzvvdPQQT,"y[C#"59=>=>?<,G@0Z_<|XROB.%)>OK9?ILJH916GPRRjȺ>0IVkyxy|}}|}~}|~}s=+=RGy\od\Q?)XL&Kc\I%IF.Y`GHCCBEHGOGA>;:8yURMLIXY^aWTSUI ,0A/'wmdV*!(9KP`jnrJVƿʿ˿ȿľ{oK#fdkf]RIB;78988:<@@@AGMSUT@7' .Gwmihgd_YQLGA@>9885642.23111/( X{wumdXLC=9@cej6&$!!"#""  [}qYhq_Z`bca`ef_˫ʞ}zyxsg[Yfdigec_^[UOH51vOMO[yu@DAAJ5/x~|{zuqqlPRz}nd`dhffhf[K>Ummjigffca`ahmonnmcU:-uMLI5,;:55. !*6B90.*:RLG;7ɻż^8T&(So`qibopi^{d^mwN!'>g.@1",120" 2E(Ua)5dJ0eP""E S_pP)WX#WtHWM -OY?%!*;VL0E(mlN&wJ]`^XIKJJ[]ZN#_rusrx}}i_S `xt9bxurmqvrmm8!$-B7YqkA=+ێEKS*  *AqbD 8#!4JE1 #ZgHMTsym]LRRX,0(@ABDKHRS@I]?URSM?2:;>xUOMLI[chdQTUQE-RRY1)xleT($8>:[josNaɾ˿ȿzmI!}ieqh\TIC;9;=<=:78643210101.-) XwxuocXME<62P4_5#"" !#""! + S}wgmnUN`eeb`efaոΡ}{yxvmkpmkhgfda^ZWOH6.zWMU_vGHEDG3.x}|zxtqnlN¿S~~ne`ajhhhibO;Splhihgfcaa`hnnnmk`T9,mONG7U= *49;632?MKF;8ƻ{Ļ_4T&%3F>0AJ8:HKB8d~}X! /7A>) +-< (Q\+1eK2fU  :!EFXY.+ UeLM^~sB]V$2XI-#+\fH(A#]d<(yJ\_]\RWTQZ][S Vtusrv}e`Q `wq:\vtqnqtpni5$/ 73GSR11"ڑ^oi&  +&.@@ 4"4IE>*!$\eIOKMdhd[JPii^GOROU)PD><6F+#47756354%MKOQVBrYSPG5.4FQK4?HJGE?69KPRSsɹo^g8*4Cjvx{~~~~}}~~~}||o:*;::@tQRONIKGGVIRTTB!#"5++{mdP% !% 2\mmrJeɿϽͿ˿ʿ˾Ż|nO#~ifoi`VOE?:<>?BDDCA?ADDEHGH8+ .EnlmjhcbXQKFB>=;9854311130./-* WuyvndYKG:7AnOR3$#""!!!!  VznokTTbdcaafh`͸џ~|ywuusqomkifc`_\WOJ4+yWMTZoHGFHC0/w}{{yuqmjmSVx~lf_`gihgfaO=SpmjggfdccbbjqnmnmbS9&oTNJA|a,+/.#+:JVKNFV='!-YmU.A(}F]a`[TUWV_]]O$`ttqtx~}`bN!evr=Xssqnqsnrn:"%/>?Log1%)ڏNPh /.ZbO 3!1EDA,!%\_LNMnPU)!M &8?=>=)/)U\cOCrXPO@*&+?PI6@IG93/,7JORSvȺ;38Cgvxz}}}}~~~~~|p;+9:963532210/0/) !VszuldYLD<5=(##Z]JLLftlpqutbQT%5'!#'1$  %5><=901-WaQMLGqZSMI:9CJUL6AHD90-7BLPRWyƸw6-2Agwy|{}~~}|}|~~~}}r9!FDYH_rfTJKH;2^S#XZ{q}XJDDEFKgnW@;:<;AnRRPIG\hi[PRNO@5MMG#-}jb^I31EKglppsReƿ{qNzhblgaYPHA@BBBDFGE?<<<77:<>9.#/Fpmljhe^YRJFBA>::86433122101/+ UxxtmdZOC=9?cTV,!#$#!"! 
"  EyD+:;4\bb^\dh`ǟѤ~|zy|umefcb`]XQG8%\OUYeIDFC@?.'pi@?8745.6135BijR]y~nbddhffge`M;Slkjjifddba`hopompcU;)oWKJ@wQ$#$$!(4GQFF;8'zK\_a\RMJS]^ZK%auttty}|XgR!awq==<<>ACA>:72268=YWlD#""" ! "  B~xfhi`S]ca`^df`ʢѣ~|zxlieba`\ZSH:(zZNSRfoKJD@C2%f~uwkRK\hiihjlSUunc`agfedc_P8Nlljihffba`agoopolbR;-~mWIG5]rvl0 +8C=53*:KJF<`hF$)22/  8; $OhhijjmlF#nqO.S$ !$(*]D;MnFe`$/7HbaF$C(H^a]YB@MRZ]`I$`zttuz|{^cQ#ctrxI + 0"6GB)!!^[ENP`pmr{fMRSQQ$W54jI&&=CBBCGCD3[mYyFHkWSR:'708HD4CJJEBBBINOOaƹ)'B\qvw{||}||}}~{o9,\Va|?_w~bCGG4:_C);$`J}UvGPGDDCGPOHA?;;<;CmUPMHJYfiXTSQN< ,GN7!6icaa_dttnlnppMmʿ̿ɿƼ̿}oO!{lgqibYPKE<:9687;BC?:8659HKB3+!-@jlklkibYQJGB><:9755432221//1+ PxzwsgXMC>72Hcp>$%#""!!""! @~xpsvqZZ`ba_bd^~ѥ|zwyyhgfb`^\YPH9'xXLOT{xEHDAD0%ho[F?>Xa`kjomQVnlc_`hhgegaO8Knkkkjgfdba`gnoonmcS;-pUHG.0}rpd+'058.((9KIG>>qf]9W!""!! !!"#%&'&%" 8?LZb<&82-50 A>%Nd][a\Z\=FQ=NqjR!&&'[ɾjB`[ -ShZZggN1%C"#" 'J\`_\VWQO[^dN']}wxwy{z_iQ dvt=exwurvzts: '.PTh7 &,ݓqwr"5,Hg< + 1 4A<$ %g^LQEU~hxq]HORSR!!JS)<:8553223211111) QvyxreYOD<9AyWR8$%#"" !! ! ;}xP5?95X`a_^`c_yεӣ~|yvvvqojihca_]XPG9hXNNW}ADAAF2$iZJ>GYlpwxn\VhiRWrod^[ffhec_O:Olnkkhdcba_`foonnmcS<,oUDG;V}SG?''0DB;7+7NJHB@¾];$N]3#&''*!> GNCO\e;*"'[dSQSah@X^ $9OW^\D*&  C0B=@A<3'H\_^]XWWS_]_M%]zwxxy{y\hN!f|s;\|wvqsuto:'-PNK7!&/ۓ _hb'0 .m6 *4@>$!&dXHONTNRRODbz1r% &;=>A>@DA/ e@hGLqXMM?-),AKB5BIJD>?CLRQTUƷ-EWgrxwy{|~}{|}~~zu1=OewJ`{wyOAD- ;B1/88[4j[|hQFEDEFL`eC9:::8DgNLJDL]`WSSUNO57A2'9dXOA=HJ=:;7R_Fr̿ſɿʿ|pNzheoka[PHA?@A=::33=ELPPQWZVH8.%,BtqnoljbYNKFE@<;976521111120.+ + RyywoeZPE<:67F8A/ .! &-=>'"(jUHNKUMSSP!*17!% #69;=:1/-$i?AdFOpUSQJ>:?LSD7BFCA@?BGPQR\Ǹ+H[mvyyy{}}||}}~}xq/%PWerAm|spBEG.!?YC(`P@*nZCLN]PIFFFG]jWD=<==9FdMLKKMcddYTRNL6.8?5!@XC5;J\nufK3/DDvʿƿylQrpenjbZPIA?><961/-3@NZ`YXOH?91&+?rrmonkcZPMFB?<;96663110011//) NuyyqfYNC<89e`o>"%##" !  
+7{xidkPQ]_^][_c[t͚ÿԡ|zxz~~phffc_[WOH; q_PSTOqiWSCFGD@2"h~~~|{{wtomifOWo~kca`fhhge_P9Nmmikiheb^a`fnrrqpfT7,{nSLH3)'_c2p: $38*')=BBMKE;?Y5Y"%%#'''%"   *;7Z=>T*.&  + =:$N[($F Sl`#B;&]ß˿f>d`'P{_dum[9!?^N" +~F\a`WLVXY^\^M%axuvwy|}[lL#cxu:_vtoqtuu:&-Jwf/$# *-2ߌYq# :  *,==%"*gXLNJuNSSQP"^JCC@O  +"5=CA7 l?WqSTN@5:8HRD7AD=996:ALSP`ȶ~kWe(Garxxy{|~~}}}}}~yq.*kfgoFknmMDG+!;<<7KdLMOOT\gr_TPJK2+@[D ?S?3?F[faQEIXgBw¾̼ľzkQ tnepkbZPIC<85650-,-9IWXOHFA?:1%*>rrnnpmd\RMHA@=:97554210/12/.) MtwxrgXLE=:;VN`D!$"" !!"!" +3š{wfl\2TX^_[X]a]rн}ץ~|{y}soxuhgeb`\UPJ;#u^IIKLJ@EMJGFGD?3!cqRQLg`HboqqnkdR¿Vm~lc``eghhe`O:K{wrrqlkgdfgjnpqqqeR<*|pUKI8=k~~}0%42%&+8A@MGD7"Sa+"?:%plOpnqmmokbZSJFA?=9976343122221.( KvyxtgYNE<8;fgtD$%!! ""!  -Ĝ|thW?<&`riujisbRgirrokgR}ÿXlxqkgdfffgg`Q:L~{wspmlkhjmqqpofS:+xÿkRIG?lv91H2!07/02:<=MGF=?[;\))&%"! !" ! 0CMvF*57-! 66"O`&"8 (YY:$%%)\jyupeA\)(;AF`XH\Y. BHU)}I[^][YVTR]]ZL#eyx{xwx{[iG!bxy;Xzvuqsutq~1$-&" L:4GB-ߎ +4 % +!8HG:%"#nNMMRRSTSQ&D%6s1Y:[iSSM:/8=GRB7BEA@?<=CNQPbö)Pixxy{z~}|}~~}|}|yj*Kcpd?\[N`HDF'$ 8_>.F]0|P#ALIHEFGEB??==;92JbSQRRQhkh^XQOQ06\F3 KzdccaZX_ojilmsB|ſͿ̾Ǽ|lPvocqne_ULB<;91.,-,+(.9;5.0395, )?nromnjc[TLGB@><;86433133321.* +Krzyti\ND>9=b[`1"$#! !  ,Ǟ|vlQTdVWY\ZW[]Zḭ{֥}|xzskhfcb_[WRH9!nXLPLNJEBBCGEBBB;%^~}pd_eb`pqnmiLz¿SnyrlhecehihaS;K~ywtqmljjilmnongX=+pkNKH<@,)*#'8=<;=1:LGE?C]?_(&$#""!" !!->^_FR&27' <8"Nc, #<H/ "%%'_¾dBY0 (Qo[\nf_W6Ec**|I\__[VWUY^\_K%dvwyxwx~beC$ezu1ތ 1  N" &/>C<*$&uPLOPp|~{y|JORSQ!B%=A8 + '"$ulc9YdSRMAUo`@WWHXGEI$!Cc1=:9.R_RQMONUWYYVSRS0",)&Gtcd^TD0H[X\Z^fB̿о˿̾ȽznPqqetod]VLD><81/,++0-)*,)(,340))AqtolnjcZUMFBA@;:9644322121//+ + NoxxujZOF>9:Rl]8%$" "!! !Ǣ|vpkhbZYY[XUXZZiǿק~||zxxyzrhiihc`_YWRL8rXQMNRIB>>GKGEE>3$cwNJIF?568WppnkeKwWn}xrlhdddhfd_O>H|yxspokhjljkmoniZ@*rmPHG79gv^0 *:B:4,7MHC>Aľ^=T#""!!!!#"!!!"!  
5B_|Q1491* ?:$EWIGJGDF3$?30"$%%`cC\s'$DVD:KODNGDLn!)I___VMOJP\][K%^twyvx{|deD%cyqLOQaĶzjfq.Sltvy{~~~}~}}}||ve'@XvcAKQIQGCG%0=*@hS4#~GJJHHFCQlkiI?=:7(WYORPOQQMNRRRRW/Hq^`RRHλ}oMrrgund\TMC<9720,+.75-&#"*452,")?rxropjb\YNEB@@:9666333111221* +Krxxsh[NE=:;VtI2&$!"!  +#ȣ{rmhg^XXWYWUX\Xeőأ{zzz|hgfdba[YSK?wXPKO`_igKJHGD<-%duuuorufissookfNvZt|urmifefghe`O&tmTJG;_\Gm^$!*3553P(F]^^WHHOOX]ZL*_wuxux{|]kC'dxo;]vsqqvrr8&)1KDJf+2Se=.ڎ (  !"#"!vPKNLpuwsssFRRPN)TE>AB_   %yOLf7_iTPK6'26EQ@6BJB4104BOQT[ĵ~~.Wkuyxy{~~~|~~}||xf%[^r\BLOLMFEG# :^jK xR*>GFHHBHL:<>==;4(^bXUROSSRRRPPQU,!%&$!IlZO>GHFNOXUUck@ƾɿpPouguph^TMC<:732.3211,&#$*86.% )>ptqoplc[SMGDA>:8664431011110( +Htxxrf\QE=:38.'*%"!!"!  +"Ȣzplbg`ZRORVVW]Zfι{ܦ|zxxz~nggfcb`\YSI:lTMMQysGGE@6.*fysorpplfKq^j|unkjgfgihg`O d%+L]\][TQXT]]YK&\yvxxyz~aiB&bsn9]tustvst8$(K-,Po}S0ڎ 1 " "&79&"#yKMPTORSQK"5 + !y2cjRPLD;CGPRA6BGA1//3BNOUUƵ,^nuxxzz}~}|~~~~zh&#`wSEJNLLEDD#!AYX>%ZZwIIHGCGS_RB?<;1+a]TSST]efgXPNPT)+ILG4OhZWVYRPNPXRN[r?ȼ~rVnwftpl`ULC=;733495,*'&&%&0/# )?otnonjeZQNIE@=:9853333200120* +Gvzytf[QF>:5cYK<-$#"""!"!  Ȣ~yo[387,/6?QTW]XaϽ¿ڦ|{xv}pnlifddb]XRJ;h[MKRDEE?6.(c}^ROMHEA=RopnjdIpZr}wokjggfffd]N8G~zytqomjkjjjiihjgWB'loTHHE|C"".xG'>JI3'(7QIF?;]>R#()1/),)'*.'-)&)$' DJ +;2%Z]/%()=fE"(?I'!"$$aƾ\A^t#$2C=Xaf^D$DU}$+~M^`^][__\_^]P$Yyxzxyz~emC$bto8Zwxrxyvw=%+)C=@`,eU+(/ۋ 5  2@M> (.JI4%KOPQVbfglsyy{fMTSRK,dI7#  "$!! %{iqGb-gmNPOPQNQSO@6CF>2348FPPXSķ.[oxzz{|}}|}~~~~~~xe#,\|X@LMHFEDC%"'1(;\]=!#|SLo}LJJGDDXmcE?;92.fUQQQPXa^]TPOOS*$:>>1M_ZffgL]ejb:#0bD˽~pPnsesrk_TJC><614651+&#%&#"&# '?pvooojdZSNHC@=;:75432331110.) +EsyytgZPE><8dkg_:%"#""!"!  
Š|vm[DWSAAJMURV^Y^®¿ۨzxxxnhfeda]XSI;e[IKTwDJIE<4+fydgeb`WGHRonkhcNpVi~xtnkfda`ab`[K:C|{tutommnkkjhedc`_`]]\]\[T?)kmQIG=sJ"2~=$6BD7-+6OJG==ſ\=S"'Bbxka}cwc`" + >E + 0*%XZ#1fE"(@Zb/ #'dƼv[>_j&Bc`b]N>7%> ,VTPN-,uL^`_]Z]YV\^\J%Xuwyxxy~|akB#_wp9X|vvs}vxP(*D)+:VjG2ً- -|T6&8<'!&GOPLFGIGJFEGFJPTPRI#*>R[X2 I`c``gdR7 $~i@ru0fjSRTRPTSQS?JSSXXŵ~{rSx,_ry{zz{~~}~~|~~~~~}sd#d]2#F`oGl{DGGFFH_M?>>;60/i[UTRSZhbYRQQMM(96363.+(%##%%" )>lwrpole\UOIB@=<:8554442211/.) @qyyui]QG?<9`\P;$"#!!" !  +Ƣ{rl^aebPOVYXUX\X^Ͼpÿި|zwwwzvssljjfba^ZTK:``NMS[q{`z@MLEB;(gwB^_mnigdMl¿ZYtuusklifb]Y\YZWSJ78}wqsrnomjgfgfecb`]^]Z[[[ZW?(gmOHG9OF#(1T'$1?CE;;PLI@@_CR#&Kô& + 9> /*%W]&7dF%.CD:Z)!#'dy[MHYA_u )Kia`ZSMA)> 2A+nH\^]YQSRL[_]F'Ztuvtux}}cnD%ctr;Ztww}rwV'*8{)P<':94݋ ++ $>01"&}USSLNPOSPOOMPPQQQSJ$Ljc ,`N@LMJGBD* )+hbSSTPMORTT=4135DNQTZŵ+^qwyyy|~~~~}~ubKHLBLKHEECA!"=I7DYP%'E|I}~GJFDB@]aOA;9326m\VTPSWfppYSSMG'1]Z.UV\cefFltwa5+)FG˾˽ƾoQixivoh_UKA@@734/+('%%$$# (>mxsqple^VOHA>=<:86433212110/) +@tyyvk\LE?;8bvth'$"!"!!!!  ĥymhX>IJ?JYZXVZ\Z[˖¿ݪ}zwx~{|xnkhec`\XSK>lhMMUdHJECC9&g{X[cmmjhdOlZY}ytpljgfc`^[ZXVRI45zyutrokjjifccba_\^^[[YXYW?)goPJI6GSQTPH&%1:>B9>NNJ@>W?K$%<# 99 7,%Xa.$BhC#/L!6V." %%e©eSLPTZLȿWAXl '=KDLNOIA+? =ttro?,rK_^^XHHHIY\]D&^wsoouzgqE#evq8`uyy}{vxW'*9q+@XiYLDߌ/ RuoE0"$װn7s@!  +'XPEM2%CH+ *u]V_*kfSRNGBBFPO;8EF>99:@:40-+(&&'&! *:hvqnqme_VPIB?<<976422311011/* + Buz{wk]PGA97VPMZ%$$#!!"""  +Ȫxmg]I;7GTZXVSX\YZȸ¾۪|{yymkhccb]XRJ>naHKTiDJCB@5'cwsplle_ZhkjicNl^c~tomjgeb_\[[WTRK56~{uqnjhgfdba`_\[ZZYYVWUO?,goTOH;Yr~qc-#8LG9/,9OOIB@ƿY=I$&/v^qewxqVZolt" CA +4($QhM0->Zb<"0D"1O>W6#&(ekFQZWX`olUBUk)A]KIMD6+"Ck¹Ɩ%.pNa`]Z\[SO[Z^H'_trjit{|il>$bur7[wzx}|xyR(,:~*5[^Tߏ 0 )':SX@ ;!7%:1$$ݾ}.A 4LMK'0K9+qh)ncSQK;,-=LN>9DHFHJIFKSRTZaCn-_sxwz{|}||~~~~x`ZpFKGKKEFHF= 'EW-UfH/DYbwxGGGF?IO>NB84/;>pYPNKKNZ\PLS]SJ& 7>2%`OT`YR3\rqh\93ACĿͿϽο¿qZhratne[PH>9764/-)&&))%  (9jxsqrlfaXPIDA=<976411011/01.+ @ox|uj^PHA:6cwgD""$#!""!  
+˪xoh]P]\XNTNRSX[\]м¿ۨ}|{|xqxpkheca_XRL:v^IKMWwstGHDA?7(bXFC@@>807RchhaNj¿a^tpnjfda_[YWURONJ27vpmmlifdc`_^]ZZYWWRTQSTQM>*i¾nULI7*'B_"$&9IE5+)7UNH@AY:N'&&#%$#%  TF 8(Ahf`ahk[2"/@!,@>]oN3&%cĀTQaſUH\g (Qm_fi`W7$C m‚O%,tJ\`^[WSUW^^]H'^stuuv{zfm:$`vo9^wxvz{tyK!'--BJ~P"toK1(Bܐ * '(#&6 6FFJFCCE=-)pcOQK=00>MN@;DIGDECBKRPWk´o`i0]uzy{{|~}}}}~}v[_aIKELLGGEFB(Lc.T`J"4lF@HwvFGGC=FLIR<632==sOKGDAKh\\J_aQH%#HTK,^OV`I/%]tvl_A=AGſ¾~pUiuapnfZTIA9511-+(%&'$! )<966422111010/+ +;lw{vk`QKA<9hFEF$$$#!!#"!  ȬwpiS,510*4>PRVZ^Yî¿ݫ~}|yxvmkigca^ZRK;t[IKQ]wkAGD@?7)^tusv}xxlX\bfe^Mg¿]Yzqopjkeda[ZYXVRNKI2;|yxzxrlkieba_^]Z[YYSVPUUPN?)kkRHG6((Ef##%3BA>1A»V;T#%4[nNXYVKKNjLGEI7 +]@# B*)MfkojaI'!(C-Q^0'fVLcd&R^CWZH`L(C<]R-{M^a_XKKIQ_`\G$_uwxyy}|fq=$`wo7[yww}|v{E!(+SU R73J43ݑ  ) ,=B:/^YQ^3!(۽v,WIVk 'YWKRRNNTO'2+qaNRJ:6>EOQELFJEA:5=LRQZu´x.btx{z|}~}~~uY*eOHLGMMHEEGA+#OI'ZgW$7Y?yqEHE@>DbfS8549A=xI@E@;:654432210010* :iyzvj_SK@;5LNZJ!%$#"!#"!! ızrl^Zf_YQTRXTX]\Xޫ~|zy~ujiifca^YSK=s]NJSk{AGDBB6(]zmljid]MgQazwplieb_a`]YRM5>|xttpljhhecdfloopjMB,nkWIH85>Rp-/$#/3;5;MKGACSAQ"%:z U=' ;*&?QUKKN3!)D#))(fżzUI`a&KX.EK'UV*Ca}!.vP]_^XNZVL\^[I%]quwxx||fr>$atm9P{yzy{xzC!')/FIz6$m~F5ޓ ( 3\a# ?#S\?F(!(ݼxgP6iDOGHMLHDDE<+#]c3Xib17~R?cvmFHD@AR_SU;62:B>{?@B? yiMJUueIHHE?7)_~zupieYhjib\Jh¿Bf~vsokiggghh_P9?~xvtslkighhjoqpmlMB-nkVGH7iu>#.2B9<68KJFA<żW>O%%0À +M=* 6*!"Q̿ͿƽnWcxbuoh_VJ?920-,,,)%"  $=]wrqoojb[PJGB>;97543211130/.( 2izzulaRHB;;?4;Y1(#" !"#! ǵ{rjY6C?>IVPRVV\ZV˘ଠ|zy{oiigea]ZWN?kKKWqwLGHIF7(\^VHC@935Biggb]Me¿Qb~wtpligghhgaP9<~zxvumljiijkoolppSE/hnUHI:@7/038&&7=SIJ;;JGD>:¿ȻS9R&%,u J= 4$!>eB%/M!'#&'))*.eĿWEZb #>PIKKID2=Pk.zP\`_[UOPZ^^`I%hwvvxxy~}ir<%bsn5Tyyyu~vwD ''9o(!Z^Xc44ٕ   + LsuD">1aWH2"$*۽$+:C_t/\j4sa_oxZ !;7969659: 7}-qZROJ;4ASSO9;DH>78KCFOLFEGE=."Si.YnW6ARUjxeGB>ADefiX<34=B;v;CA?ARhe\G@ENI!'JSH+"iVI]L$)ifMB8/9JQ˾oW`{buqi_WL@941/.,)&%$! ! $:b{stppjc\QKF@=:765532233100/* /fy{vl`RIB=7]~td0#"#"!"! ! 
~ó|rkeZUUTTUWZVTZ[W•ૠ|zwy~oiigea]YVM>`GNUcGGGFA6'YvligMLUeefc_Pb_bwtoliggijgaO9>~{zxuqplkjklmonpmTE+jmVEH9clG%!$-¾ȺQAX"$#*-"$(%(''8+(#&&NA + ;$5=<<>;Nc@#*I'dcd]lnod?(hbLKQOPNUOTG\h!!:TOS]a\><e"*zO^_]WORWV\^^I$hwtqwwxz~fs?%cvm:Vy{yx~xuA!''# #C?HSNFfF + -bWOMKORRG":sl-v[ROK<6DSUN=WKJ]IBMZM"-;&5.#lXL^K#(ji[=3.5PO̿~pV`|guqg^VND:400.,)&%"!  %5a|rropgc\RJEA>;776532111001.+ 2fyzwlaRIB;9\|VA,"###!"!!  x´~tla?.57ITWXTT]\Tǧᬡ|zxz~yxwvmiihe`]YUM=jORRU`?IHA>7*X~{qCIUeihgcRc^dysoljgfikheO;=|{wtqqmfhjlnoppmWE+jmXII58\wI+""0<6''7QJHBCŹR?V&$%" ! !"#"! "! K; + =&'[ngfhhhg?!(B!:Qzysh]H1&ieKUYTZVVIRJ`^  0=@Qb_V>";a%,yN\]]UIAFK[][I%fqxwyy{bqA&fwlvnVRThRSKBA9)Y|xifckfdhhUgc_zpomjjjkkjeO8=yvsnnlgjjjmoqrnXD(emWJH3%.lt~p/#1==.&(;QKKEDĸU>Q'%%%$$#"$$##%$$#  M5! >&"Mc[`ecik<",=/`dqxyzre8%kƿ~}p{upNKj_!%>TX^ZG:3$9g!.wN[^^YLKJM\]]F$iuzzxz|~dp?#gun>Y}|yx{z|I!(+R('P#C<6ٔ   &$ 1Gb +-R9`P:)$1Լ) $S] +K#("=Lht/wXQPL@9AOQOD[CG=.0MI?;;33>DA?pB@@@KbhmV?;BPK#"GITF&&r`GE6$.qm\B;?DMSƿѼpZ_|cwrkcWNE;3/-+**&#"" !;Yzsuqnia\SJEC><:8743131/0000* 0`}{xoaVJ?;9KjgM&$"! !"#""  xý~rqd\e[MKUZWVY\]YЫ⬢|{yynjhge`_ZUN> ln\TSgKKE@C=$Vtkb^WQMKMg^I^fVec]yvmifilmkkfQ=={romkihfd``coqqrrmYG(`mXGG9-6tquh-,BEA:89?MJICC¾ɾƺTBR$(''((&$'('''%&'$""! Q:#& +C""+,/01Jh;&0<7{|||}~K)mŽѤ;RFi\%D`Z^YMJ=& ;f#-{ O[_^ZT[RV^]\G%huwyxz{~eo="gtm9T{{{}~}G'-*wqP% ci1S;6ܙ   +,# -cm ,P#0)"%3нi &<. + .\MGHGEKOT) >j2uZSRJ84@QRNAYFI;5:CEOTSaxyp^7itzy{|}~~~}~|xlF"0F\@HMKHGGF:*#VV&UuJL~[DAAAAD?>:04AEA@i=><;TK<>AA@\oJ"'?#"!)y`D;75Difd^YbaLZɾɿѾ̾տƽnZ\dvqiaXNF;4.+*,(&$"!  ":ZssqojbZQMFA?<:7655322212/0+ 1^}zwm_UJB;8H?6>)$""! !!"$! o¿uqd[c]JEU^YTX^[XЦᮢ|{xxpkggea_\TOF%suXTXfKHE@E:$SWNOJH@;=OhY>[eVcÿe]zo`baijijhaR>;upnkhfdc`][amrrrqpYI)^h[GH:QzW7#):;@<?g:(+;0{zwuusxsG)ożrȭnyĖPFe`"3EADHFEB08JY/yL^`^][^TR[][H#_rzzxy{dn?"fum9S|z{~~xyD&-?jJ%CQ,D.7ݔ  + +7# 37K! +2M#LcfC"$`u)$!#!1 + 1Q946553;X. =YMi4rZSQK@:DSSNCTDE8./1:NWUd|a2"OumhkhqpgjhUc¿f\~t]bahjjjhaT90)#! 4mHG<8942dF )MD?<<::AK! 
Die3rZROG47ERSOIOEE:345?OWVay|jfxV@fsxy{|}}~~~}}vsE"3C_BIPKFFHH;'*QJ#!>O3PyP@<;=@=;920:@BACe:>>AJNLMGC;?IL##&[AefdZ`jH[OWeLc̾˿˾}m^\dvrjbZRG>5/-+)%%#"! " ! %7Z~utrojdYRLGB>:975433312202.+ *cz{wn`TLD<8WyrY'&""!" !"  i¿{qa:;288>APVX^`[⮠~{yw~yspnomigfa^[VM>%UqWRN[}gqtBCGC>6$O~oqdZXTTUhqnmhXbe\ß}r^a`gkjihcT==rpomkhda_^]emprprn[B%ZjXJK;>9#3I=#!1>@PD@=:JHI@AɼY\}zRA5 P"&>'%/F5M'rŮƵNLjq  %! $""+26*<8#.wM__^[JJQW\^aB&ct|wvy|er=#ern;`}zvz{yy>%'+b|%#f\pj?D؍  +6# csD:L@]EAg- NQ3>:.'##>R +1DEFE>>;/ Djeb4uYQNG:6FQTMCVJGBEJNNTXVezZCiryz{|}~~~~zuA#4Ecm;GLJHHJH7"&VM"#$ \vM:556:643,,6;<<>`661.,(&%$! ! ! "7U|wtsohdZRLGB?;986432213201-* '\|{xocWLE=8HFK]+&#$"!!#"!  f|sl^F?RSZ^\YY\aZ¾ᱣ}{ywyytmonlheea]YTLB$YsTQNbSAF@>7'JVKDD==<>>UejkjY\dXŧzqabahkjijcRA5srojlheda_^empppqoXF)ZhYJL@n[2t>"3;6C99?9JJF?CɺV^~}xxOK; R%%"N /JrxqaOG2%sƽHIas!+42582,1+#8,C<&-z M\^^\WVMMZ^_E$fw{wpszgt=!guo98522 Mhc]6tUSPI=6DQRN@OJHFFIJOVXXi}˿sxzaZvXGis{xz|~~~~}}~~wvB%5GeoAGOLGJKJ1$0_D 6m]<%^w4$! !%(()7Y,4@<>D@>DCBDJ@2~]>gecKNX;bOG^Cg̽Ͽɿp]Z`uqh`XSI?61/,(&%%" !   7\{uttpieZQMGA>=:97432322/..-(&\zzyncXLB=8Rxvd'$##"!!!"! a¿}tl7#ftp7V}zu{~wvB%)* V42D/8ڔ  +5 Pci&8H4M5* Mx?Elc@6\iS.)+/ %.6HU@ *OKLLJGCHC IEaV:qUSRI:7DTRM<@LLF>:5@UXTg|VEjsz{|}}}~}ys;#3Fah?HMKGIKL2$(72,+*&$""!  5Wtstpid]SOFA?<9:7322222//--))Xy|znaUKC=8DQL])$""! !"  ]äujE_a_VY_ZVZ]aZ䱢}|xpjgfdc^\VNC#]QAOUOeRGKEC;"Htp~topqpoljZ]i\{kd_^fhkihcQ>7}romkihfa^^blpppqvTG,VkXFI6eO$rH&'#,'(-5IGE??ƺR`~{wzHJ5! R"()"")/ "G6~}I,uþȱ}ZYS>F[l &Aa[bc[X^=8-;;)0{M``_YKDJV]^[C#cyyyuv{lv;%hwm8O}wz~xuA$)*$&')%!&ihr?8ۓ , 6Hl6 >K Rr;n6a`Ws4/c{h%e? +0?IMMH8/%  JN>oWTQH=7FTTM:>KF5000DUXXp~~~WHjwyz{|z}|~~~zr:!4Hbi71-*)(%#"   !3Uuuvrhd_UMFB>;:864331220//.(+\||zqdULC=;TstX#""!! !!!!! TƤ~todea^YZ^\UW^a^ⲡ~|zyku~rkhedc_\VME!gLBM\OLI>E>%HcSKW{|UVinnnnjY[iX{nc`]eejjhbS?3zpnlkjhgc__blooqpvWF*WjTEJ42@9!%/ %4GGFA=տž÷S^|xyMT4& + R %FWMJB4#&,!A.*)+/045/(vrJPZX^=G]}A #4=1NY94E4:+>@/0vO_]]WNFBPZZYF#gxywmnyjs8&gun:N~z|wwE$)(Yx"'qb*=ݖ  +)   >H bȾ}hNC)`A +,TKD7*!!.. 
I0AYO?oWQOG;5DRTL;8875343330//.*'Wy}|qeXMC>;GRcI&&%" !!!!!" Qƥ~wrjjf]XZ\[WX^b_崤~|{z|twsljihea]VMB"QMGYkXNJGG=%Gm}~nroR`jjnoojXX¿hX}mc``ffkkhbT>3wnmlkkhfda^aooqpspYJ)WjXHL`V[fYPI/7%:=(2x L^^]WENU[^]^Egx|zpo|lt;)ktk9P}y{~uvG%*)HuxX$9uU/%Aڐ  ++ YW:* +X[ 'FA6/*++-3PqcjMAwWSOG::JRUN=BJJIMSTTZ][]xqSPkYQmwzx{z{|~~~}xp5"4@ib=LQMIIJM)0*4QwkD(nQ~J!:C-8)1(/133.%6?0W8H4G>>=BA=)L<87;7@>54tnzȾp_W^wqjaYPG>73//-+($##! 7Q~vvwtle\TMGB=<9974331221000* +%Ty|zqeYNC<6EhmE*&#"" !!"! VǦ|vrkhc_YZ\[WW]a^¾ᴢ~|zxokiifb`[ULC)MNKW]WMHFB7#Hlddg\dopnk\W¿gStd`aghiiibT@5umlljiedd`^blnoqrpZM*Yk^IM>l~}G!'/JGEB<׼Na}yvKU9! +<*HMFC73$#" %G" !2?('wƾWFSXi6Gdw (Iyh`lf_M*1 )(0wM^^_YUYZ\`_^F"gy|{vv{}mp:&guj=O~vyuwD%),AX"yny@;ܖ +) ZvI BIEhG)mɺ<8G8#0SRJKEDFLB ULmJ?vVTPF52FSTL6BMKJOE7CZ[\_uVOmvxx{}{}{}~~yp0"2EggG6M;LG?8>9:1<<:cFZJTLCGD=E.DB@IFQH<@C;:7monɾ̿ϿpZ!S]ytk`XSG>8401.,*&%!#! 5M~wxwtmh^UOHB><99555312130/-) $Tz|yrgYMB;8DmZ]2%"""""""  U̩}wrkhb^[X]\WW\aY}൤}yyxlmmjgeb`[UMB&QVSY`TCB?:2$LhTJGD889DmspnlZQfSsb`ajiihhbP@3wmklkifdc`\`loprrq\K([n\KJ! %3IHF@@׼·M_|zyKO3! + E1TIHNJA,$&#G#SZ#.)O'%|Żv]ge=Ic&;J>;?;::,9$04+2vL_`^QEIGK]^YD$gx{zyx|~gs;&htn>P~|y~~wvE#(++j2!%5*>ڔ  +* -, CG.b_aVoǻ8Iymjl3 +7C0+)(),,'  WsFAuXSOC56HVRM:EPRMC3,D^^_a{οwvrrrSQmvyz|}|}~~~~xq."0?j`?LQMJIIF'.%0Q@ H4pdE$ub~9E99=B>87@7+/A;(491;6459;83(%+*-11034:/(?g13-*-,.*3)& qe˾ҿпpY#SZttmbZTI?7501/-)''#" 7Q{wwtupg]UMGB?;8765432011/.-( +"SyyxpdWIB<;Tyc=%"!"!"!!"" M˩~xrlga]X\_]WV]b`~ඤ~|yx|zqljgbc_\XPF%RJQSbKDA=:7'CtsqxuonjYPjWtc__ghjgfcSD4unlikhigddaajnoppr\N*ZlYJIAiH4n+YC1 #4LGHA>ԼJa|}yzIL0$ F2F*""$!)3 < HVOj[.G%(}Ž?Hf!$@]SFD5,-,;%394!5wM`_^SKXXV[^[D#bu{yyz}cu8$grk>N~}y||wuI#*'`@&{y $'Iۓ  +/ + c?&"@C*XP][#tǺ3W2 $FHDGFHDGH VpVZFDrRQOH;S__`dϾPPmwzz{}}}~}~~}wi,"0;pVBLOLKHII$0$3cG$5^W5%{P;W " &$!F8!! @d5=>?A@?AD78!p\ξԼǿp^T`xukaYRI@741/.+*'(% !  
:Q}xxxtof]VNGC@<:8643321210..+ "PvxwodXKB;588#9*#"#"!!#!"" FΧ|upkhd]Z\^^WS\c\z两}{{x}zuljgce_[UPG!CQPY`WBCHJFDA?+Bwuwpom^RkVàtdabfikihcUC1|yurnpmllhhlnmprmYN(^i]IK;1)&'''*",JHHA@ս½H^}{}MG0' C2K@@DA8(*7:2H(i`k&'~ƿ=Id-OUSb]M:/3%9><#5xJ]_^YT[^^]^]C#cx||zy|iq9&guo;W}{y|{xuQ$)(!sb>$ #(Fؖ  +4 _`1  C>GlgT8sȻ2GKa/+94#&558*  ]lKiDHsQSQC68HWSK;FOM?=FKS]abk;ueQSxRPovz|||~~~~~xd)"/BzWCOSMIFKN). 0U:&Ii0%}Qd`H}~I:11.,*#!"$'*(=c)1-.63/07.+${SͽожľoZ"N_xskbYRI@940./,,(+($"""!! 7O}yvyuog\TMFB?<;854312132///+ + #Ox{yqeYNC;52/,)&##"""!"  Hʩ~xrkif]WX][UV\c]u䷡|{{{~osvmigbc_[XOD&BQOWbsXTDCA;%NxVSPH>8FTRJ@GRY]YXYZacbe˾PTnuz{|z}}~|f)"/BxQCQPJLIKR'/"C_@:Oa+#=-\uJ4"OH@L@7*7'D;:E)H]#zVοѾr`#PdvrkbYRIA83///,,*.,&# !"! 5Nzvxtoh\TPID@<;7534341220//*  Ns}xpeYMC;42.)'%$"!!"!"  B̬yqlje^YZ[[YX]c\u䷣~}z{rwtljhdd_ZVOE&@]QUc{lHGC:)>|~{yqaV[ovsrm]YkTyskgddgjjgbRD/}yvusqpllkjkqqptWH.^h[JG8/3',1-$!"0HFFB?½L\}{|MI/ $ @0G;41*&(5#8LM0]MD)*bQXRW\eXT<Mg{!'CZX[]SK;'"3%1/"2}N\a`SJMEH]^XE"bv}|zy}~cq3$etn;N}|yz~xtwW)-*K=%*Eٓ + + 0 BG[eL @;YOYgwsOF?=+?VIcmvuqlaTjLtrmifediijeQD1~zwusqpnljkmnoqpZS.Ze]CE>d`+Kx{3% !"-GBCB>ĽIa}y{IN0"#$ <%BYQWUH.$% 3!D>4GV%$ȹtxytxvvsg:Ici)DSK=;/*)(!5,@?-1zM\]\VLXTO[]\D$bz|}zx|~gl.(dwp?L}z|vv~Z&--eg+$(Dۖ  + 1 amQ  ?<Fk__`! yǻ}pxx;4qqm`6,T{yski# +(EED;'4II %fmFy'/wO`_^\QIJR[^]>"ey}}yx{|gh-'evnALy{vvyV$/0%zS$%(Kݔ  1 esK FCG^! wɻ7wtH,e$ +$(%&:H%mbe`7YsRTN=48HSSG>KRA1028Wbcfkξ~K|A[kvx}|}~~}u])#4GODRSONQOH/GV>\xM!,J3Puj"-.)'(+('%Z.),,--,'%"$$'$*%'%""!SO&15;98@<62,IǿȽҾ̾ڿȾƺq^"QXrpi`ZTLA9423.*'(-.*&&#&%3Dzxutoi^VNHD@=:77742310010/) + Gu|xofZOD<62.+)'$# ! !""! =ε~vrhda\WY\\WX[c`k丣}{wzplggeb^[SOC*?TVc[`q_<>IJC=*;yojjs|ztqfQmSƩ{vrlhedfhffbSC0}zwutrnmkkmnnpouYO0ZkXDG8f}w`$}V !.FCC>I¼ľµM_}}z}GP2##! +C"0+%$!+% #6:RiS1$&&&Ƶȴ~2Hfg&FbW_ddZ='50?7%/wM_]^]ZPSY_\[A ]v{|yyx|jj+&esn@R}w{}vvzR'/*jhp!%*Gڙ  2 ,/5  B:/:#%!"!ʺbRHP|%KI!!(c-))u" +(A=>>:;;CO  oZ?f0TlTQO<42@OQI=KSSY]_S[fggjм~A[nux{|}}{u['%6EJFPSNQQJE *29'GWF!.wg\lvc_.IEL@?BFDAE'15:3>?HB>=;9755410.10/-) + Ht|xpg[NC<62.*)&#" !!!  
9ðɲ{sngc`\VY^]WV\dbj伡|yyyvq|ypnlhhdc`ZUOC+D_OPQXCHD=7*?kLHH@@:9@q}zvsgOnMͣ|toljfdeffhdSC4}{wrqqnljhijlllkiWO0Zg_HG61i^/$M-"0BCB>C¾ĺP]|{{JI. # 5);;<>@@,(& (;1=/dXL'$#gUrxzqbN[4E_s$6CFYYKYM7!1/A<(1v N^_\UQSPV\^aF#[pz{zyz}hl.%irk=Sw{~yw}Q',)h~gl&(Hږ  1 + OD@ +A8Ceei]˺' "-nt +%;99679D76=D:>@)5883<7E=;;>!W@" .B˿̿ƾɾq\!KVtsmd[TMB:6330+&$&+)#"$"" 4M{yxsttkaWPIEB>:76632102010.* + Fs}{tfZMD<73.((%" !!!!!  4ȱ{smeb\ZVXZ[XXYb^g༣|z{xtjiecc^ZVMA)EaISckCEA>;':~fv|yvqhOoGwqnkhgda`a_aWNB.yxuvurpplmkhhd]^_\[]]XYVXWN-Yg\CJ9%$ #&J6",EFDB>N[|y}GQ2 $56PAA@FP>+( $;A7 =[Y%"ɚRKSRTR]0E\y!#9RHVQJUM-!20BD01vQa`^PMLKN[][?$]qzzyxz}ho-$iuq:P~x}~xzQ',)1wV+$'Jڐ  / +cmZ+  B7J\I[["!ʺ1-10!\N5N$-|u[m ++G %qe9s.izVQO;1C9d(!"!!%#"$! !"%$$%Y<*435,0+20+!)3:ҽоĽpd!I]wtng[TOB95352+%%(*(""$!!!5GxzyvuriaXPIGB><96632022020-* + Fs~|sgZMD=94/)&&#"! !!"!"  /ʹzsmec`XSWY[YVZa`fἢ}yyx{~}zqjihfc]YUPB,H\IQ`eCFB@:):~}xvp_mzzvriOÿpFzsvqljheaa]ZZUVPMB-vytuttrpnkjifedcb_`_][ZY[YO+VfaIJ6)%$%)oc"2HGEA?¾üR^~{y~ID0"$9-JJKLKK4(#$41el"!6g((͡r_^_t/Fam$GkXU\\TE-!00B@*2y N_^_ZGOTQ]^^@#`s{xtx{~kp,(fvq=M{}yP'.)ob.#)K֑  ' [dG  K8G`-OW"'ʹ@h{yI tj?l +.HFFBC>CMK 't*fvPSQ7,8JRQHAMR7$&+5[khhgѽ:`msx|~~}~~{uX&&7MKFUYTNJLG9DgX 8G\jxW!1*++'h($()(!"&'%!# !#];,<64436>>@'.48ʿqc$GWstme\VQD;5121*%%(+($$%"! 5M{xyzwujcXNID@<;97641211/1/.)  Fr|zrg[PE=83/)'&$##!!!##"! .Ƴxqjdb_WTWXXVWZ`ad㽢~}{zuvrnpnnljhdc_ZWOF)F[HQYXWBFEF?8*;rSLILC>:Bq}zxrjN¾qB{xqnjgfa`\YZVSRO@*u}zwturokihgedeb__]]\YYWVN+Wd_IKOTTI>RYKQWTObjjinѼl;R6`lsw{}}}~|uQ"&6NDGW\QLLMB=aiN9lH=YrQp&8AICB>FG?06>9;<2\7 "# ""$%&'!;4ʿ~sa%DTuung_YQD:2133+%&+.)%%$#$!3OyxzxtlbWNJFA>;86541211120/* + Cp{zrg\NE?830+(%$""!""#""! $Ƹwpjda^YSTXZWVYac`¾|zxvussqomjhgec`\WPH(BWMRYxFFEB:9.7~|~yupgMrCyojfed`^ZXWVVRN?+vyuqonigfec`_^^\[[YYWWTJ,Pg^JL;Sxssps@ !/HHGAFڿ˻Mc}yxHJ,"'=6G?JLAI7-* 2@:5#'&'ƳPRZY]_]]]/Icu"2!"#""0.8/@>)0uN^]]____^]_\A&by{{{y{mr1)ewp:L~~y{|xK$.($(Nؑ   , VsV  +Q6,5. 
"$Ļ 3  +(/#+3 *y|%eiURO:3:OTUG>PVGSiogmnkjj̻umm8_ktxz}~~~}yP!(7RFO[[MIMNA!D(;9?qLy",7FBNF>@A6-F@DDFFE3f:# ;2Ͼt`#@Yuwpg`YPC94453+'&)+($%%%&" 3HzxzwrlbZQJGC>;965323211/1/) + As|{sh[OE<81-,*&%"!!!!"##! *Ǹvphe`][RUXZWSYac]ྠ{yxyz~{zwpiggeb^ZWPF(D[TRUugCDC><14yr{~}|wrkRr=vlida_\[ZTRRNKJI=.wxwsnmiiiebba`]\\[YYWQTTUSG/Th]KJ6%""#"K?"0KIGBFǹJe}}zxJH*#)I0<(-4&5617 ,9 +du2"$'&)ƹYZXX\]XZW+EZy"$,""$# 3,!4+;:+2wM^`_^^__]_]\A![u{{|{|nt3(gwpC 'zJC"daURNC87NSTI@O[MDLQUemmjc̼7]jtzz}|}~}~~wO &8QDQ\TKLMMA#9?pG{#")'$&').)"!%$%"!f@471,$+,*+03.&B0rd"BXstme_XQC:5342)%$'*($"%"#"3J{zyvrjaYRMGA<;9754431100/,&  >s{{rh[ND;71-*(%$#"!""!#"! %˸{nid`[WQUXZXVYac\á~|{xyukhgea_[WQI*H_WRP}fCGC?;,5w[W]qYHow~|ytjYtB|oslmjdb][XRSTRPNJ@2ozwrqrnljgfdccba^^ZZYWVUVSM+Rg_GF:&$$"%;5#"2LIHAG¿ƶNc}{vIP+%@,D:18;>22=&2!"P1$$&%(űt}|zn{wyn0D^! -946863<+5/>;(0x"Ma`__^____^aA'_s{|}||~rx9(gvn=O~{|{xU*1+!'.Oڑ + !_G/* N07.'46?<@?<.?.ͽ̾¾q_#=Suwob^YPC:645/(&%$('! "  2I~}{zvrleYRMGA=;98643312100,&  =o{yshZQG=61-*&%##$!!##$" "ɹxnic^ZTNSXZXVXac`Ģ}|zwxvx}pnjjfc`a]YPF)D^WOTvQ?DFB=;05|uzwr~~zwmQtIyrqmffe``_c[QD3wzyxsommmkihfccdffefYN,RhZCE=?JFB?=-""/GFFBGƵMa|{wEW/  =1F>AGBF201 %4![K %'%'DZȟ̰+C_$BbTXbYXS5 02D>$2v"N``b]___ab_]:"\ux|{z{~wx9)nvm;Nx{{x\).*#,/Rڐ  . $u9 R0B`^ZU ($ĹV;=+\6&%6Wt7VNVK>832) 4|%fgQQM@6;NVQCEUWPVVSShnlooͻllh6^muyz{{~~~~|xN!(=XIRSRPPNP@7IJ|OJQPxq^D?;8746996445//~*+,/,%$&&%%%'# !!"l3%%!"%%)-00+F.ýp^%?Pvvnd`ZRC8644/)&$#%$  0Ht|||zunfZTNFA<<97543210000-(  AkzxshYRI>82.+'&$##!!"##! "ʺ|pg`^\VOSWZXV[ac_ä}{xxz|rmkfc`_]XPG)EWJJZ~zUEFB>=46ormhWKVTh|ysWwMxsrmigfiimiVE2wŹ~|zxvttqmmkhhlonqv]R+Rh]BE>`y}K"-GEEBFýɷKa|z{FU.?-LIC:=E/+" +"25IVE&&&$ʭzȣn‹$?c'JmQQ_UQL1"/-@8"0u"Safk]`b`_`]:$dwv{zzz}uw6$ism@?>>@@<;<5=T=4257676?5$ %'(#!###'(''&"&l)M,ȿƾpa'>Ruumgb[QB75341)&$"##! /G{~{{{wmbXQKDA=:9753331/00/-)  +?k{ysj^RH>94/*('$""! !#"!  
$ǹzqf^^[VRTVZYVZ_c]¿£}{yyvmigdb_]XRJ'BUJQ^rEHC@=58ff^X[ZW^|xpSsBézsplgghijifTE3wyyyxvspnmjjiknrrn[T0Q¿acDD6,2Wy6aQ .EDDBI¾ʸKf}{yDU.R/ED4;EB** &3$K2T%(&(ȮNlc%%`yz||{z~l{6$evo;N|y{uv{L%.+-3",*  &+Tړ 3 idJC ]23&-%! ù{~:gf*   + 3'gaSPMIGGNTTDGV[[b_]Ripnooʸ8_nty|{{}}~~||pF!*9YOLRRNLMQ8)@RqF`juYCBA?=<>@?=>>>>;}B4L[;799<:;/DDCAHȸLk|{wDl0 M9L>?C?B30$ (4 #H<*%'$(¡ě'>bz"%DU-#-##-*$0%1,1w!M_`ZV_fvbuoZ@$^y|||{z}o};%ftj ]-NmjiQ#'»/>4621/%CM."3LPL:"  4)h`SQRRQNQTRDIVR:7;=Gkpnni͸5cqtyz{|}~~~|tD*;]GJSUOKLP65>S|DE5>>><:t>9WS878:><;=D@&!%&%$$()++*++)(/uH4=@Y\`baeiggDU2˾»pb);984433010//.) + +7my{vj\RG@;50+*'##!!! ! yme_]ZVOPUYYWU`e[Ơ~|zx}tmhfdbc_ZQJ'0QFObjHHB@>5=}wnT½tHzupmhfjmjjeTE1x|zusponlkklnoopp]O3IQaDH:R}Q;QG  -DDDAJ¾ɻKk}{zFi+ X3RLC26?,/! + -MD8"$&&'Ÿvn'_AISSMLOU4@<]quZDB?>=>AB@>>>??=k4;K;678;@A>>G?%"&%$#&+-+'(,-*&.uC8BBdjkkorrrzO`G¼rb)5Sqtpja[QC:60//)&%""""!$! /C}{|{wsh\TLGC><974234311.0.*  +5mz{uk\NE@94/+(&$##" !!!"! ŗ}oga^]WRRWYXWV_cYǢ|{xyykjhdb`]VRJ+/]LQZpTHC@?=7=}voUwEñ|uqohfjljifTF2t|wvtonmihhlonoppoYQ4MJgBF=F>! !/KDDBI½ɼLm}||E[( Q.TSIDA4 ,! +!+1eW[%%&&'ƽжt'=av! 6STR_RJ4#%,.?9#7u!Q_`^xcsi`^]@"i}|{}{|}q{?'lum9J|}uu|L%2*}Rmeeu&'Uܔ  ! ?\C  b,$DP?! #'ƺ" + + 4^Y|,maQTM@8BKPQEHYVEEELTqqqsqʻy9douw}||}}~~~~~|v? *Cf9GSRKKNS3I:bYADeuXGC@@>???====>?Cv?>JD;9=@@ABBI9&&'&$$(..)%)-.*'1v:4@>gnnnpooprNiNϾ̿ſre'7Soupib[QE:521.)((&(*("" /?|}{|zvsi[SLGB?<964322200./,) + +1l{{uj]SG?:5/*(%##$#! ""!  #Ø}ne_^\WSTWZYVW^cXƢ|yxv|}y{xrkifbaa]USJ-2^TTQxZBCA>>7:~xnS¾vGƯxloiiklkihUF2nŮxjkigceeca\^jnoppmYQ.HO_?G>IY[`TI3 -IECBH½ĶȻNi~}}Cg+&?*DON_\J),% #/KK4 #''%ǫ|'>`t!.ENIjaeH)&,/?9#5s Q_`^t_a_];"d|~ww|{}px?'hrm7Q~~xsvK&2'Nkbiu')Zې # #TG< +`'HbSaD#'ú2256714 b> $O  5S`u+n^UTPKNLRSPHM[Z/(2*MprrtrʼAdotv{zx|}}}}~~zo< .Cfu7ISQJINS2J6fkRFwOCCB@<>A?=<=<=:?J@IR?;>B@>?A=2#%)(%&(++)'*---,5w84;:gmnnog]ZW>fJȿrf)7Spvric\OD95210+**).-)#  />|||yxujZRLHB=:97632220.//-( + 2j}{uj]SJA;4/*(&%$"!!!!!!  
xpf]\]WRPVZZUW_bUǤ}yxrzvpsrojhfdbb^VQH17cTTVl9A==>7:~}yrS}D{debjkkjhcVG1jð|nmhigeebaaakopqqn\T5NTbBG@liyU% -GDBBFȻOm|~D'c+& J*BA94-%/' )/:$'%'ȫNSRSRUPQ[&Agw"$>Z_`v`\F/"-2B;#5b!R^`]tpa__:#]uslpyz|qxJ'jtl9M~}ytvL'2&gzkt'+Vْ # ej7 ^&TI*QU $)Fl{D  +  8imym-s_SRM<7DPTQDL]cTXc\huuttp˻j]c~?cpvx{zz|}~~}~~~zm=.Ejn@BA@>=>>?D?<:@B7&&*)''*--+''*+-.8|=567bnmk`NFGM9gHԽſsd*3Suvrkf]PE94232/=\WZ[C$ .C{}{zysj_VNFB=;:7333420//.,(  7fz{tk^QGA<50+('&%" ""!!"! śxmg`^[VQRXZXUW_d^ǣ|zxv~xjhefda^YQG56g[SNoO7A>;:3;{}yuY|@xfcbjkjjgbUF0n}nkihfedcaa`jnnppk]M3FRc?E<_F6%RO% 0DAA@C׾ȽMj~{|E?k-% O#-,*)*'/$ //$&'+ȩW^\[^YXX[#8]y%&DaTPV>3.) 0/?8!/JO__\ve\[_`:&_spoqz|~rvK#hsk9J|~y|yu{N%5%!k]|S&*Xؒ & !az< f*OaSVJ"$*¹?9887543nv z?( +  + + + ;_`]i0s^TQN;(2APPDO]fnvqpsvtssq˻`3[zAcrtzyzyz|~~~~}zp>-Hjh;LWOIIMX*D- pQpJ~wLECA>>@B?=<===;965332320/0-*  +4hz}wl^RHA;6/+)'%##!"! !"! ȡ{si^^YUQTYYXVY]c\˦}zxx{wlhfdba]YSG52q\KO|P@GD<;99y~zsWFĶ{jcbjkmljdUD-jnlkkhged_`bknmopm\T2IPiBE@[F6n-DH !.AAA?JɺNj||FBa"% +W5KHKLGM.1" +2-##&'ȮĿ#;[q $@MC:?>9.%#+->:#6J!P_b\^`a;&dwqrxy|ouL#gtk:Gz}~zx~S'5)!/*!! '+]Ԓ   . $S3. +k'1YY?-!$/úv$uE^j:%  +  + + AWS\b4o_USO>00:QPDQ[knrsssqrtqlͺ{Cdpvyz{{z}~~~}t</Lse=<<<::C]48dQ?F=;<:8@B7&')++)(+,)*+,+--5y?78Eam[EA@AEG6lKϿ̿Կ˾þtg.=OrtnibYPF:64:QWS.sM%$" .Ax}{zxtk^SOEC?;8543324300/,(  +-gy}wl^SIB<5/,*'$"#!! ""! !Ȣyqh_]YTPSUZZWZ_d[¿ɢ|{wvyrlhfca`]XSH4&fSKYv@HE?>;:xzsV;zhdcjllkkgTE/knljhfedba``jnmnol^S-ISf@E=94+2%+(!-??B@GÿɺMm~|}I1`"$ +Q6L=8KQOCH@7@WbTNZqvst͹rGbpvzz{{||~~~zq;2Kl[?KOMJHNL)A$tnBQ~NGB@=>@???>==;9E\3DY@FE>;::9780$$+.,)(*,+))+*,+5u<47[jdEA?@CDF3pJti,;Lrsmic]TI;7<_N/wN-r}5&#! -Aw~{zxsm_TPIE?;9765444300.-)  +/i~}wm`UJB;50+)&$##! "#" Ǟxpfa^ZVRRTX\WW\c[ˣ}zxx|uqsljigdc_]YTI2$iSRSyuPBFD@@<;x{vQ;wjeajljlliWF0jpnkhgffba^_kmlqrn\X/BRbAG@RnrhegD"-?@CBF˺No~|{L$_$! 
I3OQJNF?(/ +21 &'$(rA^""1>@OegcP.!-&66 .J$S```^qVje^\^:'dyxy{|~omK"etm:U~~|xS&8(&sF&()[ӑ  ' 0nO) b!!$,[=zt[(lw?cm7 8oZĢA +  CZlrX7r^SSH:=LTSOFN:(BWZ<:HmvuuʺePpsDdqw{{{|||~~}~~xm76OoZ?JPKIHKN-D)#ykXKUcLGEB???==<=;<<8H]=OYHMME?:<=98/##(+)((*++&&')++>w<31clO60+(&%##"! "#" ʢvnd][[WQPQW[VS[d]ͦ|xwx{piigfb`\WRI/&fONNmUAAHD@@:9szuS{<ƴxke`lklllfUH0gpnlhfedb`]aknmnnk\W0CRhCH?iqaXgU"-BECBE¿ʼLm}{xF(_# # D/OSOSK9$. .. %&$)•D^x #9XVabXI7#-!'$-F$Q]a^ye_a6$fzy{||~qoJ cvq9W}}zxO'5*4n8&'^Ώ ' +ygH +l#!$.g3Zl3SZBpj%}|c>  DcapP9tYTPF46HSRPCLEMeefFDQuwts˹{jmJeqwyzz{{}|~xl17NsTBJOKFGJK#D*#ER2Q7KFDA?>@><<>;<<;M]AMWKFGORQNRH:1""&%%'(**)%#$(-+Bm42.eiB60,(''#""" "$!! ʥwne][ZWQPSYZVU\d[ˤ|ywxzkkgeca]ZSI1iIEMxtKIB>?97p|uS~<Ǹ~zjebihijkdVF2aqnmjghcaa_`koonnj_V1ISoCH=eT2w,@G ,DGE@E˾Ln~{}F(_ $$ C,GTP[\L+++*$($&ǧwa¾A]!%D]`_[GD?("(*41/J'W_aa|f}c_^6%i|w|}|~loO!\qm=T}z~ywzL'6(xtxI &*kԎ  ' %.E_/ +a#8@>?0 #.»p|wn}e+jH +  +  Kf=|M:pYTOF6:HRSOFK:49ZfBATvwvu̸zkgLgqwzz{zz|~~~~}}wj3#6KsQDJNKGHKJQ)'NJL}JFCA@@BB@?>>>?;MY8IMA7>B/:Gto !! +)El}~|xsn`WMJF@<974332101/--)  +*`}ymcVHC;51,'%&$#"! #"!  ɡxmc]\YVPQTXZVV\d\¿̢~{ywvuwjjfc``][SI6oDKP[{?CBB?87r~{tSÿ:ȹ~{hecijjijfWJ2bonlggfa`a_`mmnolj_T3G¿Uh@F?]O-j2@F (HGE?>̾Ml{|B)S &$F3NMH?7,.*'$(&*ǵǂ$B_&(ARTZ`ZWL(!+!7B:!.G$Uabbeb\dc__]6#czty}|~mlS bwpAP||~uu{L)5'!xdlh!&*uՍ # + + +^!XhdhN!#0f/m6 + +  N}QiI=lYSNE42CRSPBPHKUb_DFWqwz|ʷoodPfrwyzzz{||}~}}yf.!7HuLAKPIHIJH!O%$W=,;QzGFB@??BA>=<;;;6QU7KL:8=HA331/3-!#&%%#%('#""##'(Fl7.1eF<85q}ypS~9zihejnmkifWI4dqokiigdb``]jonnnn\T1B[kAH:00&.%20 *JIFCB׿̿Mt}{|@5[!&# =:PINKGG,. +*(#&$*ɝfw~}z!A]) '8JF@@=:2$"/"9A8"0D$Raos\^_a_b__6'byz{|{~qlY_trBL{{ww|T'9),}b* ''r֏  + ]!&@Z]K""0j4224440+b!0 +  + QtG?nXRPF87ETSQETNDDiofSUsz~ͺcKfpvyz{{{|}~~~ze,$9FtH@JPHFHIJV#'hr{}IECB?@B@?<<:;;9QV?LH;>DE7---*:0 #&'($#)' !""!%&En=+>t]@AEEJLNQ7{mF;̿ug03Jprmje^UK??FaX@2$8U##! %@l}~}}yrkdTMGB?;87565200//,*'  +]~~yocWJD>61-+)&%$# """!#! 
Ρqng^[YTNQTX]XV\dZ}Υ|yww|pnnkkjgfc_]ZSL8jFGMruN>99AD?=84q}xrU7űyifelmnmiiWH1_onnjigeb`^_knlmmq[S2G\m>E:&&&'#DH+GIGBGһ˾Ko~{z?C\)' 7:N@D65H15 *,%&&*ɞT\_VUWTQ^C_y )BaY`khW9$!#.6A8!/B$Qawk`\feWZ^7+bsuy~{}nlZ`sm>M}|xv}U%6)#uh4'%nՎ   + +  U /70!#59`#_)! >K +  + + + + QlP|G?hZRQF37DRUQDQC4IhlG8Vu|{θeIhsuy{{{{|~|~}}zg,%:JxH?JPIHHJGS"(SAJ{FD@@?CCCA>>??=:UO;A?8:?>698,*9/ #')(&&)'"#$#!#%K_3#PomSDCELJPI1~iF˿sh0.MpponjcZUNJC<9?w^`X+!! +%Dr~}yskcUPKD?<:9655100./-+(  )]~|zpdXMC<88540*%$"!""" ! vΥnof][[QINSUXWUZc]z}Ч}yxx||okhfeca^XSL7kMEFUP@=;<@EH><79q~zvV¿6ȲxkgcimomjdSL3fǵyxvpomjigaennlmlpZT5F^n?G>5A<56TQ ,IJFCIιÿ̼Mo~|zz?Eb!++:2SLH607': +%(&''*ʜ`ed_df\Z_"Ab& )IdMPUYcP*"(,<8!,A%Taa]Z4%etqx}|mn`_vo:K|tw}T%8&)fJ#%&j֍   +JSC+"#4D127^/[VXWZXdW +  + QhmpFAnYTRJ98KUTNCTK9Eniequy{t}͸bLgsvz{}~{}}{~~yh,&9CK>JMIGHKER&0OfhIzEE@>?EB@=;==<<6WS@??4576044..8/$%)+*())&#$&#!%%LQ(5bnpfL@DIC9/2gF¾th1+Lpoookd`[QMIJMK`ssU6)$! +%An}{tkaVPJEA=9:645521/---) +^y}ypeWKB?ED54:?*%" !!!!! kͬtpeZZYTONTVYWUZa]q|ͥ}yxwtytkhfeca^YTK6eGBBHC=;=>AFE=:77o{yvQ¿7˰xnifjllligVL5d³yvvpnnjhmllmnnZT3Ca{CF?P~{["-EGEBHйͼOs~}|zA>@CA<;9799<9Ua^cWNRHA<=7;5G3%&)++**)&%'($"'(PK,G]hnmQ4362)#4dE˿¿vg4,Opqmf`VKHHGHGIDA20*)+(&$! +%Dj}{tkaUOGEB?;:7644210..-)([|zxqfYMCBW?).2F6%" !##!!  hʪ{pg\\XTPNRUYWVZ`]p~Ф~|zxxyvmihffda^ZWM8iDBDB><;:;?FA<:66o|yuV;ΰxsohhkkkhcVI2byusooljiklnmnp\S6Ee{BF:2;972RV!-AFEAKк̻Nv}{}C)c$+$ #L-LXS^\I). ($%&))ƾͬt?>c&#sxLL[Y"Ri# +  + + _pjl=GtYQQF=?EVSPFV:9AacZep}t~˹\Ifsxz||{{|}~~|g(%==<==?=^EBNDEFFGGEHR:I3$())*+,*'))'$$'(PJ,/+0ZX5)%$""!1aJοǿt`4,QsrkaXPHB=FKFFC<;9213.++& + &@mztk_TOJD?;63345421000-) ([||wqdZLDJ_;+*/C?$#""#$""! a˥slh^\XTKKPWYWWW^]l{ӧ~}zwxzqjlnihgfcb^[WL:nLHFGEA:7;BD?;<82m~{uS9άxrmghhjihdVM1aytqonlhhiinopn^T5Deu?F9'$$!AT!,EGBAHиͼLs~z~B!c ,% +!>6RL>;5-"4'& %())ƨ—yy;7f|&#2NSU[R=-#$'/B7/='V``]G|\`a6'g~|~}skcZvo@P~xy|Q&7&$xX!%)qӍ  +  +MXb[I+!$E@YrBo|C}TMj" +  + _{JZp:DnXTNE88>LSOEQEboq[CK^|~u̷YKetxz{|{{|~~~~y^''^8NbY[qzDEB?@CBA@@@A??7gKIOEA956<68,#%'')+,,+,,&#%)'UO324220/.,* '^x}yrfZMCD]B.+.E9#"""#" !! 
`ʡuqh][XUJJOTWXUV`_n¿{֥}zyzy{jkgfdb]ZWL<rUJHFHD<7=FIB==90k~{yT:ϰ~vsoiigiijdWO2_yurpnmkkkhmmom]S5B`q=F=-,-*)01$&'%))&'/5DCCCKѽ¿ιNt|{C*c +" +!:4OQ]ZNO4> +$&&)',ǭB6dz*#9ULG?6-(#"(0<9#.;&V`aatw^]^5#l}|}|{|umeYun@Txw~T+5%$Nydh %%l҈    M K7! #@O<<70fzywY4չ}vuphgghhjdTL1\}{wrrmlmjjlmlno^T7A`s?EDACB?==>=;><>@<=CCBAAAI̸Ir}{x='\*" !>.INQP@A0> ($%'&+ͣ~:2ap-"3MONUQH5#(1?:'2G&X^``g~~][\`5$hvxz|z{tokUtl>R|wwV'2$$N{Tk $$tՈ    + +  +CDBFI.!>m`vdxK7jSW^bL + + + !d{/Jk[SQH84FSTMHNEU\cYKPj{yz̺VHesx{{|}|~~}w]'*OVYTRRO=+j=KntGDB??@@=<969;=8g@KVEA=<;:75851%!&*''(-.,,+&$%((]kfinI*$ #('=YLƿsf2)Nmoi_VJE@BUP=:B*CC/1>)&3' ";;:4g{zvW¿4٫yuoiffhiigYL3Zǻ|{wqoommljlnhln[V3>Wr>FEEFGEA@CCCCBDGADHGD@B>I˼Kt~}{|:+c +! +!E*5*)6<4#8 +*%&'$/ǰ~k^[Xh6-e=",ELJX__K,(.F=(5E(V``bp}[]^1%h|~|{{}vkhUtmCK{xx}X$2$'[}Ug!''{֊    + +  I!U_\e=" CIL= #gVY.LmVTOI;:HSSLJR>;;99=@@9p3 +,&$'%'̢Z^Y]bce`z2)_E%:QTV\[WC&'$+&1='U`a\_W]u[^`2"i|}}|z~vfhWujFL~yy^'1#*n`4!()z҇  +KDWMX< # AE  +  $iae+RiVSLH57HTSMJM44@ot_e~zɺRTgsxz{~|||}~~~~~~|vX%+=N;NUVSRVV3,cgvqlfX@@woo[KplEGC??A?=<=???=5oBKHACHF?754530%%('))+++))*(#%$^EatU*## %*+)GUIſrj1*NopiaWMGA?@I767;1%&00%#,- >e|tj^XRK@~221~i-430./,' %Wx||tf[MB;30-*)'%%$$"!!##  QȠxod[ZVRKKORTXXX_]\¿qק|{yxvwqpnkiihcca^ZTK8|rF>CC@>@=;>L:5>@9'*99$'63)# =`~ytk_XSMC{JNfD2200.0+'  $Xw}ugZND:31-*(#$%""!""""! 
K˞yndYYWTNLNOSVTU^^]¿qԨ}}xutrpnnlihgda`]ZTL;woH=CBCB;545;?>720d{xxT}5tyxvqokgda_ZXWUQH0J|zwvtrqnkkjgddcda__^[XYXR78ûNy@IFEGHlbAIIFFFEUǶIv|zy9%Z + #?- /% %&%*̣Yj¿32c3$>@BEFC@:6664wHU^NUS@E<52533% %&'(*,+**''*(++eI^pF("!#&(*)LQIȾͺϾͿvf3#Qnrk`WJHC?HX;3=A<&-88''35/' <_~yrk_VSMCdmU;04201.0+'#Vy}~wg]MC;51/-+&%&#$"""##  MΥtpeZWUTOLORUVRW]]X¿sԩ}zwutpmnnnkgfea_\ZUM;qnI9CHH<5337AHE700e|yvRwľ4w|uqmkgfa_[XURME/G}{xtpnkmkhfdbd_^]][ZYWS7:ºGvAFEEHJj`8=8z2979>QVR^MA>;86542%!'')*++*('%$&%&+mO\t9$!"$'(+*SNKտѽsg5#Tkqk`WKHD@CB4144-%(,)$&)+'# +;a}yrjaVYJ7~+02222232/-,% Lv}{tgXLA;5/++)((%#""!""#" KЩond[YWSLLNRWXSU]`Vp׫}zxutroqolihgeb^][TM;smC@CGE:674:CLB51/a|{vPr¿1rysmjfd_][YTQPMC0Hzwsoligfdb`_\ZXYXXVUSS79NtBGEEGJj\>C5l^0q;HGGCBBKDu|y}:R"vs%=9 )$!)*(/.-c, &B^UU\]`Z2(6='Ubb\[\Zyc^3%h}xakNngADxvzT)1$%*.͍#Cw + BF[VO&!Q0  + L;2>pC?A>:::>D@@;=PSSTDD@:95212#$)()*+.+)&&"#!#&m]_x<$ !#'(,*WUPðth4"Njtk_YPJG?<9300*'$%%#!##$#  9\~}ysj`U:w.@s<432331.,%  P~}uj\MA;30-+*)(%$#"!"!#! Kͩvmd\YWTNNORWZRV^bZkܣ|{ywtrnoomjhhdba_]YS=nfA@AD@8687EDCEGic@?=92z8JMNLIJC=;42-3##&'((+./-*(&%$#)taQw@%! #').-ZOKòpf6Ulrl`XRKE?=6113+&&$"""#"  :Twsi^YyXh*_o7632310/+& Ny~~ujYJ@;62-**)'%##!"!#"! C̦xmd]ZZXRUORXYTT]`Wjڧ}{ywtrpnmkkgebbb^[WM9jmEAAB<256:>B?8450]}|ywSk.l|~toliea]YYZSNKG0K}tnlhffa^\[ZWXVTTWVTR<9ĴHw;DECFCd\8C@ʑ/q@DHFBBERCw}y{6d3 !7B+'("&/+(,,/g' "'+CL1 )5A(Ubc]tc`_2'h{VuKpjAI~xw|N&3#"+&6K>&Ӎ/Z|  #I*Fei5!N,"q=:CAA8 W845i1L *ttHNU|MT%[jVTMA33BPVNWjt}}ĸA\jttz|}}|}~}|{wM .D[u5DKHEDFF)ZB#-8?DF. 
[oSBB@??BD=;B>9788vAIJLOOKB<<8302""%$&'),1/-*((('*tbApE' #!"&+.,\QFսǿİuh:UlomdZRKHB<6663+&($%$ ## :YysiaYSnHyF4662330.-' Py}{shYJA;62-+*)&$#"!!"##"  Dɠrnd]Yax[gWWU^bTk٩~{zxvutpnmkjfccb_\WN=o{HHG@7288=@B@;85.\|{yxTk2nŐypjged^_`biYM3P~ysnjiifb`]\\[[XVSV^ZU<7Kx8DDEGIkd?E=}2w=GFEA@ET¾<x|x{3 h]+  '9C +($[d^]pqXed(..~/b+ $%+/!~&4@+Vaa\thU[___4,j|Ur!IolD@{yyR)1"%+*+-/(+#B,5FR.Ҋ LNW  )T3(" QƼ-#p=87_I31 + R69C5H ,yl664jG)$^iYTOB@AHPRPXju{z{~öD`jquxz}}}~|}~~}{vK /E`k7FLIEEGE(`8"crVDCA@?DSiG>977:=wCFFLQOH>9:9420%!&%'(*/53.-*)('.|h@iO-" %&&(-.*`NIǼοıug8~Upqmc\SJGA:5545-)*'(%" ## + :Wwqh_XQcrc36554330.-' +Ov|ztgXKB;62.,+*'$###"""#! 7ʜoke]_|zqZVV\aSgڪ~zyxxxyvwqlhfdcaa]YP>iJK8/3667:>@=864*Z|zwtVi,o||uojfda^ZZWWcXF2P{upnllhdca_^^[[ZYXf_U=4½zN|8CEEGHic?PjP2x9GEFAB@S;x|y}9#[$̹/  +0A@ #$c?45MMBl$/,|*a+###"%!~#?:4=-Vbb_p^a`1)gwSs#HojAA~vwwZ(/#KeS:OqQ $'E,>AK1̈(Q_n +  *Y&YH(#!XĻn-"o899sf90 N724}e3J '~_2=95-dOHŰtd8{NjrleYPLF?742210/-0/0($  +7N~xog]WTLOE186422320,)% Fu}}teVKB<43.,+)'&#$$#!""!8ʠume_n_XX]cRbު}{{|vffedb_[UO?BG6;C:2/5;76>>74-W{xwuSb/rxwrlid`^\[XUe^K5S̭z~{xurolgfed`\\Z\\ZZpeVG.¾Q=FFFGKrxCIGGBBBS;x|zy5*`"M] =D; (!k<5.b;n"1*w*`0 ,!#! ~"(@17-Xb_bvt]^_3,gzUu)GkiABx{tR'1#HTED#;?:IS0ςI*l~ +  +LFPSe=.$![ƼWa&(l60GM6/ `Zhv^KM\T /W49CY4#dbUSL@=;KTSQZmxzuwyӿ@[lsxyy{{}}~}~~~~}zwE 3I`Y8DJGCDDC%d0oyQBBA??BJC>>@=Bo=>ACABH[<;?<3, "%""$($IT+*(%$+y\-Bb@(*=RWUG8,gJJѾűth=wQlsneZPJE=84212,--0/-)% +8Tzpf]UONH<387633431.+#  Es}{sdUIB=63.,+((&$#""###" 1̟qkdYdSlUV^cTͯŝÿcܪ~|z|dfdcb^[UN@FA8334328><72.V|zyxuqP_1tx~xsmic`_\ZXWk`H6Uشuzxtsplhgfba`^Z[ZZ\pkUH.U;GHGGKtw?GIGAADV>w~|yx7"X 6Ĺd >@9 *#j=4.kDn!5&x.c! 0 ""*k3:-Xdb^a[bea^a0*gvOx)HpiBCxwqS(4!IaB#*2BMB,Ј#tJHP +  -R*ccTJ/*#]ƼSu$)j@jh.9+  3|V<[9A~$edVSPLNNQTVO[nw{rw}>??CFd<@@9- "%&##(,of(+($$3]36\T.TWQSVVH.mJFñvh?vWmtoeYQJC=74210*++,+)%$ +6Rypf_XQHdh267543330.+& Bs{{vfUG?<72-+)'&$""!#$"!! 
1ʟqmflgKd{dVV\cQӼȗhܪ}{z{jedc`\ZWR>6KCB;634656:>;61-Wsrv|yvwpN[ÿ.px~zrkgba_ZYXWg_H7R״u}yvspnkgfdca_]\[YZ[mjTM)S}9GFDGHpy?GGEACES½ƾ>w§~~zxt2&Z$25$ +=G 4 +&$iA9>mj?m#5(x*`-!! !~"^ǃ2>+Tdcaw_^`1+cQr,HskCAxps{N(5"KwwH:upxyxFЇ  /T*E/^¼_'&r_osaHHZ3  $6x`JtjB2Rq(efUUQQOSSUSN^oz}rnx:`ltxz~}|~~}~~{uB3GeP:GJFCDED"g+uMDB@@B@YCA<74AlC>>CEC2HGC@/"%&&&*8a+*'$'8e54Zj9KKLTVV4n}NDIJwf<uWntoeYQJC=85430+)(&%###  7Nyqg^XOKI47654330.*$ ?l{{ufUH@<72-,+)(&#""%$##! -͝slfv_RXyf[[S\cUӰbݩ}zz|qyheeca^\XS?7\?;;823548:>:50*Uouty{vupS\ÿ.l݂xrkfca_[YX[iaK4Mٴy~zusonmjfdc_^[]\YX[jmTI0M8FEEHJnz;GFFBCEQ½=wʉ~{x4"W GL!; +)$fCRBGf#<'w(^|/ #   |%k}2B(Ubcammijb_^`0*dUr,HojEFx|os~N&7#L85J0`[YUY>х#.)  5R$@?DD$ "^Ļq&2L   zw;d*hdYVTTSSSTTP\q||wyx=_ltz{}}||{r?!4DjQ>IJGDFEBr/$||KCA@BCX[@967:HeA??GGBC^MIF1#')()0{H,*($%7lH5UgUKU9GQX/qxEHdzxi;r[muof[QID<73200.(%#"#!! + +7Lzph]WKmnjp75643330.+# =nz|seTHB<52.,*)&&%$#$###! *˗sjg^ZYUOJLRSXZT]cTƞzʸaڨ}{z{{trtoplfefea^\XP=-S.8<732119;<84/,Tr`v}yusQW.hۀxqjeb`^ZYWXecH2Mܸ|~zuspomieea^^]^ZYYYgjVJ1½ýP;EFFHNr>JHGABCQAvʜ}y{t6&` HK!3 3'xrqxhbqr)8$u%]2 #u#GY%,(Wcba`[]\_`^\.,eSv-Fnl>CyxptS*1#QK?K+$&!-(8,у'*UJ& " +6Q)accg/!!`%=F  nf ?}_,a_YURPRQSTUN_p{yohy8_ktyz}}}|}~~~~zp;6HoL=NTFAHHA2"LDA@D]~~`D;559=HaAA?EEHb`KKI5$''/Fy{3,*&#$=p]?Yej|_*/*M;B?963/16<9541,Rs|}{vvNV1_݂xqiecb_[ZXXhhG3=ܽ||wsqmkjfcba^\\[YY[ihPH,¼U;GGFGKiu@KHHDCDPµCtΜ~}{w6(a +EO"D 2)+;(u-X. !")$u#6Yō&(Vedaaaa`_^_a.*bUw1HmnA@~yuwU*2#E[H@GWDADHIXN]Y΃4 n4 &  +:O+`3#iĻ$7B "o @]*f`WUUTRSTUUUbs~}}|y>_lwyz{}}}}~~xs; 7KpGS{urraD/'FC@@A?D>>;557;=H_D@@D@BXZMNO3"%'&5cuY3/,)'&#;qkX^dh_**+./xs?Dưwi<o^jwqh[OG@;73/0/0+&'#"!" + +1OyqjaSxGh0ot5433320/*$ 7dz{rdTJE=74/.-+'&&$#$""#"!'̜ljd^YXTMNNQXZZWXcQwӰ̣¿`ܫ|xwusrqnnihhfda_]YO>/]@>9540/2464413+T}{vtKOÿ-T݅xsmfb_`_]YXc_A09}{xupmjjfgdb_\]]\Z[iiUD-~O;EEEFHMVWZQRVPTcGBIGHCCDMCu}ʒ~zx4,d ;D##"D /#'<*u/Wy- # ''}%1±q+%Scb___aa_^`_/,_Nr1GjhAA~yyx~S'6% ! 
/NFRIXQj!?K.J # +@U.#vƺ!2< >SPSQWQR5B~|U/kbVkjSJUTVTVcr{wy|x8bmuyz|~}}}}}xq8 8KtIjfC!/*|EA>=;;;==6247:A>AFQ}tTHPM.!%&&)*)*+*((''"=qjb_bgkC.(#){hCD˿̿Ʈwk?nZhuog]RIA=620/.20,,'&%"  + +2N{slb\3p3HK145411/)$ +:dwyobWHD>94/.,+(&&$%$"!!"!3Μglf^YXROMOQU[ZVYbTqaޫ}{xxusrqnnljidc_^]YP<-W:9610..0245520*X}|xuOK¿/Gި}uqljghfbfcC+6Ѻ~uvtlqtoppljohUB'¾S@GGGEFHBDA>=?>BOFCFEGCAAOÿ~Cxö}yv3-h 7?=:+#!#> ($%6ÑƼ)t2Zv- # !!z$&')Vebba`a`a_b_0*^Pt5Goi@A}yzwzX&6%:GIKMXWT}!@h?L % +DV$"sȺ#7=  HP4iaW}qacj]TTeqxwx}{п?amtyy}~~yp3!7LxDd|sP?"*,|821121103../224IXBfnQMGQM,!#%%%&&)*)(&&%#C|mfI>]`IB/#"-hHEſɿűyi>j\gupf]RKD=7230246540,+(%" 0F}yrj`U5i4:hj645221.)# + 7_tvlbWKFA:50-..*'&%%%#!"#!(6ƞnmf_XWSPLORUZYWZaZgbܪ}{xvusrpnmkhgdc`]ZXP<-V8963520015:940+S}{xuME¾-DyiaYME===:=B<=J@*5zpe]XVQHEC=<<<=<>::9:;75566:::752.+($ 4O}{slm@Ah79HyL34120.*% ;_yxn^RLDA<5/...*(('$%$$$"!;Coni`Z][RNPSX[ZWY`ao½hݬ{ywusrpnmkhfgdc_\XO>)oM68;642.147641/+QzwqP?/?n`YTRPORPNKNK?)-rzsni`[ZXXURQPRXQTWY[UQB1O@DEEDEE@?A?>>=@B@AEEFBAAMA!u}}~~|{w23c&:)# HP%&"##" (E ((|$8Ɨ(}-c3 "1"!$" %& {$+Tacb``````^^/*XUs2Gll=H~|zw{P,8$=LFKOTTWw3=zs1$  BF""pż  +  PK3l]Xl[\dab[SRhuwοGckvwz~|||~~~|~~wn/ 8L>?KIA>EE:$|$%'&'*''' ,m#FA"#$#%"#%('&%%J|jZ*%!  0aBDŲv^31YvKJMIC>;87975783898875552.,(&$! #7Mt|yxvha{WC=[465``3421//,*$&?XbfXKFC?:50/-,+)()&%%$%%$!!Cr~}}pjlti_ad`^`_[[Xs¿s᭡{ywuupqonjjhgddc`[SD$A;434.+,00.,-/./*R|{yfA>ľw)6}ʰvvpmlljddO=()d}xupjfhe`]\\Z\]YZ[VT:*K}?FFDCEF@>@><>>?B@?DGGBBBJAs}~~~{u2?^$4:2" /AnhJ"CJ1$(6 %&D][_dadi[$8ų+<W;#'"!!$"&&'z#+Xdbaa_`a`^_[.*VQs6Jmk6M~xwM+5&6OIPI\O]z#%#$ +BE$"rû! 
+ TG4q^WVPNOQTVURjwzξDbkuvx||{|~}}~~}xf/";F}=BIKDBDC8'y$#/7?B<;><*0~g  2MJ:"%!#"- (!%'LcNA% 1Y8=ƴvY0'YdX:)))).,18AJKIGFHGEGEGDEEFDA?B=@GNJGMGDBABGQR5H@FQ JsL!"$*!#$$+*#!$t$/Zcaaa_`_``__0(NxySs;Gkk2Ux|uxI,7'KFM@PL\Iy   +>D"%#uŻ   TE5j^WTSRQRUSSRkzxнAcluwx|}||~}}}~|g.";H;AIIBBBC:$h'&+<[[Y^\48xa:[N;8P$I-2])c+DJQ;#6D('οijwV-*jleE<303:BO]abbffdfcabaecjfcaa`adgpqjcc_a][^_][]`_\Z[\[]^]\XXWQOLIOSXVNMJGEMLNNRQOMOOPOKKIIGIJFFHHEFJPZWSDDAC@@EDGCDw\}{{piXMWSWPNPP]ZO[lg`[XYXGõB(y~}|{{{z{|~||{{yr/1la|I(.+Pedbn~NzLD=952650.+0225573*'(39/+%"$)-3*Aw <8''/(())-835&:={&3Y`aa__^_a_^].%MwwTr>Gnm0]y|B+<)!$IGMITR^Rq'  9= !")#{»  WA7g]VVRQOQTTTXj|wоFdmuvx~~~|~~}~~|h+!:K;@HGDA?B5,^*)HORWL*4zvV7\S8@h(c07\!l%JOS2 +'#";=#ôuW+.fec^H79AN]cjnqrpsquvtyxwvyw{y{|z||~||vssstrpoqonppmmllljjhhgfgghnoqpollkmllhilkhlghjijjikllnkkllqswvyqniihgffgdcc`_`adgihgfklieiihiloprvtttqqtsspooqsxwqrswzz{snliijkhgge^adigjkijiijmgmmqqx~}}|y~~{y{sv~{zurqqmmmnorwxz{{{||xutyzwysu{zvy|ywxzyzymj~zxyyvuwwywywwoPS|jnvrkUNTOWVPQW[Y[B;nvaWONJHKIGG???%A>(ҾõvX*.gmn_9<P!q"NEW56U5Z5?"C@&θ˿пôvW,-kvtM:FNWbkqv{}}{}~}~~~~~}}~{|}{|}~}{|}}||}~|~}~}~||}~}}{|~}}}|||}|z{|zyvusuwvvxy|}~~{y{~|yyj[NH72136:@EKRY\bimntw|~|}~uqrrpllljmnmpjlqrtttvnmliimnropokkkmnmmjlilikhkjtwy{wusllpputuvvv{yz}ywrqojieddabWMCIKILJIIOIGIGHHHIHILJLOPPSRRKLNNG?>=GIHIKIHEGMIMJGIGJJHFHJJGDFIGRQJIIJJIFHF@===<8;@AHOMKDDEGJLJLLOOOMORZWQL9;;:<=?:==<;8798:>>EKD==<9788887638;@A;6:=?KRH?DPO\ba_cgDN`TSROKILNNNKDOUXXXYPMRXN?LSRKLMQZ\cmler|tw{{|~x]!'(#!'BszU>/*'$""=sLe,CO#_+18G!:5\/9R!\I2I8$ϽówV/A{aBEOV`iqw{~|~|{yyxxyvuwwyyxy{z|{y{|q\RA62/.9DHOTalv|z{yxyxy{z{y|}}~}|~||{y}}|||zz}~||}|~~~|{{zwwurotsqrsqoomnkjlnqpppqtxwxwx|zwtopohkiiihijliiefgegddccedcbbeeddgdefgececcdca`abcbaaa^[[Y[^Y[[XYWVZZWXUQTXWYZYZYVZZYWWX][X[YXX\_[[ZZYWYXVUWTQRTSNPNRSPRUVVTWYWQPOOWYTRQRQRRRPLFB<<>8<;=AHGIMOJE98:=@@@;<@95Y{||}z])FgkfphiaI516-   ",TdGPŰvE f.%&#c(;PRU$O- 
ӾzX6cvGCLU\fpv{~~}||{zzyyyyvuvwwzz|{x}|yodZG7/7GQ\bbirz~}~}z{||}{|{z|}{{|~}}}zyzzxwwwywwwyvwuwvuuuwvrvwuvwrqrtutstsqrrrtqqrqnpopqpopqonnmlnoonnmmomnonnnllnlmoommqonmllmnossrqqrttsrnoprpoonpnnqqpttsttqopppmmnmljhhfdddgjlmlonlnmokmlmlgdddcbadbbddc`VP~|z}}~~x\'!&(.2632318=DBKpV?=@EBHLFHG<@UviKSysI.%""!"###$$%!$(j1%#$ #!##$ (  a, 4@29 S+ ĴyTW]GJQY^jqw|~~~}}||zz{yyyxy||}~~{~}}ytph_RP[_cjhlnrw{}~}|}||}{{|{z|{||||{}}~~~~~~~}}|{{{{y{zz{yyyyyz{yyzxwxxvvwxwxxwzywxxwwwvuvuvvzvutxwuwxuvutttrsvwuursstsssstttsttvsutsussusuuvtvyzwvxzwuvvwxxwuutussutsuttttrrsspooooopnnplmonopqoqonmnnmmllliihgfjjgiigikgaS}}}~~xb92146545448;==AA?>@BBBEFEFGGDCCHLLHILLNQVXSUPD<@?<;9;@@:DMKHONRJB;87:=<;=:<<;B@Tf=51142122476478=@::94652/132114Y|30./01+---/-+(V2&Ͻξ˿ƲvVuqKKQW`fnu|~~~}}|{{}||{{|}|}~~}}~~}~{|{zz{{}|}}}~~|nz||{yzxz}}~~~~~~~}|}~~~~~~~~~~|}}~~~~}~~~~~}}}~y{zyvrmlhdccfejjmnpsvwy{|}}~~}~~~}}||}~~~~~~}}|}~~~}}|}}~z~~}}~~}|~|{}}~}}{||}}|}||{{{{z{yxyyyyyxxxxyyzzyxwywyxxxxxz|{{yyywwyyxwtvwwvwvuwvuuutsrsuuvtttttttusrtvvutusrssssstsstsrrrtrqpqrrqronoqrrrrqqqrrrrpqrprsstssrsrtusuttstssrstrpprpppmnoooopoqpomnmmmmlmkkkkijlllklkllmkjkjiikkhfgfdefefgegfedX}}wbGGMRNOONPPRNQPPSQSTSUYXY\ZWVUUWUUWWY[YWYZYXZZXVWVZWWYSSUX\]^b`XWUXWVTTSTT_\\\^c]XRSTROLMKMKIIMNOVSV[XWXYVYOQXVVUVWYMILLMLLUSLQQMISaWUW=53´vhTEJQW_jqx}}|}|~~}}}|}}|~~~|}}}}}}~}~}}~~~~}~~{yy{ywxzyxzz{|}~}||~~kttvuuurtvuwxyxuz||||||}}}~|}||{{{{{yywwuwwwwxzyywz{|||z|}|{{{{|zz|{zzyyxxwxyxwxyyyzy{{|~}|}||}~~~}}}~~||~|{{}|~~|}|{{|}{z{|}||zzyxyyyy{{y{zyy{zzzyyyxxzzyxyxyyyzzyxxutwyz{{zzxvwxwtrqqoljgkmkknnprssttvxywxy{{zzz{z|~}|zyyxzzzyyyzuwyxy{|{{{{{xwxyyyyzzyyxwvxyxxuqvvvwyyvxvuyxxvwxxwvywwxwwyywwuwwvuwxvvwwuvuvuutuutuuuvuututtvvtstsutsuuttttsrsqstptsrrsssqrrqpooopqqrpppppqpqqqpppqppqppoqpqqopppnppppqonnnmmnmnnnppponppopqoppmnppqopooppppopoopqonnomoonkmmlmnmlkmlmmkhiilkkkifgjgfghjihhjjjkjjfeghhgefebbabbeffghhgW~~~}wbHNSXVUXXYXZZ\^[b`^`_^bbacccbb_^__^^^__```^]^^^_^\ba^bd`_]_`b`b_aabaacecb`_^bfdccda_b_]ZZ[]ZYXWXWWWWWYZWWWUUVWZXWWYYYWXXWVUTVVVTUUUULBBFCBпóv}{IFKU
]gou{}}~|{||{}}}~|}}|}|{}}~~~~}~~|}|{yyyz{zyyxyzz}~}}}||||}|}}||z{pvxvtvvvwwvwwxwvwxwxvvvyyyy{{yxzyxzzzyxyxvxyxwwwzxxwxyyyzz{yyyyyzzxyzxwwwvvwxuvuuvwvwxwwvvxwwvvxyyyyzyzyz{ywxxywxxyyxyxxxxxywwuvvwwuuuuwutxvuwwwvvuvvvuvwwwvuvuuuuuvurqrrppnrttuwxwxyxvtswutqonqqrpqssrststtutuuuvutuvvwyyuutvtsuswwtvutustrvvttvutttvuttstttutsutssutsqsqrsutssttststtssttsutttuttssrsrttstrtpsrtssqrrqpqrppqpqpqqrqoqqqppnoopooonponpnoopnnponpmnnnmnnmmnmllnnmnmmmmnnllmlklnmmnmlmmkllljllklkiijkjjgejklkkiikmlkmljjkmmmmmklkkjklkjkijmjkkjjjijhijjjjhgghijhhhjghgiifgffehjkjhiihhjkhfhiihgfgfdccghhgghihX}}|~~}udLQY\XY[]]\^^`b_aa_adbccbbeeb`ca`__``_aa``__`aaaa_\]^`_aa^`bb`_abeebbcbfhgdaadeedcbbcdfd____[[WYXYZZ[ZYXYYWWXXXWXXWWWVWWWVVVXWUUVVVRRRSRPR³}T7BHS]fntz{~~}~}~}||}}~~~}|||{|{zxyyzz|~|{zz|||}||}z|wymvyxtvvvxwwvvvuuvwwwxyz{{zz|zyyyyz{{{y{zzxxyzyzzzzzzzz{{|zz{zzyxxzzzyyyxwvvwxuvvvwwxxxxwvvwxvvvwvuuwvvwyzxtvvsvvvvtvvvvutvwxvvtvwwvuutvxvuxvrtuvvvuutvttuvvvvustvstutrqqqooprruwxyyxz{zzz|~|yyyzywvwwuvvuuuvvuuvuutrrutuvutssrrqrqrrrsrrrqrrrrqqooprrrqpqpoopqqqqqqqrrqppqoprpqrrrrsrronpqqprpprqsttrqpprppqqolporpolmmnnnonononkmmnnlmnnmmnmmmmmmlnklmkkklllllljijiijmkkjkkklmlmlljklmmljkllkmljijjihiiiigiihggjhggeiighhghhfhijjjghiihiijjhjhiiikkjhhhijjigfhhhhhggigfefgfeggffghghhhhhihiijikjihkjklkklmlkjiihggfiikjkmkkf~~~vbPUZ^]__a`afbbbbbdaeebdebddcbdedddcdbca`aa__`_]acdegebefigefeeeecbddhfddfffgdb^eeddc`\_```bb^^^_ZY\[XWVXXRWUXWXVVYZVVVWXVVUUWUUUVUVTUUTVSSͿ´47AEQ\eov}~~~}|~}{~||~qz}|zzzz{|{{z{zyxyyy{{|}}||}}||~~~~}}~~~~~~~|}~{|}z{~{|{yxxxxwxyxxyyyxwvwwttuuwvvwxvvwxvuuuuxzwvutuuvvvuwxstuwvvvvvvuvyxwwwvvwwwwtvvuuvvuuvvwvvvvuuvussrqprvxxxyyz{~|~}~}|{}}{zyyxxxyxvxvuvvvusttsqttssssrrpppqrrpoppnnnppooooonnmnookmoonooompmnronnnoppopoopopoqrqrqstqoqqppqqooppomnnnmnonmmnmmlnqmloopnnpoklnmknomnkmkkljklljijkllljiikmkjiikkkkkllmkjmmlljjigkmlkiiihhiiiihhjlhgiiijhjigfhihigghklkiijhihhiiijiijiigfgfhgghhgffiiijihkjfgjhhiihhhhihgfhiikklknmnppooolmnoopolmnnkkikllmllnmbb~~~ucMUZ^abb``cefdgfefedfdfeddeeccba`_^bddbcdefcbcbd`ba``_adcdcdef
kjigdbbfigfdcccgfeaaab_a`__^\\\\^`^^[``]\[[VWXXWUSSTTROSRTRUSRQSVUTSWVSSTTQRŵq!6@?DL^lty|~r|~~~~~~~}}}~~}|}}|}{|zyzy{~|z}|yyxxzzywywxxwwvwwwxxyxwvw{|{xyyywy{zyyxyyyzzyy{}zyyzzzxxz{zz{zyxxwwtstwxy{|~}~{x{~~~~}|{{z{yyxxxyxvuvvutuutsttsrrqqqpqqpnopoqqqppopqrpopppqrpqpqprrsqqtqpoqppnnqqppppqssrroorsqrssnrpppqpppoononnlknpooopqoppprppppononmmnmmooonmollmlnmlmooklmolnnmlmnnnoonlmlnnmmkkmnkmljkkkjiljhiijilkhgijkjjhghijiiiiihjjihjjiihiijihjihgegihihhghhejiiijjijgiihghjjghhhhhklkmmmmnnoqprsrrqnoppomnlklnjjgkklkilmmlk}}~~ueUW^`ebdfghgjgihijklliijhjligfggeggeceecdba``__b^_\[^]]^_`_`_``fjjhhdbbfgffcccffebaa]]_\]_]_\__^`^]]]^`]\]\WYYZ[X\ZVWVUUTVUSTVWUVTSPRTTSNRô]!6BIMI[flqtvz|~v|~~~}|~~~~|~|}}|}}z{{{{{{|{zzz{{z{|}{|{}~|||||{|}}|~~~~}~~}}}|{~~|||}z{z||zz}|~{||{y{~~|{z{{|{zyyyyxvxuwwxtuturqsstsqpppqssqppqpqoopqomooppporsstrqnqrsppqoqpqrrqqprrrpqrstsuvuuusssssstqqqqsrprsttsrtstrstrsrrrssqrrrqrrttssspooppqqrqsqoponppnopppqrttqqponlmmijllkjklkmkjkkkkjkiknijlmllkkkjhkjijiigjkiikljhhhijhhjjiighhggjihiihiiiihiighkkkjlmnlmlmnmmmmnppprppoqnppoknmllmmnoolqmljonlmlkkji_}~}udQV\\\^`\aba_adbebcfdfgfehhfffeda`a_aacca_`a``__Z[]^]`[__`bbbca__giljjdeddghggd_`bba`bc_^^^^[\\ZX[^[^[^^`^__\ZZ[\][\[Z[XYZ[YZZ[[\XYZ\WWWWUR15H_cbeigkmlihv~{}yx|xy~|{z{{}y||{vwzzy}x|xy}wvw{vy|w{{xwyzrxy{{zwyyu||}||y{zvzwvvvvrrnpoutx}{zx|~{~}x~~|{}}~zw{~~~~~}{}~~~}~~}|~~~~}|}~~{}|~~}}~~~~}}}~}|~}||zz|}{wxyw{z}~~}|}|}{{{z{|}|z}{||yy{yxxzywvyxwwvwxwuvuuttuuuttrstuttuuusttvwtttuyvuuutvtsttuusssuwwvwxwxwyxwtuvuwwuvtrtutw{vvuttwvvuvttuuvvwuuvvvvttruuwuusvuuttusttuututrsrsvutstrrrsrqqrrqprqronpppnpnnopkooopooonnpoqronprpoqpmlllnopnmnonlmlmlnnlklmmopolmqomommmlmnlmmmlmmmlknmlllmooonnkjloknpnpqppommlkgijkehkgejihjljf}~~}vcPX_`bca``caadcbb`fc`bbdcefcbdcc``a`a`a_a]]\^_\_`a`baccbba``acghebehijkjjgfdfijhecc_\]a`_``a`a]_^_`[[X\Z\[ZV[ZXZ[]]^a]_`bcb_\]]_b^\^a`]Z[ZѾ[B9Wr]hʯƺ|}|z}{|y{uzx|y|uyvuyrrpzttvzwsywyswuussrqtntoplosrssvwnruswtrttvtotustsqvprsvsrwssvrxxuurutwvxvvuwyywy{wz{xxzyyvzyyyyyvwttvzyyyyxyxuv}xw{{zz}|zx{yzwvr~mrxz{{|y|}}}~||}}
~~~~}}}}}||}||{yyyvxyz{|{||yz{{|{}|{}~}}~{|||}{{{{}||}|{zzzyx{yyz{z|yzz}||z{}~~~}~{zy{}z{zz}}}}~}~~}}~||~~~yxurxyyxy~}~~~~~~|}{yxzyzz{{yyzyyxyxyyyxzwxzxxxwwxzwvvwvvy{xwyy{~vuvuvxzvwwxxxzzzwwxvwyw{z||wvwxxy}~xvxywxzwvwuwwy|zxvuvvwtuutsquuuutqsrsrutqqrtqrqpooppusprsttustrvtuwutttsrsutststrrtsrqqsqpqrportuspqorsqppqlmpooopqqsqomklmoonnmoonmmnmnklmoqnrsrmlnmmnonmlkmnnnolnmlkmhihijkmklnrrruutrnjjliiilmnokkknjjlkklmpmleg{~~v]LQY_ZV]][[_`_`]`^abbcefhggeefec__^aaaa`bb`cfd`addbcfdhihhedbb^bfedbdegijkkiieededgfdegdccba_^^^_^_^^^__^\_^]_][\]]WUUUUXZVUXXYWVTRWVVXYZYWRSay[eƿ̷̼z~~{}}|||~}}x|z{|{~xzzyxs{xytqvywvvvwmoxxrqsqmsutvmutuqsvtopn{}}jemnppsnqlqrtqpqqunoqspuplnoqkoproljhpklmooprppuljqorilkqgonqjqrnmmnloqtpsmpuqutssstrsqmoqouoopslopurtnsrssmliomqoroqlqpnonnoonnnmnrunnnpmmppopmmmnjmmonjiggklgkhlmkijffbekknpsroqtvvxxywttvqtsxmusxzyxywsutsqrrklnnnrqpuzzzxuuspsstrqoppqqtrnmlnpnmmnnonoroosqppoqqnqrplnlmorpqpoonnqpppqpoqoorturqnottvsmqtuopusqsvwuwwy{ututxttvuwtxtrtpqttvvuuussuqqpolqusqssutsnnqqpqooomlokjljlnmmmnnmlnoppqnmpqommmrqnnmmnmlmprpqmoponmopmokllllnpnnnnmkkkklmjkkklmmihihighhggihiffigdifhjihjknmpspnmljihegggnnnoorpljjfefkhjkfeegdgZfz~~v`WY`ecedegkfhffhiieegijhhghfdghecdcbbcadb`]^][ZZZZ]YY[\[cfghigfb^`fcbbedcfeehghgfdebdedddffegeddcb`__`]Z^_^^]]Z_^]`_^]_]^_\[ZYZYVUVYWWWVSVξ~M__\WfƼǸŽ̶~~~{||}|z|{}{z~|y}|||{{~xuvztx~yt{|~vz}~vvyuoyytqvv{stt|sqsytnrrsptvqkopyorqrlnnonppjojjnjnkmqgfnmhjnhlanek_kakgngebgkblhahiifa`ehac\\Wc`e\`ei[^bca]^`V[afe_d^`bdbeac]_Zb\\W[Xbd]]a]__X\[`][Y\_c_X[\XU[YVUYXTXZ^YW]]\YT\UVXTRRUZ_UWVZUSTVT^[XQ_ZXXZXYTWUXWVS]Y[\]YXY^U\\\TUWY[Y[\[_`^Zb_]`a_^]_a__^baba\_^]\c]]]^`a\\_[]`^^[WZW^^X[][]^`\][\_`\\_[]_a`_]]__ba]ccdb_]\][^_]]\Y\^^[^Z^_\_^ad__a\][[[[VYZ^]bdgcdied`]\\[ZZYXXWWWY^^`dcd]{{~x_RY\cacbbc`baa]]_bab^accbbbba`\]^]^]\ZY\[\\]^_`a_`a`aa]W\\`fhhihbb\_bceedea^`bdeihkffeecb_dbcc_dccbbccddeabacbac```_a_`^]_`__]]\Y[\[\ZYZYYxQi_^ZiųĿŹľŵ~}|~~{}}{|y}zw}~{~{x}u|w~{|{}{~x|~x{{xyv~w{|x|}}t}|||zzt||zqruyqyvzszrutzurwopvvruurmnnzotqtspnumqruoopsmmljmnmnmjnnnjoiokhhpfll
nikimglfkfnjkblllfgikbgfmghfgcgfk`eeidcjj``cd__dg\]cgcabcb]cc^[`fb`^e`]`e_]bd`Y_c^W^bY\`^]^^^Y]XZ\[]\YW[VVWQTTTZ]HEFKFE=82:GLIJJFLCKTPMNJJKOLMLOMKLKLHKKKHJDLLLILMKIILIKIJOMPONOPSOMSRPIOTUUUPW[e|}tYHSZZZ`caadda`]cdb^]`cccbjjjccbaa_]][\]^``_ba`_bddc``accb^\[]_fghjfeca`_behflfge_cddecdeffecbbb^\a```aaa]]_`^_`bbbccccacdaaa`ce``^]^ba[[\]oNhb^Ucø}}z|~zz}|}||{v~~w{yx{v|~x~xy}r{v|~|y|}{v||~xx|~x{y{wxxyvwv{wxz{swy{vswxtquwzppuwrxwzrqvwjqtxnquvtquvqmryrnqspmtsrjtsojtpsoqqnltpohimqmrnvooonlqilisnolnklhnfnmmmlkohijifipnjgjojijjffijigklihjljilnkhjmijkthehmhhijggjhcdggeeee`aad__`^\^llFCDCC;0#)EEGHEDDLUYQVURPONPMMKKJLMJGIJKIIGIIIIHEIGDHJLOQNOOSQPTXYbfhgghihhim~~|qb[]\[Y[^[X\]YYZ\Y[\XW^\XWYYXVXWWYZWUWZVVX[WVZWTY[VTTWVWTUXUUTXPOTWSTRPPSQLPNPMKKLJLOPPPRPRRQQTSMPPLNLKLJGFGIMLOPQOQRSQTPNNRRNOLMPNPKMPPNMOοѽ¸ZVphbZc¿Įȼ½~~~}|y|}~zv}x}~~}z}xw|x~|z}~z}x}|w}}{uz{{yzy~|txx{xwvyuu|{zv{vzruv{rwrwqruyutwxstuwqruysotvqmurqoutrmnoqotpqrtoplplrmpmqquoomromkqpolnmmknlpnqnnmnmqlmjjlkinmnlkkihjknjijljlmnkillkkonkjppkjkniflliiknhgkjdfji_`cdcbabd`_csoIIFGB9.!*AHGJJHECFX[QRURONRPLNMKGJNKHKPJIJKKGHIHGIJHIJNLMPOORRRQV^giljjkjklnu{~|qb^__\]Z^][X][YW[Z\]ZV[YW\^[XXXYT\[WUXZUWXZWY\YVYYUTUZXZSWZWVWYUUWWPVURQSTQONSJQPQNOONPSVQQRPMSSPOPMMRQNKMLLNONMNPOOQRNPRPNQSNLPONLRLNOMJIMʾWVlg^XiƻĴϺ·}y~z|}~}wz~w|{~~z||y{uy||~x{~qyy{ty|wyy~wz{}wt|}vpw{zqswznw{{osy}pqw{tsuxpou{qnuvqnvvsouvplrttkqsrlqqqlrrpnpqrnrsqlpqpiorpjmoqlmnskmonjknoiknnkjmqjkmnihlojkklkmkrnllkllklmmllnoljhnkgikjkllhihjgijhdeefeb`_ba^csqKHGED9, (=GHKJIIEFWWQUQRQOOPMMLIHNOJFIMIKILIHHIGHJLIJLNMPRQMRTTPT]ikmkjnmnopuv}~zse``^\^\]^`X]]ZWY\[ZZXZX[]ZYYZZZX\ZYXXWVYVVWYZZYWUVWV^VXVXYYXXXVYUVVYUTRQSQRQQMRRPNNORQPQPRRTNQSQNQQNSSQNPNKNONLOOMPOROQRPNPSONPOOLOKLPOKKM¹UWomhTeˮǸʶ´ɮ}{{~{~}y~}~~}|y~}z}~}}~|}{{~|y{|z|zu{z|yzxvvzzw{|rtyz{wst~sxxvouxypwxzrsuxqqtyqrtwstvtrntupnttqksrvqqqrqrqplqproqqoknrrlrosjnotnoptkoqnhmpskkrslknqgjmngjmnllnplmoskinplloqlkmmklonimkkgjllkjiieiikgjhgddfea_ba`cvnIIGHF<. 
*?GGJKIIEDTUUWOPPPLNNMKKMNMJJKJJJIJHJIKKJIJJJJNMQRRPQVTTXcknqpnpponpv{~~}}zsd_`^^_[]^_[^`]Y[^^[\\Z[]]YZ\\Z[Z[XZZWXZYVVXWXZ\YW\\WYWZYXXXZWXXXSVWXTTTORTURQPRQPOQRTRPRQRSSRPRROQUOQRQQNOPNLNONOQRPSRSRRQPPOPOPRONNPONJKMӾûRTmkcVbǴȼȰƷ~|~zws{~{{~y}~zx~w}z{{{}z~u~y||}w}zxy}}xx{}{zz{yysxyr{{wn{xyuuvytyuytvuwstvyrwuvrsqvspstoosstqooqqvsrrtsrnooppqoqoporoplpmphpnsqqmngppqhmlrlknnjmnslnjmhllojjjoiknojiprliltmjlnjmpofilmijomiijjeijjghjggedeccdabesoMJJIF<.+AHIKKHEBHTSUVRQSQLMONMKOOHIMJHMLIIKKKJLKJJIJIMNPPSTSPPSValorqqqqppqw{{qa\`_^_^__][]_\Y\[^][[\\\ZY\\[Y[YZXZZVWZ[XXXVY\ZXX[\WXTYYYVXWXVXWSVXWSUXSSSTQQRRMPPPPSQNSQRPRSQPSRNSTPPSROLMMKNOLOSSOSRNQRROPPOLPQMLNPLNMLLǿż\XmjfU_Ϳùȼʻ¶³~~}~~~~~|yv|~x~|{y}xy{~u~~xyztt}uty~{rwxqz{znxz|tv|zlwxyptu{tsy{svvwsqqwurswqovutnurpmuxsqstrlpsqlprsmoqsppsulopslnrrlomrjonokponmlmnkomqjmkokplnknnnlmmmmpnlktnklmjnopklmollmnjhjlijhjhjhggfffecabadylOMKJG=,-CIKLLIFDIUSTXRRSNLLNMJNMMIJKJJKKGHJJKKMLIKJKILPPORTSQSUW`oqrpqrtsprx||pa]b`\]^_[\]^]]^\[\^Z[^[[\\][[Y\[\[YYZYZZZ[[Z][XXZY[XZVZVZYYYZYVWVXVURQRSUTURRSRPPQPRSRRURQQSTSRUSORSQQRRPPNPOQQOQSQOSTRQQSRPSROOQMLPPNKNNMͿ¿VVnkdWbóž½´|}|}~~{~{~|}}}}~}||~}|z|{x~}~t~}x|x|w{}tx||{wzyvy{zvwuvuvwwxwvsvt{uvxxsvsssspuuurupltsurutsksusqttrmqutpuutnnqslnqulnqnkosocqqrlmqoilopkmookinpiimpgknollopkknpioorklnoknnokjmneimnkklkhjjjhjkhdegfgdbcccbdvkMIGHH>+-CIJLLHGDHUUSUQQOMOLLMMSJIMKJJKIIIIIHKKLKHHJJHKMPRRSSPRRUcrsrpqstrrsy|~}{o]^`a]\_^Z]_`[_^]\^^][]][\^]\\\[Z]\ZZ[[W\\[Y\_[YZ^XWVYZ]VYYY[[YQYZZSVUQSVXTSSSRQRQPRSRSSTRSSTSTSSRQUSQSPRNSNPQQQRQROOSQRPOSTOPONLPPPNNNLNML—ePZrlb[eɼĹú{}z}}}{z}~|~z~||}}~xy|~{}{zzzxyz~zzw{x{~wtz|{xzzxr}zxuxyytvyyrvwwtsvxssvvsswysotssqsvtstutttturrqrqqpstqoqrtnqnrmsroloornwoqmmnrlmopgloqkknqjjosnknsmlpsijmrirssnqqpkmpqmlopikoojklmijllikkkcfghhebdebbc{mPKJLI>(/AHILMHHBHVWRSQONNNLLNNMHJNKIJKLJJKIHLMJJJKLJJLNPQSSTSRVYeptsrtvusrsz~xoc````___\\_^\_[[\a^[]\^\\]_Y[\[\^^ZY[\Z\\\Z\]ZZ[^W]ZZ[]WYZZYZ[VYYYSVXSUWTRUTSPRUTRRTTRUTPSTSTUUPQTSQRSOQQSMOSPRTSQQRSRTQNTUSQQROOQQMNPMMNO˿¿LL[rn^WaȳŴƹ¹»~~~|}{y|}|y~z}|~|~z}{~x{uzxy|vw
{~xxzxryyuz{zsy|{nyyzswxxru{{ptvxusw|rsxzrtu{qprtspuvtnsvuqsvvorsqnrssppqsorstktqtoonpmrnuklnoilpnipimlokollkmnqnlmpmmmoikiplsprpqonknptnnkmjjmnjklnkijkikkideeihebdeccc~jMJIKI>#/@IILMJIAISUQQSRNMMNKKKKHKNLHIJJIMKHGLJIKJKJJKNOQSSSSSQU\gsuttuvtqqt{~~xndb__aa__`^\]^]\[\]]\]]^]]Y\YZZ[]]]\YY]]\ZZYZ[[[\\Y[XYXYY]\ZZY[YXXWWYVUVWVSTUROSSRRTTUSRRQVTSTTVSRSRSTSNOSQOOPPQQUSQVSQSSORTQOOQNPONMMOMOMOo=J\ppdY`ȸ½ɺžĺ|}|{|~y~~||x}}{}{z|{y|w{{{yy~zxzyx{|vy|z|x{{|sxxzsxxzvyy|tvuyutt{rtwvrtsytuspuqutuptwvqotvpsqtqsvtoqtuopttitttoqrrmrrxljprkqsujmnrmlnpkmnokopojnlpklmoinqrjostoloqjnqsjlnlkooljkkggllhfiiihihgdfecdfoNKLMI=$2DJJNLJFCLTTSPQSPLMOLMLKLJHJHHKKJLJIFIJHKJIHKJKORRTSRQQV\hsutuvuusru}~yndb___a^`a`]\__]\]\]^\Z]b_Y[[]Z\_[Z_]YZ][XZ[Z\_]Y\[[WZWYZ][Y]YZ[XXXWYUUWUUVTSTTVTTUVTTURRTUSTSTVUTSRSSQPQSRQQPRRSTTRXUSSRQRQSRQQQQNPMMOLNNNYBDYnoa\dȽz~{~{|z|~{|z|y}~z{}zz~{~|~|{{|}y~}z}s}|}{xuuxxzxwuyy{vyxywwruyzwuuzutsysrttuwtvtutuvvrtvtsrsssrsutqrrtqqtuprnsnsnspsqtmvqqnplrkoqtmoookooqkknpjlqqkmoomnqrnorqkoqqkkonilorjimmhknnijliciihhfdgecdkeRNMNI<7EJLNMLIGNRRNNOOMLNMKLMIGJJHIHJIIIKHIJIJHIJJOLMPSSVTSTUUZqzxyxwwvvtuz~vk`a`ba__`^_^__b]\]`a]]`^[\a^Y_^^]^^[]]]Y\^]\^\X\\^[[W_^\Z\^W[]_[[[WVXWUUWWUWVUUUUTSUVSTVUTVUSVYUTUUSRSUQSSQPTTQQVURTVQPTTPUUPPQSQRTQMOPNPOO~XB@BWrn\[_ȷ»Ǽù~~~}|}~}{w~{xw~~x}z~yzyz~xy|tz|wwwyx{|tox{ztxz{ux{}svz|rsz|tuu}stwzuqsvsqvwqqrwuqvvsqrssqssroqssqstpnwuthrrtoqspnsqrpppmirnolrlommnqmoimmpkmlmknmooomspppplqpqklnojlpphinnemnljhklchihhgeedd`p{ePOLNL=5DJLRNMKEKQQNMNMJJNMIJKHGHIIHIIFGIIGHIJIKJIIMNOOTUSPSVTTXpxyywvvtttv}~~vjb`^`cc_^a`\_``]^`\___\_]^]^^^\]^_^]_[\[\]Z]]\]][_^Z[^^]Z[[Y^[\][[UYYYWUUXWUTSSRVUVUUSSTUVUVUVXVTRRSTUTSTUSRSSSRTUTVUQQSSRTTSRPSRRSPNPQOPPNkKDADXtn]_^öƵʿ~}~~z}~||~~y}|}zz|{{~{{wy{z}uyw|w{}wr{{zv{{zwvwztxy{qxzyqvt|rvvyvruvssvxprrytuyvsostqntwspqssqqstpsssnpstnqtplrqqlqrpjossjmorjlprlpnnfprqhmpoloqoipqqkqqqoolonmlmnmjklojnkkkijjikjiihfeeecq{gUNMMN> 
6FILRPMIEMNPQPNNLMKLJJIIIGHHHIGGJHJHHGKKLKIKMLOPTTSSSUVVYqwyxxwvutrv|}~wkca]_`b`___Z_b`]^`Z_ba\``_\`c]Z^_]]^_Z]_^\]]]\^^\\]\\\Z\ZZZ[^Y\\[XZZXVVVTWWSTUTSUUVTUUURVWTVWWVVXVUTVTTTTSTTRQRQSUTTTUTTSSVUUTQQRROPPNOOPPOɿhECAEVuq_dcű¿űƻû}}z~}~{|{|~z|y}|||}|{~~~z}y~|~{xux}{{xzw|xxz|vyzzwzzytwt{xwtvpxrzvvvvutuwuvpsuuqswprqtsqqsrssprtpqqursprrtsrmqqpmporlqqrjorsmloqhmoskommfqrqhnonlprtjpttkmqpjmknklnlkmmljnmjijljijhijiiffeccfqeUOQPN< 4GKMQOMJCLKOPNKNPKHKMJIKKHHIIHFHKGGGIHJLKIMKKLPPPTTVSRWY\nwxyxxwuvsu|~~}~xlaabc^aaa^``_`a__`^a`_^^__]]^[]_^\_^]X\[^[^_\[]][^^ZY]\\[YY\[\]Z[VZXYXWWTTUSSVVSSUTSTVVSTUSUWWTXWTTVVRSTRQTTQORSTVVTTVVUUVWSSTQPRSKPRONSPMN¿mFBACTtrbd^ȳƳŵ¯Ũ~y~~{}|x~}|v{~{z{z{||vz~zwyy{x|tz}zux}}sy~{sz~{sxyysuy{qxrzusuyustyussutstvtorusprusptysoqurntuuotutqqqsnnqtnrqrnroqpppmlspnlrlonmnsookqnnnppqkrpqmloqkonqlnnlkmmkjokjjilihichjigfeeedfrfVRQNL;!6FKNQOLIEMNRSNLMLJHKKFIKIGHHHGGHJGGHKJKLLJMLMLOUOQSXTRWZZqxyxwxxrxvw}~yk``cc^`aa^^b``a`^_ac[]a`^a_]^^`__`b`[^a^^^_^^^\_`_[Y[`_]\]^]\\]Z\][ZZZVYXWUTSVVSVUUWVYWSVVTYX\SWXWTUWRVSQRUSQOSVVVXTVVUUUWVSTTSRRUQQSNRSNLMпžoGCAAQsradZƹʿƿɺôâ³~|}}{~s|}w||~z}xy{y}}}xy{wvw~zyx}tx|ywz}zsz}vt|}{sxxyrvwysxtyutqwuusvsrutqvqxroutonrutpsvokqvrlrutntsslrsvpnqrmpsrkqqrmnppjprphosnilqpkpopjjpqkpqtnooomqoplomnkklmmonkjlmjkjjc\hjigfgfedeseUPONK: 
#:ENOROMIENQQOLMKKKJKJDHJHHIHIGEFIGHJKJKKMKJKLMQTSQRUTVX[Vlwyywxvrxww}~xi__`a_ab^[^a`aca_``a[^a_\`_^\^b_]`b][^^[]^\Z`_[\`_Y\^__]\\_Z]\\Z]]YXYZSWXVTWWVVXWVWXVVWUUVWYTTUVVYWUUSVRSUSSUSQRUTVVWTTVUTTTSSSSPQSQQPTPPONѿÿr@C@ARtrai^ʻžĴ{}w~~~}~}}~}~||zyx||z|}zzz|u}z{y||yxu{{{zyy{xvz~zxxzwyxztvuwsvxyuvuwstuwtss{txvsuutsrststrqtsrnortrqsqvssstrpopnrrpnrrpinqrjoqqhqrnhkpojpoojjotiqurikrumnpoklmlhlnmlookilojhieZ]jkjgffggddrfUPNNK:%:FOORQNIENRPMNNKKLOKKHHGIKKHGGGFIIJJKKKJLMKKMNPSPRSTVXWY^pxzyyxutvvx|}}|xiaa`a`aaa`a``bbba^__]^_aaa_^^^a__`b_^^]]_`^``^Z\aa[__^]_^^_S]\[[[[WYXWUXWUVXWUUWVUWVUUWXWVXXVVWVUWYUVUTPSUTSTTQRVTUWWSUVTSSWSTVUQRSQQPQKPQPuEDAARpt\k[ĸķ~x~|}|zy{{|||}|~w~|v||x{yv}uy{xu|~wrx~}vw}{uv|{vx{xow}{qux|qsw|stvyptwytsu{tsxvpqttqqsvtprwsopsqqttroutsorspmoqqlosropmoptmnoplqpoklmmlqmnlkmunqqlmmnslmmmlklkhlnpnnomihmkjb[V`ljiffhggefszcSPNNK9&FMNRPNJEPQSRNJNOKHKJHFIJHHHIHHGHHIMLHJLLKNMLLRTSSTTSVVZcty{xwxwwyus~vjacbbab_c`]`ba`d`a_a_``_a]]_b_^_```_^]\\YZa_]^^]]^\[]^\]^^^\\Y_\VW[YVWYVVXUYUVVTUVXUWYTUYYVWXXUWXTSXUQUVSQVUPQTTUWXUSUWSNTSRTUPQUSQRSNRPRNOƿeUIGOjt`lW̷ϵż¹~~z~{|}x||{}|{z|zx{}~~u{|zyz~u|{|xy|xwz}zuy|zty||sz|}xsvyqtwwnuv{rsuyqtuxsvxzrosvqrrwsruvrpsusrqsqqqssqtrqpstnpqqppoopopnpoqmjjommorlqimjmmpnoonjlmtnrlqikmnjholkmmjkmjhgii_UP]jjfffggefew}_VQMPL6&@JMOQMLIBPQSROMNOMKKKHGJKHFIJGHIHJJLKHHKKJKLJJQTRSUUTUW[apz{xxxxvwuv~~~wibdcb`ccc`aeb```_`a`[aa_^__]``^__bc`]_^]^_``_^]_`_]\__]___]\_^_[Z\\ZXYYVYXVYWUWUUWYWWVUWYZWVVZXVZRVXXTTVTSTTQTVUVVVTSVVVUTUTUWSRTRSSSORQSPPye\os_n^|̺ǸŰó~||~}y|z}|yvz|z}~~w~y}~r{~zw|sz}yuz|xxz{wqxzwtx}|q{}{qswyquxxk{vwtts{ruvzrvw{rptypotxqptvpnuvootuqqrutnqwrkqvpmrtojorolnpniqonhlnnkoqpiklkgqpnklmmjrpsjomkmlkilknmlmljiilgkh`S\hjheefgfefw~]WPLPN4(>JMNROKIFSURQOPONNNLLJJJKIJILHIIJHHGIIJJKIKLMPSSRWTWTVXZ`syyxwyywwvv||~~~vic`bc_dbaabbaaaa_ada]`a_^aa]_`b]_bc^^`^^__]``^[_b^[^_]__[__Z^_][[^[\[ZXX[XUXXUXXWWYYUVYYVXXXUZYWVWXTTVUVSUQQRSRUUSSWTSUWVSVVSUTQPSRQQPPPOPPk]po]o\sʿ̺ȨDZ~||y}}w~}|}|}wy|{u||~{{~v~|uw{{xzzxwy{zvxzy{t|xztxvxuvw{s}uttttzuvswuxxxrvrxsrtwquuwprwumquwopq
vuqoupnrromqrqjqqpnnqolqomhlnnjoslgooldsuqinonjprrilqnkjmmjlpmimnjjmpgfknb`jjh\`fhfec|zaTSORL5)AKMPRPLIHRXTPQRLMMONKJKIKLJGLKJGJHIHKLMLKKMNOQQQTWUVXXV[euzzxxyzxwww}y}}vgaa`bbebcaabbccaadda`_ba`ac`_^d`bb`^_`[^_`[^_]\``]Z_aa`__^^Z\^[[[]Z\\[WWWTUVWUWXXTWVVUXXUUXXTXWUUYWSUWUUTSPRSROTYTSWXUVVVRVXTTTSPTURQQQNNSQämZnpZn^qǿ˸ɨµ±¹y~||~~|}{}~|~}y~~zy~y|~|~}w}}z{|xyxy{x|{yx}wvw}ztw|ztxw{uxw{stxwtuy|otuwquuwntwytutumruurttursruusqsqsttpqtumpqqqnrqpmrnnnqqnmllnmoklnjipnlmqmkkqpojmokimpohmkjhklkijnljjolikkedjmeehheT]deffg{zaRQKON:(BLNRROLIIQWURRSLMMONKIKJLLIFKKJGHHIIKLKKJLLKMQRRTWUTXWT[duzzxwyzywvxy~~~ujee`bccadb_`cbacbc`_b__aa^`baabdb_aa_^_`_`^_^^``]_]]``_`_^_[\][[[\[[ZXUYYUUUWXXVWSXVVVUXTTYXVVUVYVXUVVVTSSSUSRQTWTUWVVXWTTUWVUPSRRSQQQPQPRPåkWns]q_u͹̭ͭů~{}}{}}}}{y}|t}y{~z~v{{vxy}uy{xxywuv}ysuyyuuy|wyw{tvxvsvw}nwuurstukvvxrutumquvorstpprusqsupovxposxlhrsplrtngormlqvmfnsnionlilonjknnjlqkgopnmmqlkmlliklmklmkkmkhlmlihhiiliijie\_geegix{YTRNPT:(ANPRSPMIIQUVTRPLMNOLJJLJLLJIKIHGFHKIJKKKLLJKLPRRSVTUUWW\fv{}xvxzyyvw~y~tidd`bdbabb^abccbbb_ac_^`b]]aa]aca\`b^^`a^__`[]`_\_`\`__b`]\\]\\\\^^\WXXXYWWUUYXXWTXVWXUUXWVYYSUZZTWVTQVTQQVWRTVSTUUSVYVUSVSTWUPSUSQRRPRSPMO¨l]ls^s\xӴŹȹîħȦ}~}||}}}w}z}~~||||{{t}yy}z}u}z}yyv{yyxzxwvw{zxwyyyuxvvtuwuoxryttsutzrzrutxququstpssurprsrqpxoputqqmuokpqnnqpkmprimqrmjoqlfmnihmqmgkpogkpkeqvlglrmikojckpnflliimlihknigimiiijigaX\deeffvz[WRMPO8(CPOSSOMIGQTWVPPONLNNLJKLMLIJLJLIGJIJJHILJKMNNORUUUUTTUX[exz{xvv{yyxu~v~thbcccbdfdb^aaabbbcabda`]`^_aa_bba]``^\``]^_a]]__^]_Y`^_]b]\[\\\\[^[\[YXWYXYVUWWWXVWUVZYVWXWYXUUWWSVZVTWURRTVSUUPSWSPUWRRVUQRUSPSSRRRQMQTOLP¨m^nu\sY~Ӵëz~|{~|z}z{{yz}|~}w}y~txzvz{~wzzww{}vuz|xuw{xwy{zuy||uuxxqvyxmtx{stvvqtvxrsu{qsvxqsuwoqrsqrttqurrsrrtpsqqomqromkrppnronmnokjlpjjkmoljllmjklkimskhkrojjlkgjokjlkhikkhhhljfjkegfiif`JZfgggfzx]WSOPM<(CMNSQMKJJQVXUOQQOJLNMJKKMKILLLMKJIJLHILMIKNOMMRVTSVUTTYWavwyxxz|xwwt||~~tgbcec_dfdaab_adb_bba_bca_ac_]ab`acd_^^__`a^```_a`\]^cba^d`_^]^^\Z_Y^]YWYXYZWXVVWWWYXVZZWVXVWZYXXYWWYYVVSPSUTVVUTWWSTXWTVVVVRTUSURQSRRPRTNNQĬnblu`r\Ϻ·ýŰŵ~|~}
}{}|z~yy}{}}|}}w|~~yyvyzxz~~pxypu{}vvxzwtvzwswzytxzztuxwpwxvptvyluwxmrq|ossznpqsmruuoqsuqsvtortspprvoloqlkqrmlnrjlotlgmqmgjolglpkimmlhlpjfikjhlmleoomgkjjkkmkhgljiijfkiheijldgiga=Wgggfi|pZVTMOL< )HMLQOLJIKTYWTTUQOKOLKJMMLHIHLLNKIHJKGJLJILMMMNQTSUWYVUXYcvxyuuw{xvuv|x~~}~|scacdc`bbb`ac`adb``b``_c``^d]]`_\bc^]`a^^`_Z^a_\``\]^``a\^^^]^_\Z\aX[\XVUZXWUXUXXXWZZWWYXUVVVWWZXY[YSVWQQQSSSURTWVTUXZVVYWVUSSTTTRRTQQPQPOQOҿοîoWht`}s_Ǻľʼǫ~~}}{{|~}~~~y{}}y|xzyy{|{tyy|r~x}szwxvxyywvxxwwxwzwstzvutsrtttnwtwprpzqsrwqtqtosssnsrtqttrmqsqnqqsmqqphprpjporjmosmjmqnigplhiojjopkhmnfdlnhjnpnboqibljegilihilhghldfjjdgjkdfhfcZcggeehzvZUXJPK<",FLMRQMJEHUWUUUTPPOOIKKMKLKKFKLKJILKIIKKKKMMLNPRTVXSWYZXYguyzvuxywvut~{~~}~|rccccedbaab`baabcaa`bc_a`a_da`_a_cbba_a___`]`a^`ba^]\b``^^^`\[^[]\^Y[\XVX[ZXWVUXYWUXWVXYZVVWVUXXUVYTSVWTTUSPRTQRVYTTWWUVVTUUSSTWSSQSOQQOOQPL°p[hq\}t[˹Żí~{~{|~~~|yyx~~}~{|{~}|~~{~|w|{}s{~}xxy|vyxwyw}xvw{yvwyxvv|xtw|vpvwvssxyoswvrutrqvuuqsuxnquurrqsmqssppptqoqronosqnlrqolqnqmnnokmnomkgmnllnkkoliklkfihnfnomldkphhjmbnojgfgkgihhbegggihhahjggghhfddjysXWTKNJ<" 
-ENLQRNKEITWVVVTOPOMILJKJMLJHMMIHILIHKKIJMMKLNOPRUXUWXWVXixyyxsxywuvw|~}}}sgeb`fdb_db_`bc^bdc]ae\`ad`abb]bbca_`^^ba^__^\^a`a`_]b^__`[a^[\]`\^[[[XWWWXWWWWXXXWVWUVXVUSTWWXXVWXUVWVTUSSUTRTVTUTWWTUTUVWTSUVSSTRROQQPQNNLırbgn^tZļöù|z}}~{y|x{|x{|v{}y}~wz}~wzsx{}vwx~tww}usx}xtuywsv{toy|uquvvtrvxltvvrusuqxvvnrutnqtuoqsupqrsnoqwmmprlmornjosminrlnpqlllojjnohhnoljnnkhnnkjilgimnkgikkkjoiihpligigghighhfcfjiefgjjffhgfddeg{pZUQLPL=$!0GMLQQOKECOXWWTSRTNNLMIMMKKMKLLKKKKJIKJILKKLMMPQUUUUUVUU[fwxzxotwtvvw~zqgebadc`_dc\_cc]ba__bd```c^acbYaca^``]`eb[_b_]]ba`___a^^`]\^^[Y[^YZYYXZZWVXWTVYXWXWVVXXWVYUTYZVWWVUUVTTTSRTTRQVXURVWSVYVUXWSSWSPSUQRTQNRROOOѹ«q[gmav\ɻĿ»|}{|~~}}}}}q~z}~z}|}{xz~}x{|~wzwzw{}}tywwtxw}tvs}xuvzvttwwvwyys{wuuuswtsrtrwtwrulvrxusizrtnstqmqqrppqqnrpumqqnknnpmjlqljmqlmpokkoqhjmoggknggnmhgolhhkngeklifjmidlmeekmfefi^cklaehfceihdeiihhfffddddgwkVSTLNN=&!1GMORQMI@16MTWQPSSLLNNJLLLILMMLLNKHILJHKJKJLLMPRTSUVVSWX\jyxytnwytuuu~~~{rfdbddbba`_]badbbaab`a`__a_aab\_``^___`^]\``]^]_\^^^^b\__]Y]\][[]\[WXWYXVWYWSRYWTUVVVTUUUVUWXVUTXWTTTQRSRQSSRQUVRPTUVWXVSUSQTUQQQSPQSQOOONPOп¯p[gn\yxZ˹~~~~|~zz|}|~~y}}{{{|{y}{z~~|v~~py|}x{z}tww|xxyyuwz|wvxztty{xuywuryxulxwumvvunvuvltsuqsrumqqqopsunpquopqtqrqrnkmrnmlonpoqlnmpmmmpnjmmllgjllhlmliilihjkkhhkmjijkegmkdgiicefhafgidfedehgjfffggfhffffdchzkYVVONO>$ 
/IOLSQLI>.)7LWPRRQFNMKIJJLHKMLLJKIIKKGHLKHLLLLOSUPUWVTXY\jxxvlnyxsuuw~~~|pfbcfaaec`_ac_acba_db`aa``bd_^_`_`a`^`a^`aa^`a]_aa][^a_`]^\_Z_^[^_^XX[XVXYWWVUVUWTTWYTVWWSUYWUVUWTRUSTVSQSURRTUVTSRUXWRSUTSVUTRTRSRQQQNNOOPMĿq_jna{y^ʸȾ½z~z}~~{}|~q{xx{z{|y{{y|~{w{}hzzry|ynxx~tsx~ptyxoru{ssyytpxxrpzypjzsuluurltvsissvnoswkpqpiourhpstnmstmoqphknqllmokloqjknqifmpjhkmkkimifiqkeinigjkigjjhjlmhhhkjhfjefghdedfeghhddghfiffddegffddffah{iYUTPNL>�GOMRQNI?,&)3FPUOPMQMLMMKKKKKKMHHIJKJHJJJIKNNLOSVTUVUUWX]jwxxgfsvtuut}~~~}~~}~|od`ccbced^`adcab_\ac]``a[_ab`_`^\_a^[^_]`c_]_aZ^a`[\`a[\[_^^[^]Y]__WX[VTWWTTWUTUWTTVXSWYWRTWTRVWTSWTRRURRTSRQTPQRSRSWVSTVQPTTOQRQOSRLMRNNNPLʿs_knb|x_ʼþƵ~~}~~}|||{}~~|||y}y{}~}{zyznyv{xxqyw~uvv|qzwxpuw~svxvuuvwrxzrpr{pysvutrvtrktqwppsupusolqssjsrtoppqlqqnemmpjnmqlomqjlltkkmmjiimhkijgejnidjliejleejkggimggnmgchjcfhi`befcdffa`efadfeadghgfddccck|cVTSQML?%"4HNLPNLH>-&'(1@SMNNNJKLLJKLIJLMHGIIIHHJIKKMNMNOPSTTSVVVU^m{yu^[ptuuvx|}}~}}}|ogecadcab^c``cb`b```^d`]`ca`aa`______^]^`_^^`a]_`_[\_`^\\__[[\^Z[\YVYZVVWWUTVTTVWTTXXUXVVQTXUTXXUUWTSRTTSSQRSTNRRRSUXTRTSRSTSMSTPOOOMPSLNQPLοt]jo`{w\}˼Ƭx~}}}y}~|~||}||z~|~}}|{|~yz~wxy~xy{}yvxw}u{{}xyzxoyxyqtt|utqxvxrywsswvrrxumrxsivvsoqutorsvostsntounkoqorrqknqrloqslkmolrrrkonokomkjphphjkjjmfmjkihfjiiikjjgiffhddfglhgljgeeg^mifabcdbdef_abebegdbehhffdfgbbieRUSTMMLKJKKHIIMJKKLJKKIKIGHIIJIKJMNNOVSQUWWVWXkxyxutuvutsu|~}|pfcb`bbbabbbba``aa`_db[_a_]^d`^^```a`][^`]^^[X]c\]`][]X^^ZZ^`ZY]\[Z[YYXXUTUVRUUWQVWSUWUSUWSRSUTUXTRVVRQTRNPQPOQTQRUTRUUOPSPOQRNNSSMMQPPQPLNNt_lu]zz\dzýǿó|~}~{~}~t~~~~~|~}zx|}v||{zywzxw|zvx{vy{zrx|zsxxxvutuqwtxoqu{swqyquruyutwtrtromvsmmtqwmmpolxqmmpppmprojtrmipppkmlndpooikloiglqhkmoklmngillchgoidjqgfilfafmgdfiecgkchlofggmecej_`eeZbfg_bdd^^bd`_dfeddcdcbdclzeRUUQMM@#"5IOOROJI<,$$%&').8FJLJJKIJILKIIIJHHHGGHJIJJKMKMQRRQUUUU[Znxywvuvvwutv|~~~~~~~|zle_ae_`db`addabcb__bab_c`^^_ca\^_a`b`]]\_ZZ]\[\a[^_]Z`\`\\]_[[Y]][XYYVUVUUUSRTUTRSTSVVUVUUSSUVOSVQPSTOQSOMPSRONOKPUUSQOPTSQRPSOPTQLNRPOOMKNNʻѿr]kwby{Z~ϸϾɭ}~~}{~|~~||}}z~|{~}{}{~~~uxz}|xy{wvxzz|{||uxvz
vyzvvwxurvxvzuvvxuytvtuqrxvqtqxsousumquuoutrkmqzmmprlnqrlonuoljvmnnpnmjnlnmmmmkkipjnimilljhlnkbkjjjfgoidiogfhmihhiecgidhjleghg_fggcbce[afe__de__bebbcdeedcdecablxeTSUPMO?#"5IMNSOLH<+%%%&&(+.7BKIHHIJIIHHIIJIGGGIIIIGHJKKLRRQTVSRVWWpxzyxwwxvsqv{~~~{~~~}}lb[bc`acb``ba^`a`_`b_`aa]`bba``a]ac\]^_^_a_]____`^\[[b`_[[_]Z\\X[]ZVXZVUTVVTSUTUXTSTVVVUXUTUTRUUSUSSTRQPPQQSSRPRRTTUURORUSQTQPOQQROPRQNMLLOM³p]jv[x{[~̻ýï~}}}||z|~~~z~{~}{{w}uv|~v~~w|vzxw{xv|yuw{wtw|su{{uvzwpuvuputzluuusuuzortsosuxqrtwpnrsonsrqotrmkoqqloqpjpppgmrvgkprjnolgmnqgknpkjkmiimnejkofjjkhifjcfikgijhghghghgheeeeehelhiiiffeledbfaaaebaee_a_a^`cc_edddedcbaaj{]RRRNLL@?BBAABBDDCDDBEFGGO`vywuusststs|zz~~yoc^]\_a^\\^X\b_]]_^[[[YYZ]YY[YWW\ZVX[YX[[XXZYUVYWVXYWXTTWWQSUVTUVSRTSPQRPOPQPPMNOORRNPRMORPOOOMNOQPORNKNOMNMKKKNMOMOJMPMKJLLLKKIIIIHHJFHHHHJHwbeseb]jo~ƻƷƽ~|}y{z|~||~~y{~yw}}~z|yyv}}x~}{w{y~t{~v||v{wn~zt{|zrz|}tu{{qwz{svxvuz~vpw|wp{yyosyyruvvnprxlQ>?@NXW.),---3CNU^rpinpqjkmpgmonfkpqdjmogjlmcfjmhfgjgfijdbhmidcgb`hjeaakeacfb`be`^dgd^adb]acc\\abY___]^`_Z[\_[]]_ZYZaZ^]bW`^][_Y\[b[`Y^[\WWXXVXYYWSUVXXWTSSQI?:KXYXY[[\[YWdcGEFIFBB)';BCGFDB@>GGFE?7*##%)+,./1001354654588>?BA?AABCDDCDCCEFHL_wxvutqstpot~yz~~zob_]\__\^^]Y^^^[]]]ZZYYYYZYYZZW\]YVW\YVZYX[ZXVXYVTUWUVSTUVPRUSSUUPQRRRQPNOQPNNMNMMPNLMQNNONNOOMMPOLLPJIMMLJMJILMHJLLJMOLIIJKJIJKHDEHHHEGHFGFFw_crfgaeW@rƷǷõ}}x{~~t~{}}~~{w}yy}~w}{}q~}}y~{o~wv~su{|wr}x{txz}uzwwpst|tuvzqnzxsqzwxmtxuoutsppmvlK962042+)('%%+>Naruriljmfmnlemomcllmiikhfhhkffekehhicejjedbebbff`^alaaaaa`bc]\cc_^aa_[_b^Z\__X^__ZZb^W^^]X\_]WX[^W`^^U]]ZUZ]XV]\ZWZ^ZPWZVOTYRSUUSRUWSRNG>87JYYWVWZZWVXbaCBEGEBB((:BDHGDAA>DDEEB?<,%#'*,//0./12435454669=A@ACCCDEDCCEDDFHJ_vwvtsprsrquqx}~~ync]\]`^^_\\^_]]^`[[]\ZX]VYZ[Z[\]\YXY[XZXXY\VWWWVVVUWUWVVVWUUSRVTUQTQRQQOQUQNNOOOONNMNMOONNMNOONNOPKLMKLMLLKMKJLMMOMKLMMKKJKLKGINJGIJIKHIIHJFGҿþvacufhb^K3?]ƽz~~|}v~x|z}~|z}}}{yy|y~xz}u{w{}zwu{|z{~~v}x}}u|zxv{{ywx|vzyzu{{|ruswutwyvwqvxqsvzuxrwvtrvrrptntnH631//.,*)*)('$! 
(7IXfpnvnqlkjnkifngiimgfhiehihejhmeffjfdehcdddbc]`dg`fb_bdcb^ab_`b`^`b`^_][[[\]_\^\][[\ZYWY\]XY[]X]\YW[]VSXYYV[YXUXZXQVWVPPVPPRUSQSSOLE=:89KWWTRUVVWVZa^DBEDB@A();CBGFDA?=CCDB?@A:-%$(+/11.12233555344589<=BB@@CDAACDDGGH`xxvsrptutrttv~~~~~ym\X\^_[]^Z[]\\[[_Z[`ZSZZVX[\XY[ZVVXXVWZWWZZRTWVSSTTTSVWYUTVUSSUQQQSPRRQPQSPLNONNPNLMPMLOOLNQNMOPLNNOOLPLKLMLJJKKMOKKMLKIJIIIGFGJGHKKGGGGFHHGHɿÿxbesbh_[B498R|~}}{}}{}|}}}z~{{}~rx|xu~{~{v|xz~wx{}uzxv|vy}qy}|uv}~r}{~wyw|vry|wtx{ztwxzty}{pstyqrv{usszunsxxqvxyprtvnpsujmkF5200/-)**,,.0/,($#" ,;MYompwpehkk]chjcehicfiicegjabfk]_dlbadh`_aecbbhc]ad`^ad_^`f^\^d_[``^]^a]]^`^Y[]ZX[ZZZ^\ZZ^\\YYYWYWXZXXZYWYZ[S[UWVWURRUSSTRPRQLC>;:78IVWQKPVVVWYf]CACCA@@&);ABEBBA=;BDC@>AA?:.$#(,1111/1203334423446=BC@DCB@CEDDGGF\wwwsrpsttttvu}~~~~~|l^[^`]\]^\[[_][Z\]\\YXZXUWYZWVXVVYZWVVYTVZXTUUTSVVTRRUSUORTTSRRQRRRPPONNNNNNNMMNOMJLOLJKLJNOLLMNJLNMLKLJIKJHHJJIKJHKLJJIGHKJHHIKEGHIFFGFEGGDGǿ¿z`cteg[T?158;Kqǿ¶}~}{~}~}|~}}~}}||y{{{~~}|~}y}x}|{w||x{|vz{}wxy}r}~u|||vu}|urwzyouwyszz{nssvrrs{trqwspu{wosuulpsrgnrsemeE62000-++++,./0,*))'(('%&$ &(4GQ_lkgekkhaefhafffbegg_dbfY`dda`df`\ac`^be][aa\\`cZ]__\\_]Z[__YX__\Y\`[WX[YW^]\Z\[VS\^YTYZXWWYYUTYVQY[ZTWYVSUURQUTQQPONKA;<;:67JTUNEMSVUTXd^?@DEA><&*ABA@CDEEEDFEMcwxxsslrsrtwvs~~{i`]]\]_]\]^\]^^[^^]WZ\[XX[Y[XXYYZ[XWWW[XXXWWXUUUVUUUSVSTTUUTSQSRPRTQPNNNNPPPMLMNNOLMMMMJJLNMLMMMJMNJJJJIHJJHIJKJJJJKIIJIHJKKJIJIFHIGHFFEFIGEG—yb`ric]R?/047?Fd|~|~{|~||~xz|{~|xxvv~{y}~|}x|~xz|v}{~}}|yx}vxz|w}w{{vy{qogqsy{uyvuuyx{wxtwquruqwtuqttttztputsqqoqkpouhvc@3/.//.--,,-./0.,)))(+**)(&%&$"! 
#(9I]npnknffbiecbhdeejacdg`acddc`^_cbbac_^a`\ab_]b`\_`_^\``^[^^Y[Z[\ZZ[YYW\ZYW[YVS\\VSX[TUXVWTTYTMYWUTSTPRQOLLQRQPQQH?:::9868IQUPAISUTTVcY@ABC@?;&);??BCAA<9@BBBA?=>>?:4*&')-//102232332132126<@>@DCBBCEEEIfywvsspqsspvss}~~zi]^ZY^_\Z\^ZY\\V[^\VZXVTWZWWXWVYYXVXXUXXXUUXVSUVURUUSRQSUTRTSORQPQRSOLNPLMPMJLMLLPMJLLLJJLKJKMLMMMJJKKKJIHHHJKKKJJJIGIKHGIIFHJIGHKGGHDFGGFGHGxabpf`\N=.0247@A\ư~|~~}~v~v|}|}y}|~}{zz{w}x{y~w~{v~{|{||~~zw|v~s|~uzz{}}y{{}uvzuxz~xz}~xrf``_gwzqxw~wx{srstWFJ[egrzsowy{ruwylot{qotunptusqrsvqqtpmosrjopta<2/--//.+,+++-/,+*)()*),)''&&&(''(%##"#&,9L\ejqsbdde`aeh^`df``ac_^`_^`bc^``a^\]b_]_b^\`b^Z\_]\[b\WX\\YY][YXYZVXXYWXZ\YXYWVTVWUXVVVTUWUUXVRRTSPMOQRROMG>:9:86646GQRK9FPSTTTfU>>?@<;8$):?>AB?A>=@AABA??@??=<6*&&*-.0221223323100225;?BDCCCCEEFMgwwtsroqrsrvqp~~}~{k_\WZ^]\Z[]Z[\[W\[[YZZVSVWUUTVXYVVUWVRVWWTUXURTVSQUURROQTRQRRNPPOMQQOLKMMNMLJKNMJLLJLLKIJKHHMMKKMLKKMJIKJGGIKIIIGJIIGHIIIGHHIIGIIIFHHEFFFEGGFØxbbld\YK;22113:?GRϻɯ~~~|z|}|y~{}~}||}|}z}}~~}}{~|y~}~~}~||z{u|v}y}x~|{zzyxzvyzx{xz|{ypQ;@@@IOThmvtw|}prvyl_NKV[dz|yrstthnnuooqskoqsroqsrlnqojlppkmqp\:1/-.-.-++,,+,-+*))*+)*+''&&%&'((('%%%$%&&%(..=MUfjjccedX_^a]`^_]^_`Z\a`Y]^a[W[^[]_`Y\``\UX\XY[\XXY[WWY]YUY\WSWYXUVZXVZ[WTX[WQZXRQUXVSUXSPRRPOPPOQSKC<998898405HRPF5HOPSPSdS=>@>9<8!(9>>@@>?<;??@@>>A@==>=;5,''+-/1111234455201233334236;ADLk}}~z}{||||}}t||u~zx|}y}~v|zv~yu~}wx{{yzx~xxx{w{}}uyzqw||xyy~xwyvM@=:::==DEQU]juoqpvyx|{uhWI>KZjmlspomrprpsospuqqmlqopoqllknknmmZ80-+-,,++,++-,,+,+))))**'(&'%&(*)(()(%%'**++*)++,76AJQTNGOU`_c``_b_b^`aa__^_]]\^]`]^^_]_Z[Z[Z[[YX[Z[XXXYWSYXTUXWSSUXTTUVUPWWSPVUROPSSPVUSQPQMLONNNIB;9787799407HONKCLNOQLQ_Q=?;<;:<>??<>@?<>?=;<5/))+-...0/123443210//39CBDBBBBEIctutsrnqspryok~~}zfYX\]ZY\^XX[]XY[[WZ\WY[XSUXWTVVXWUVSUWURSTTRQTRPQTLRRTURPQQMMQPMPPPOSNKLNLKLMLKKKIHKLIIIJJLMKKJLHJMKJJMGIJGIKJHHKKIHHFGIGEGHFCDEEGGFDFFDFGECDß{e`jcb^K=5544338;:<=?FMRQY`fkmvx|~qdOHDI[enlnqwooqypqqwoltrrjppmhnsqdmW60-++,+)(**+++,,***)(')*(&'''()+,+,+,+*+*(*++')+-_F.+ 
#%%8^ad_]`dY\_aZ]aa\]``[]_`ZY\a[WY^Z[]\[XZ]YXYYWVXZXTWXUTVUWXVXVSWXXRSUTQRRPSSVVTRPMRROPPKF>;97588775514INOONMNNPPS`P>=?=7;4 '8;A@==?>>>@@>>><:2+(*,,-.-/22/131/..-018@FEEDEGJeuvttsoqsrsyll}}~|cWW\\YZZ]XY[\[]ZZZ[\YYWXUVVTRTVVUVSQUUSRUTTQPQOQRSNRPTSQORPMOQPKPPMMOLJLLJHLKKJJJHHKLJHJJJLMKLKJGKKHIKIFHHGJKIFGHHGGFEGJFDFGFDDFCDFDDFECEFBBCѾâye`kc^YJ;34666469<;<67>?;<==<==?><>?=?90))**--..0001210//...08ADEDDDIjwwtuunqstsyfg}~~|gZ[[Y[ZVV[^YY]]WX[YZZZUVUWTSTUTVUVRSVTRVXSTTRPQTPPPQRURQPQPMNNPLPNMNMKKLJJIJJKJIHHKJJJJHIMLKKJIJILHIKKGJLGHIGEGGGHHFEGIHEEFDFGFDCCDFFEBCFEBCCĤxc]haZ\H<46768667:=DILQ~~~}|~z}|~|~{~{}}~|{~}{~v~{{}vmdcbcdilqwy}|~u{xz}{wt}www}wu|yu{wxuzyyuyywtzr{yhC:8876578764453443114339IVZ^dgktxxa"#"#)1<;<=;=<81,(&)+-.-.2220121/..--4=DDBBIiuwtuxmoqsswmi{~~zk^\WVXXQUWZUY[YWYZUUYZRUUVRSTSOTSSSUUQORURRRONRRPPPRQNMQPMMNOLLKLLLNMJKIFIIIGIIGGGJHIIHGIJGJJIGIIIFIKEIKGEFFEDEEEGGECEGFDEECEFCCECBEECADDBCCAǣwb]cb_d~_J;457798789=@GF]|w~{||~{~~{}|ztmxy}}sx|~qu|ur~~u}~~x||z{zu}uqw|nbTQW\]aehjkrw|v}{yqx|sxt|rrw{so|zwrxwxotvzqvt{qwlah[92556645422333100.-,+&&')2?CEO]cfkkkWP9)%&(,=AJZelqqomnmjelmlhiotL1-++****+++*+++))*('&'&('&')**-.,+--,,...101/' 
",d"@g\]`^ZY]`ZZ]eZY^_WWYaWV[^YX\cYXV]VUU[WVY\VTUYWTTWUTUWUUSYVQSTTPQRQQQPOQOQPPOPOPNMOIA;:756666667741..DLKLLKKMMLPWI:9;:57-)499:;;<67<=;=<<;;==;<<:9===;5)%&&),,-1001100....-04=EDELivvsssmpsssykgz~~yj\YWVYVTXWYW[ZXXXZYWTTTSUTTTVSPSQRTSRNRUUSSQOQQQNOLLNMNOMLLMNMMLMLLLJKKHGIIIGFGGGHIFGFGGHHFHIGGFGGHGGDHIEEGFFFFEFFEEEEEFGDBEGDEFGGEEDBCEB@@A<äy`\c``Tv`K<568688:;;:9BWpvs}}~}|~ux~{|zwq{wz|~{{|ygP\gjz|v~~w{MKT^SHTfpwzx|z{||wx~vwSMVgk~}id`Ycabhjhmsu}iYbmpoupsvwztv|xwr{qypwtxqws{qyX3:C834211122202121-*''&%%'&&&.00148<>LZ`ikc>+),28=DLMLVjkmmjghllhkmoI1+**(**))+,*+*,+,++*'(&')'*+*++,,+,,,-./1112.#*n!@e\Z[\XY[ZTWZ^VZ\]VWY^UU\ZVW[^WWTXUUUXRTZ]OSXXSRVWSQVZTSTVPMRTPMNRNMOPJLOPNNPOMNOLG<787545565574120+/DKKJKMLKKMPWG779857-)478999956;;;=;;;<=<<><<;<<>><3,(%%)*+..14:9:9997557;ENPUluwtrnmnqrsylf}}xj\\^]]^__Z]][[\^`__`YX[Z[ZYZ[ZZZVVX\WVX^\[ZZZ[VVRUOMMLLMLLLLMONMLNLIIMKJJJHHIHGGIHGHIEFHGFHKGGGGGFHFFFHHEFFGFHGGGFEFFEEHGDFHFGMRQOMKIEHDD9,(*Ż}_Xc_anPgwV@:69:78:<;8GX{zrwwQw}z}}{qDJ\l~{gftw|}{|zcLQPXblx{{|~yo:00IPNMGAXst{{x~{w{|||sSEIJKXaiwxXQadsw{xvt{{uyy|oxvxszvywyp~T6;:9;864//0../133/,(%%&)+13210-..,'CP+!$!$%%'''&%()*("+gjjnkfikihlkhB0)('(()')*))+)+,....+)((('*+*+,,*+,+,-./22/0-!*l Ee\\[[Z]\ZY[Z\X][ZX[XYUYZZXZYYUWVYSVUXUYYXRWWURSTTQPTUTQORNPQOMNOQMNNPKMQNJNNOKMLB:655544553456321*(1CJIHKLJLKJNZE57:744.(4898898357:889;;::;:<9<<:9<<;;7/'%%'(+-3Jhd^`bdbe`eghfcgpsvulgkpqsuy|jc~}}}~~zeRSVWTWTWTVXTMTVXZVVWVSRSQPTQTTRONPSSTRKMOLLHIFCDBB@@>>ACEEDGKIKKLMKJIHEFBBEFFEFGFGIHDEGEFHJDFGGFGFDFHGFFHEDCEDFFDCEECDFFEFDBH[cgihjhf_[TC0&#ļyaU\bw~YNbjpylXE@AD<@GOi|qq{yQWmzz}}|~sFGCQ`lz}~~xg^o~{~~~{}jSRQRRS]fv|vppHD@GMONNLKFP_stv{{stxzxsyxzqqYJC@?DA>LYhua3?HNVfljszspw|wlsuvpvvyqqs~UGU;8886645420-,-.//.,/13478420),30/g8!"""!$&&$#"#$$# 
&onkjkhjhiilgb=.)('(()())()+*+,.--//0127=;20-.,**,+,,/011/.+!,X!AY[^YZ]aYY\`Z\^^ZY]^VVXZUVX\WUW\TXTXUVVYUUXYTSSUTPNTSRVTPPRUQNPSQPOQOPPOKLKONNMI?7664355554455524/)*5DGJKLIHKKIKWA649622*'367:98616898::;<::;;<<==;==;:=>8.(%%&(*1DRIIIILJHCE@;<61+18989;<>>=<;;<;::<;98867789:98:87:9:::9799::88<<=?CDFEFA>:416;?@CDCDEEDDFCFGHDEGGGHECDFEEEHDCBCCFFDDEEEFEEDDCEL[cfhhnnqmnlmmkfàycOOOc]RWdlqtuyw{y|~umtzLWXf|{{}~}pGGDAFTbt|x}{}|e]h|~{}zxwz}{{y~z{~tjTRQQOOPTXaisuv_UOPPPQROLKNNLDEKQX`gfgkjlloqppnhfYM?<=98CP\fioyvrnpsvosrznqs}RcvXH?94756556761-,),+/478851,,%%54)o/#"$"!#$$#$##%&" "mmgeiijbhgg_`:-*(((((')))**+)+,-.0/338@TWU`<6/+.,+,+.01/0.+ 2F >ZYZUU[\QZ[YSX[\XX[[WTUWOTYXRTXWPTSUSTVXOSUTOPTUPOPQMPSSNLPQNKNRKLNPKILLHJKNMMG>75564454334544431..6=EGJKJJHJIHLW?537411)(266988716999;:9:9::::;:;<>>==?@<91)&$%&-9CFFGFGHFEHHEJKQlrqkYbkoqqs||]g~}|{{|~~~xWB>>=;:;;:;;:5668978899987866444446778889:89:;<:;<99:::;;:;99::;::;<;=;98889:;:=??>@@ABC@ACDDEEGIHDFHEFECECFCCDEBDECCDEBDEBBCJZacefhjnmllijljť|bC8L`XW`gknmlqpuvx}{{tks|qIV[`f~||}}{mDGEA?DOZix}~~~vR`qu~~~topqrtx~{|~~|}}}y}gSQPPPOMONPW]`[MKOOOQTUUSMJMKLKJC@DO]_fhhjllnpppnljgZI@:88990Bke99:867779=EL[fozrnvsulstzrwSfroke\QD=:556799574/('*0595-"#/6&(! 
$# """$%$$$&"#pjfgfjjhfdibc6+('()))()+**)*,----/.37;AUV\a>>;><6/*+-/1//.)1J J[\]VWYXQ\ZYWYVZXZZZWTUVUYXWUWWXRUSUSWRTQTSRPQRSNPQPQSQRMMOPLLNOKKLNJJKIHJNNLE<6443355323355232/.2?>=92+'')-8@BGHHCFECEEFEKWksqfP[gprrt}yZe~}{|y~}|vV?><;;;;:9:9:9:799:8:::9:9:875321/156878:;::;:;;:=<:;;9<==?>>=<=><>><9;=>>====>?>>A@>===?@ABDFDDFDCDEDBCD@BCB@BDAADA?BDAACGV]acegikmjkggihťy`2*rqonkgdab]^^\aa^^U^lhWE5S}xucBPW]aew}{~~x~{zx|hEFCA<=AGL^pvv~~|}}JQX_nptpnnhfe^`dgmpsu{|~~}{~|bOPNMOMMONLNRRVXUQONPRSTWXVSNIIJKME;7AR\cgjjlossrrqpoogZH>:5((*0MkZ9;97666456656;BS]_jqthkope_Ramihjklk_WC>:878677765./230)'}& $$ !!"##$%#"$iggjhghihcije4(%((()&%(**))(++,,+..38@?==<6,(*.7ABBEGHGFFGEEFITmvsjCPhqrrs~uYd~}}}|{{|~}tT;=<<<<::9:8:=;;<;<<==<=@?>==>?><::9;=<<;<=;=>>>=>?>@@@A@@@ABB?=?@@@@?@A@AAA@@@@@AA@>@@A@A?@BCABCCDFDDEFCCFCAEBDEHFDCCBABCABCHTZ^abceggggfffeƥx`,-~mhgfc[Z[XXXZXWWVSOQLJC2Rpo]EKS[`_^n~~{{||z}hAEE@<<;>@CMZcpwumtvxqntyseffjnsv}F?HOSTWWZ[]]ZY[]_cemqvvyw|{wv|`LOONPMMNMMPOMOSTXXYVRQRTVWWXVNKFDFFIB54<893*++*.-+*'D8 '-, 'O\VWWUPVYYRXYYTVYXSTWUQTWXNRVVRQTVMTSVMPPQORSRMOQSNLPTLLKOJILOGHKMGILLILMLH>5332133331135333.-.6@B??AABDEDEEFDEKJ=355331'&/124553.3476457779::8:;;;=@@><<>@=>=4-+/;CAAFEECDEFFFFJSmwslEGkqssuu``|||~|z|~~}zW;>@@?A@@AA?ABBBCCBBEEDFEECBFEGHEGFGGHDADA@?><:=>>>@BCDFGEDEFFECBDDEFCEEDFFEDCBEDDBDCCEDEFEEGHGFFGGGHFGHFHHHDIEBHCEECAAB@?BB?BT\[]^`addedcdedǨw^*2mlie`ZWVUUXVTTSPNKKJKH@A?B@BCB=657640:KUV[\Z]^_``achqv{|}t~{~~xu}~yUINNMMLMMNNNPNMOPVY`]MHIKHIKG?CEB<:;@D8-'eOIA7./98/(#"&&&'%*1QqS897755775430.*$#$(,0YG@b\OG?:99<<:?CCDEFJTWA/*((')((.056.". "! !""!"$$%(,.ecfdefgg`bihV1'&&&('('('('&''(+,+-/3=>=94./;ABDCCA@BDBDDEGOrwtqjfnptuuo\^|~{{~{z{{~|tP1CPSQVQOQWSQQSPMLOOMOMLQONOPOMSRSTUVVPNRROPTPMMGB?:8?RVNQLPOMMKIIHIHKNIHONKPNLLJKFJJLOLOPJQPOVPOQOQPPSSZ[Y[[WQOTOSO>>??>?A?==6143/1/-/1.& (weI>/#%# !&',6WS386554443320.*$!!$$(^M;tNGA511132141664520.,(&$$'$%)))()! 
+ ++%$##$&$$&%&/B&3fhhdfkhdcegcY.'%&%&''''%&(''(()--,.1@?A@ACDCACBEDDHH;/153/.&&./02432.1455558746:989;;:=;==>=<;@A@?=95<@CB?>>?@A@ADDFOrwwrnmprsvylVa~~}|||}}|sJ9uqrruqmsskrjronntnpiiiiljnmqulinlomjomjlqjor~vnRWwmOQLQSRLMIHIJGJMGK`WNPLOOLPNQNKNLKOJNQLQNOKNOOJOQLOMOKLNRfdgpFAAAABA?@ENSVWXZ]]]]__]]\ɿǨyb'9yrljd[VVTSSVXUUTMJGFGLK:EeYXRHHFLTY^cgjll~|w~z~x|zz~x{~~~xmBBD@=;:;9:<>@Iy{raQI<53420-..,+)('$#$%%%$#'/=Wovqrtspjglqtpnqsx~|vzyrxbPNIIJHJJMOLMMNNPRQUPxuZOC;765641010-.0( +)t`L@@=@?>=?BDECBCDBBFH8.152/-$&,-/2330,14334554378557779<;==><;=>=>>?@=>?A>6:=07<98=CFSuxuqmprsvy{oZ_}}~}{}~~}|r@1yyne^`\]]YZTWXYYY\]ZYZZXZZXW[ZXZ][]\][]]^^\]^dt}qY\[H<==:::;897:999;<=><>==???>?>?>?A@??B@<@A@@>>??@??BA?@DGSifSPC@>=?A><@FNQRTVXZZZWZ[YZZ̪w`'8smkg]USTSTVXUSSNJJKJIN<;9:89;=>K|qbUI=74420--,+)''&%$""#%%$(.*P¿yuu~ws|`MMJKJFFIMNMMMLMORT_UXQA8676654002/00)*z]K;0# &# "&)-PO995443321210-)$ !"#*p=FuWFFE<70./06;854320/-+(  !!"%&'('  (,XXRRVRRRMMPN(5dgb]`jd`_ccZQ2*('&&&&&&(%&'')*)+../3>ESZQH?:63452+'*)()*((#`!EjB#!*QVVYUVVURYTVUXXWSSQUOUSSQRVUOUTTPUSSNROPNRSSIJLNJLLOKMLKGFGJGGJJGEFHFB<42210/1310/121011/.2<=>??=??=>=?@A=,7>+4?3%.2?Buztpnqtux{|hX`~}}~~}|~~|s9/w<:FABKDHHCJGGKHHIIKINMJMJLUYQWRYTRVXWZRQSV\ex}sNbZ;'$ "$"%#! !"" "&$!"$#%')+*'%$""! !%)''*+,/.0/-//1137;FicPJA>=>?>>@?AIMPPSVVWXDOTSSQȪw_(:qokg^VUTSUUVTTSOJHGIHN;EVF66?DKLLMQZcfklmmzu}~~~~xgdsu|~}}{ghy{}}z|}gBB>>;87:869<:Fx|sfWH>8551.,+)))(&&#$$$%%&%(-!V{z}vwwzz_KMJIHFGIMLLJLLLNQU`LRL?789:942/15642* +.YM<1$ (& !#&+1VH13130140/00-*($! #%}6BvODEIA7/,,-/224200./,)$ ! "%&%()! 
&,dd_`a^\\YZZL&/OUX]^cbda`e`XQI:2,*('''&#&&&(***/215AETZOD=721230)&(('''&&"y!=N@!!)VXTWXXQRVWSSXYRTUWRTTYSTTRPUWUNPSTONPSMMNQNONPJLMLJKILJIHHIJGJIGGGGGB94200/11010./2/012116<@<>@>?@=;>?=@>@?BA@@>-3>0:?92?@EKtxurootvvz{lT\}{|}~~}}}}~}{r02wF   + $*)-29=GL`xwEQF#  + + + +   !$)/:Lp^NI@?>??=?@=@EJMLORRTT>CFIMIȫzc%8tpmh_XVUUTTVVTPMKJGIJI=@N=/6=BOTSOOSZbfloopy^Yzx{~|pXnuw}{~}mgpux|~|yv~w~nYG;98669868<9Dx{tgXJ=6351..,**)''%$$#"%&%&)/#\¾yx~orv|v\KPOLIEEJKJIJLJKOSVbGSQE>>>><74126886- +3\M<9, #" $,($&#'*+V=&*+.0131/0/-*&!&7FxEDDC?6+()*+-11221..-*&"!!!%''(*  (#)E=99:;<<:;CH#&>==@?>;;=>@ABBB@A@@EC4*+./,*#%+,-//00,0/2643254477688::;;?==>@=>?@?@@DCAA;21:/7=60>==<>=;?EHJLMNQQPHC@FLDϿǬz^)9vplg_WYTRSUWVRPKGHEFIG<9C3.87?MQQQOTW]bgjlmkqV\y}zw|zcXqr~z~zxuuzmU[j{|vwpXN@524666788D|vhXI=544//.,*+)(''%"#$%%&)*7^~xr{jwwwqXMTTRJGDEGHFGIIKNSWjE{ZUIAAEEA?<96989;2 +2|zpfcddVONRW\TL;)'*)TE,(%%&)-....-*% #5JuGCD@<3+&(&'*/.000.-,*'# "&&%')!  0 !"+/*''))()+,3A %7:<=?=DLPTY\YWUSSONJJH?94-**(**+078;DLUSI<3-,-/1.&$#"#??J/YRRTTRQXTUOQSTMWTQMPTSOQQSRTNONPMOHROMMOOPIMKKJIIIJJJJEEGHDEFHCDGC=60/0/.-..//./00/12.26;=<=<<;=<:9;>>??BBA@@??A>2((.,+*!&+,-.-.-*-/122//223435668789=<<=>;;=><7/,<-6?4*5@BOpuvroqrttyz}eXa~}|{}~|~~zr,?B    (2;I^nuIH:  + +  +  + + + + + + + + + + "'0?PoYKF=;==<=<<=@CFGHJJLNNJKKKKJϼȫ~]+4xqnh`ZXUSSUWUTPKHHGFHL8490588AIMNOQVY[`cfghgeehcfdZSRv{}}~w{{s~~~{w]Peklzzxrpogkjmpvx{oSb`v|{{}|~zaSK93332545DxykYK>746412/.+++*(''(&')))+4i¿yt{u|{ssS?FEAEIFIKJFGHHJMQWmNz`ZPGBEEDDA:8::8=8 2ũ{zxvpnggjlc\L+&()W˲E2.*&$$$%')*,)%!;JmGBA?:0)%%&&*-/01.,,+)%" $&'((*  8z!':?5/000-/0..58 &,08A=0'(-,)) &)*+---*+/232202544335776699<=>>==>>?==>AA?<64*;,9=2-3@ESvusrqtvssyz}j]hvz}}|}}}}~~{q/C=  + $' "(017AXrzFR=# "$#$!#%$&#"$$#$$%'()'&# "**((((#"&#)+)-.000.524;;=7+,DXpWLE>===<==<=@BCCDFGIJHCE@HLMȭ}^+6}xrnib\XVTUYWVTNKHIGGHI944196>>?; 2z~yvlcege`[SXYZUP>&%++Yn %(%! 
& %D631-+*('&%%&$!%2RiICA?7.(&&&(*-000,,*)(%"#''()( + (!#'+05;BIPQWWZ^___YUSSSJEAEDD?=<=A@HMSM<,%$##)*&f-O`I!07EOLHPQNKSVSLOPOLRSPLPSSIQQPLMJOBJPNIMNKGJKICHJJDGGFCDDGEHIB92100.,,...-,..,.0202:=:9:=;89==;;<:;==??>>>><<@=/&)-+'( ')+,/.,*+03211234464767757:9<>>;==>=?>>=???<9<-4+::/0??DRrsrnosvtsxz|h_hrvwy}}}~}{}~zn/B=;;=>;:;>@?@ABEFHB9:8>FFȰe,2~|upjc\ZYVWXWUUNLLKFGIG7/2488AEIMNRVX[]_bcdeeghgfd_WP?=Sbnbabn|ssqguls{pjrocVnxsgigSg~Y]_ffifcjt}z}yzslmllnnnpqsrjRYZaccq|}x~y{~vssu{~nN\R<1/1.C|{z{rlflnvxxopb[Zbpxy{yy>y¾lO^g~nsw}tkze]XOD=45;GMIKGHJLOUWZxg_SDLOOIHD@??CDF; 3ijSF9U;2,%$'.32#'$&+*_h!&%" !'C86421121.-*%!%/ZfHCC>7/(&&&))-/./,-+(&#!!$&&)) + H^!(Ygb\]`bd^X]YO,  "'(,.7>HVhuvqqluw{wmaTJBFHGHIJKMNNRTM5-'%%%%"`)DV)$2>GMKSQQOOMOOURQMPQPIUMNMOKNGLLJJJLJFKKLFIIIDDCCBGCGHG@82///----/1.,..,-13346;=99=;:9:;99:;9:=>?>==??;=@;.%(-)'&&*+*-/-++010011123425566569:<<<<>=;<=:9;4<+*(90/1=:;==:::<@@A@BDBD@867;B9Ͽɮ|b,7|wg]ggd\WXYcUSLHGGJL5(/:8:FJLNPTTY[^abcffigfgef]TK:9P[}{dPRUarsokip|srrgfqvqkSYouw|vhhf\Qq{xsnf^SKGK\p}||~usumornqorrklpprvpqmolikkkoty|~xM`aWH61,?x{y|smifhmzyqmn\Y\blv|sf=þzh~xt}uqjaaZYXXSOB>8:=DHKMNNPRU\~qtzeLH;3.141/..-+...0/---+-/158;==<:>>:::<<:<;;9:>?==>=??;=?:.&(,'%$%))(*--,.22002113312556689:;=<>=??;<=;=>?>?>>==6//*(1???Svvrolttssuuus__dn{~{|}}|~}zr(J{7,o/#&%  !""DYEA;78865410-)BoqEPGBcrtuz{|||{yvsspnlhjiecaX7Nqtvxywuupnnqptrqtrtpppoke[.%EgtYNF><:;9:<;::<=AAABBBA>>==@:ȭ~`+9~wk\ol~uVXbw]fOHHHLI4$AACJLLMKLPRORYZWX[[WVUQKD>;3ASW_j{~~vTLKQ[amszroi\k`hu}|rlo`Xehz{snfQ_gr{|z|}{x}wzywlfgW\u{z}|uqtk`TKFBLdľô[`f_eecddghkkmprty|KOY\]VD44gxuwrmjfgkyvono]Z]enu~tl?ÿrhedZTVPSSRVWVUVQI>98>EINOLMWd׏OG+""&*=><;=<;==8,%&)&%%&))'**,+,00022124334545688:;>:=<@=;<=;=@@><>?==<=<;9;>>8Wtsqmlqpqqqtun[_bm~~~}}}~zi+Mx3$c1%%"  
#Hj]A74791-,++*'Fm~n@RAFfuuvz{}}{{xurqmmljiihdd]7Nnvxxwvutpnnqrsuvwrtpookkga.)FiuWNC;:;979<:9:;=@?@AAB>;>@A@@Ȯ}`-6t`_e[kjSY`oX\MHFHHH3LKKJHD?::;764321.1*)($$$"&+1GTYZUaknw|{{x{}u=DIR]\aaks~}z{{zxtqgQcb[^aeimqstrqpttrlkcK_]_euqz~z}y{zz{|xppZO`_cijssq{yzsrtsqsvuwviie[aqqxntrptofgb\UMHGFTv|mkfgkfebfejlmmpsbZcbhjk^<>i|vqsrmighlxuolj_Z^inpvysp@¾hpyfNLKQOPOOPQRRSWZ\\L48<@@FMECQhҌV@-,+/17I@ DĩL@̎оL#"&**a˩11rsT"'CA`GIWL=ELSUPF@?FH9B8RdLHC=;6+&%'*+000/-*)**$! "%'*.- eK7k_[ZZf`^\Z]L,!#&),3=?CFJKLNSUUY^ceee]J5&(IBc@I/iD+&(.2-"8416GMMJRPOONNPKNNLJMLIGIIHAHIFDIFFCEEFBDFFB8..0/-,,,-/-.10-(),05989:877:;88::999;989;<>=<:9;<<<<9("&'$$$%('()*,)+-.1210032354245779;<9<;<:;;<9:=?@=>==?<=?==><=A^ssqnprpoopsrxkUW^dpx}|{|~~~{b,Nr/-f6&#!  2pOQ3.+(!$%%%%&#Dj~r=P=Aassuxz}|zywtrpnokkiiifbY7Npuwxyusqnkprrttuvrsqonjiea)(GjtXMB:;:878;99;;<>?>>?@;569;9<ʯf.7u`d_W[WY\]qTQJNIHIL6KNMJFA;6543/+-*'&$"!#0MUY]_begjlpvwwwz{{~^BK]g^`^fjkpsquxx{vyzvzxxvxyxwyyxyzysoZ[f^aegmoppqqtstttqmmVY]\`ddjclnknqoonqruvvwx{{{vrpW\aaccddekkmmjoonsssuzunke[hi[]dcfiubZZVUUNGGJOcÿýE#Np}~xrroljhhkqrmjf_[_jlmrsql=»qb{`IKMRSSSSRSTUUY[\^U=8?DEJME>Hh}[OKMEEIBMD@țMDЩķ͒z+"##(,+dDFŔ%/6)vF;60%)2BMNB544/#<6XkLHIJ=8+*'),.35220-,77(%"! 
#%&*0- +oJ3\RMNQTPPOPQD)#%(,2-.A?::0 g<+-+DLNQUOORNMOMNJLLMHGLJIJGIFFFIFFDDCFDFG>5.,-.,,,----,+00+),06;::;:8889989:;::;;8:;:<=<;;:;<=<:6("$%$$"$&'%')*)+.1342113446447777:;<<<:9:=<;:=>>>=?=>=<:<>D^ssqmnrrplorqv|hXPZ^ry{||~~}~d"Rk*4n3!""  Ad23)-8>0,,,+)% DlsAQBHhtuvxyz{|ywtprppmkigfdd^6Psvvywvrqpmoonorrsrqqomljg[')IjnZPA;;88:::;:9:;<>>??@;30/116ͱ~d04}uo{`W\\]f_bMNM]RHIG:GKLLF@;6230,+*(%" /MS\[^ahfikstuxwz~HHU`Y]`_ejknspqttwxwwyxxuuwxvzz{{{{ut_Tg\^bimoustsstvyyvnjZQ_[\dhekkmnprrqqqwtvw{|z{}}vqfQY_bghijlnlompopuxuux~wkkcXhnghimopvZPSSSUSMHGSbÿ¿7Lt|}xrqomkhgjmnkhe`\`knonqpc7»{W;ER\__Z]\^]ZYZ[[YWWJ@GGNLE1>ivl~ĿÂZNAIIIH:QB @|ōRJߔôs#!"#',*lD&Z 40!l?7( %$% ""%$0&Z_MK[~LB/.'./-EP071--Ji,&$(""&(/2,  +hD4A-230//322?6& $'-2;Ni~rimlnjkkkhbnqrsrx}~wdC%08*u\& e6 !@HKQOHLOMHIIKGJIJFHKGEGIGAEFGCDCEDGED?4.,-,+*+-,.-+,-.,)*07;=<:;99:<::::99;<:98;:9:=<:9:<<::93& "$#$!#%%%'()(+01023212553357668:9:;999:<:;:<==;;=;9:=<:;?=:61126ɮ}d3/ʪkrc\[^]aZTNMHQJHHD6JLJHA>;51/-+(($#!&R?@GJKPMKLNMNJJIEB?EF?M<(.ROIMLPPPPTONOPKPPNMMOPROQPMIIIED:0&.IPR[^[WYWWY^\USKGC6.4RVQRW\[_bc`bc__]]YZZ\[ZYWYXTNEQZV^^a`b\\]\\^\][WUWZXO@11Dhilggkd_nWIHJHKMIB3""(%"#&$#!!\fRHd|5h[WTVv)%(OaRPTU]F/  qFAdSPRSTSPKRL>&#'-3>Pk½ʾI?5$=]j[' +f0 %AJLOLLOMIEGHKJJHIGGIHCEGFADDC@CCGEFD<2/.-,*+,-,,-++./-*+08<<<;8:999;889887:;787:89:;=;:::98990%!##"#!%%#(((&,0/-/11/044234635796889888979;:;;===;:<;899;:9;@autrooqrpklnmo}zZJINYgs{|}}~y\%Xg$@w1$"  ##!"! )<}`+171.2///.''*$Co}q8M@Hlvtwxyyxxta``nV_kjjiec_Y4Ptsvwvtsrprmc_amWjrrqooljbS)+IomTM@9:9:75898:;==<<>=>?@=69=>Խǯ~f11ǣ~yfc^WVVUVROMLHIGEGE7MLKHC=94/,+*(&"! cE(/,32*'&%# (x:552,('%!""";N583*)%$#%'$#" KI.+/1./*/**(%*'"#""!!  !$.Ti;+)()*'+%&!(!!## !! ! 
RakM3;6../4'%')*,--,-1?Ckþ2 Mr|}xwuqolighnmigf^[^kllpvtr>UKþ.0lytUXOC814>LUFQnȶtpe4E0#9"9/:9!#',,nl"&\*1W>0#$'&#$)($!Z`SIcx<"%(rM/  wB Co^^\^`_\][ZA%#'+2=OiüCK)!@hD'p, &CQMNOOMKKIKIJKKHJJIHHFFGGEFCDCCCFEA8/--.-,*+,-.-,,./-*+17;<;<;8:::;;:::999997<:97:<=;99999777.$!$" !"&&$''&%.20/11//353234557::9:889:89:;<=<:88:9767658788;>`wqlnqqonmmmmm|}T?@DO`sy|~~yW'Yb Cx0$"!!###!! ! ,Li;@<.,,*+*)&*0'Hr}o:KALnuvwxwxwuqc]Xd[glkkhfd`Y5SstwwwsrqpplfabhaqrsqnmjheT&-JkmWJ?;99866978<<;;;:<=>@?=88<=űl3.uutogca_]\STSNKIHEEDIF7KNOJC>940-**)&" YrK4-($0P983.*'% !NS) "!RU-)'%%$!##!!"c`-" "" Ohs1/)#!  !"$%*5;W¿0!Js~{uwwplifdfmpmiha\\knnuxuy;vQQ+1rʕXZC)"!&.CTE Ug͸}؁׷!D}iR#'++nȷɧ" ,%0S<1!"(('$1TQ2,$!ddVJiu3fchalt$'+VustvI/  {8"FhIBCDW[IEEXG%"&,2;Ql»¿z7U"%BQVo&'DMJLMLIHJKIGHKIEGIGDEEDDEDF@DEDDE?40-,,,,+,,,-,,.0-,*,16:<;:;<99;<<::::;;9878<:87:<;85699:776-#!%# !"'&$$%%&-0001/234443444576798888779::9;;9767:8443233669Basrnnnmlljjjjh{{J.3:E^s|~~~}V%`h ?q/&#! !"#"#@kEG>0*'$$%&&*0'"Lpj=MALotvvyvwxxvoljlhlnlihgebY2UuttxwtsqorpppqssspqqmmjgfS$.OmjUJ>887567757:;<=<;===>>;6224Ům4%̥mggqVV]UKIIGGFIG6EPMIC>94/-,((&"\c2-&#6_771.*(# QU# #!FkC%![8%$Fk7-$""&/4N¿0!Nt~yvvxqlhdcekqnkhb[^hmlux{|1cuJR+3vvZA2/.449RH WcƻqsѺwd'+/sΣE0a)#7p%3F:gD+-,)-ǽ{1%"nbTN^j>;8J8ANv2<6/,Jq()+."/a$-kM. 2#"Mg6%&+EY6'0N@##%+1=Tmÿq/Y#H\N  +u# &EGFILKIFIKEDGJGDFFECCDECCCC>DEFD;1-,--,++*++,,*,0/,*-387:;999;88;:78988:98668:7889:9403897665-##$"!&####%%).-./.20111113634357777777997689866455442123436;aorpmkgedcdcfjxn5")6Yr|}}~}W'`e?r,$#! !"" EdCAC=2,)()**,+%!Ks~h?I@LjsvwxxwwxurrrsonoljhdbaV2RptuvsqpolnonqrrqqnnnnijfgT$0IlhTH<76656876899;;;;=<>==:542/Ůo7'~rlcmgr\X`WMLILVHDD9KTKHD<73.-,(('#! _•U.,%"8i55/-+(# !LD#!%!Ci=# X. &$?jz.+" "$-1Iγ2 Qs}xzzvpjfefkvrjha[]emiuwxx6$krHS)1v}sxwXE@HJNEIWG^erϴ͵ŹY(,3{W"tY5!5D;ѪF).n>$#wgTMMH@<1R+68umB`WPGZ^)+/ME8D/ ."$CkA"&,=L6+3L? 
"%*0?Soþp/i Nxc' {# )HGHKLHILJGFGHFDFFCFECCEECBCCED@90.,,+,,,**+,++-//,,17;;:=<9;;::;<89:9998999:::8998882-3787676-%#$ "&"!"#$$*/0/13401243337565789:8679:8899985300.1.,./+.237coomke^YWRPS[hvZ".;4+Os~{R&caAn+$## "$$$%##$$$%OWA<67<644651.+$ Mui@J?RnqmsomporolnmlhkhgecaaaV4Wsturghddgdg`bedgighfd]`ddQ$/OojUK<6578963698::88:<<>=<;:><9ÿl7$Ɨq~}eZYZd^tZPPRSzeFF9FPIJG>83.,+*(&#"!VO0+&! 6=70-,(# "L4%!%!;\7"N-!%$9iq,+" "-1Eҹ2#Ps~z{|vrkgfhmuypmcZ\ajhturl5]tCW¿,2yžtUA'%'*/?SF__qνā|Ě^',1z2'!:,!?<1u),20`;&#}kXNLIC=Lc=LlTg/-0lz_il@4 ")"!5I8$$(+.*)1@1!$*1>To¾o%u #FWxF" ! )EFILKDGJEBFGEDADD@BCA?DB@?CED@6.-..+**,,*,+*+-/.,,1:;:<>=:9<:8;<969<99<;::9;88::77970,3688664,%###'!!$%$)/10133022332254678699757866:97885312/-++**)*.17]nljcUI?;637GcoG&?HC&Jr~yL%f]Er.%"# !"%$#$$$$&%-V|P:;88965543/,)#"Nt`?H>Smq_l_\i`]gZc`b`e\\]UX_`S2\tvvrY`]^eZ_[X^aVd\W_VWWb`L"1RxrWI:54568324579999:;;<8558<=>ûj?"qvXWXg_uYNNSSwqGJ9ELLKH@:40-**(%##!PG2-' !4>80-,(! !G1%"&!5V4" O+!%!5fq**" ###-.Gӻ/(Rs~}z{~vqmiggjotpqcWZ_jjptrg4%»}ICT*9~٪cYJ6'"'8OP>b]nߺðőҮc'.3{06[ 9!~"AB1]#(F=Cp:&" jYSQLG:GnubMkHtvvv|`212@Iygo]<:0+)#'+'#$%&&&(,/) #)1@Vo½k"zw!$ -DGHGDDEGFDFEFGBDCBBAA??AAADC=4.,+*+,**+,.-**-.,,-17:78;>=::;:9:8789:89:9887967:964881.4887774*$#!%&! !%%%+/0//10/121133336623457875257568842///+(()))(,.8]hgdWB*(+$"+C^nH+CJ>'Jw~}}M'i`Fr-&%#!"" ! "&%5[M:40+)))&"O?2,& "8>61,*(# (,!B-%$'!0U0! K$$%"6gd*+$$!$*,Iέ3'Sr~}x}|uoiggjmqorfZYallptto3%WetBY*@i\[NK?AEOQWC j[l~sa{jw<"(/1}NLݺ*jC!yzH=+ɪG%;}p4%! ngZQP[WK@@@J]qYDPB>nZ& } .DFGFGIHHHHFDHJEBBCEBCB@ABCC94.,++*+++++,+++---,/37897;<<;<::<;:8:;::8:::898978884589426775463)!" %%! $#%-//02110113;D417Rcgknqysrlh[PQ>/1-.-)%%&&'().5ZfbVF3,.62&2N^pW(+/*6WvzK*kYLy-'&#!"!!  #%$-dRDIA0/-+'&&'(*""Ux_CE8Ynsnrplononlklkgfefgfaa_S4\pvvurppnnnnnnqqnoonlijkebJ4QraTI:5556422346699979:;51.,.3.þk<"urqk^ZYXY\]ULJLLKMIF9AJIID=83.,*)($!M63,( !5A80-))%!!!%3R! ?-(#1/!.[2"H%%9'$AB??@DCBB?7.-+,+*)*,+,++**-.-,/58::9:=>=;=;9:;;::;9;;;::8889::8856:9766556341)"! "$! 
##%-../1/0113:VG,/8s7/,,)'%$$$%%$)2R_UE=:::<74HYcpa2$(3KhzuF-rVKv0,'#!#!! "!#%$*h`NE?950,)(')+)"$Qx]DF:[uuuwvuutrrstspnlkkijfd_R0]qvvutrprroqsrttssspnjkihcE 5SwfUH9455333225787887:::99:6895ɿnA!{yrnkcYVUTVWUOIGGGCHJG<BHFJ@<73-+*((%!Kh@3+&#"4{:7/.)(# !1/12979:FX$ #:+* )SG (^6$!f1J!!1u5#@ib0+"88$$ "*,Oб3-S||}{qjghiqx{{j^Z\ehljhe34{A\~'EׂZ3$*7R<kdq|ol^PKOLJD??DHGB?-%'/4j%qJ.6eG+)2yw4(#srhlý\\`DI;[G81<,6%"!B\^^\ZXWYVTG,#(/@Yq¾]c1J$2p!1FCACDGCBCD@ACCAA??>>>CCC>4--+++**)+,-,**+,-,.26789679:;:9<9::::89:88::79987799664579865566331)# #$!""$,-/..-0115Pa3,2JL*+(%$$$#$$"#&1LRB=<;?@<4D]ahwjI97N[q}~xE/wRPt4-)$"#$$"!!##$&#.rQA739?852/-,+& "UvWDD;^uuxzvvxwvttttononnmjeb`P4^quwvvuqrrrpqrtttsrqpokijaE!4Vx`TF:77655444578777788856878:9ʾmD!â~wrpibWSQQTTRNHHHFFGIE?DKDG>843,*+)*& JgD5-'$PX!1t=80.,(%!*Ylmigbab__#"#9)-,;;=:659=`B'S1%#,'# " )x4D,+.489B=YN@D5fH Dc1=/?%!%`m__\ZWWYXXU.!&+2BZtɶ\ V)U *,n 2FCEGFGEDCC?CDCADA@?ABDD?4-+*+*))+++-,))+-.-058::9878::<;:<;<:;;<;9::::8::98:87774258855556340(""$"! ""$,./010322>hM,-8ha')&$$"  !!!$/AC@BA>A=8>VegoynTFFWav}wF4vLUs3+($%'&&" ""#%!7wP?8986430.-./) %Y}XFD8^rwwxvvvwvtttspnomomigb_L4^suxwvtrrtutrstsssponnnjfaB 6Zw^SI?=====;:89:<:88:9871//./44ƿlF Ġvroh`VTTQSTRNKIHDFHJD>BKFD?822-,,*(%!GsC60*,.#!'$#(!RQ !+d?<3.,(%#%:>?AWWSDE[#"#:|,.={uuq{E!!J.#N/@v?8 Rli2+"(M`b^WVTR[A$! #,,Nøϲ/*U{~unjidiqwqwmdXZ]fllkb-3}xC^ԧ%Fwtjmo[=ASSQVRX>lmüsxojfonke^Z\_]UN3%),7ѷ ,(&#DhP-/jK&;~7'  %swpmVHNEAKZx[^RDIzH>?1ZFCCNp-<-  +Jv"".ma<6<>7:9:HY/"',4FZu}Ʊɰ~WQ9wK/  j2B@CFDBBCBA@BA?BC@>@ADC;2-+++**)***++)(),.0/588:;9999:;<><<;;9<>=:89;<<89:9:;7788423775455513/(""" "$,..010214So5+-@~k$'##" "*5=BFC@=5?T`gkxv[KNZjx~~|u>5zLZp-)(#"'(%! 
##$$!8oL=DH:21,*)++/,"%WxTKD:_twxwwuuwwwvrpqpqmnligc`K3buuywurpoqqonpqotpllkijif_@"8[w^TKEBCDFEDDCBCDDBBB?<;1.**(+,þnE ơ~wtphaXTSRSURNJHFCDGGD>>GEF@812.,))&#!Fv>4-*QW*\C<30+)% %Ac[bN0I!""?p+,.>?>BPZE7\B&L0!9]VU^ddDE- U.=Q1(!-xyrYTBC<;7~,B,  Uw!,ljTRO<$1IQVW."',3F^uƸƘĿ{Q%D;p7 b!7@@ACBAA@?@@A@?@@<@BBA7/-+,+****)),,+(),..247579978::99;<;:;99::8689:9:8977887787224555555412-&"! $-/.-/-01;eX,,1YZ('#"  (5?BB?;3,E^chuy[SR_q{}{s;9~Hcf,*)$$&(&###$%%$"/[WICB85520./01.%%\wPC@>buuwwpnnrollojpkkhheeedcJ5fvvxvsmkihlhikgfmiehbciji^A"9Yr_RKFEGHIIIIIHIJKLKJIKJGEB?:;5¼oGÚ}wrnh`WSRSTVQKHIFCCFFD?)\U)gF=40*'$"ZX*Gg-&!"@h,*".bmdo8@69[/)l~_c.:z"vx1\1 Fgm.+" *ATQM283"!'(Gúqΰ1-W}}nigfhr||yscZ\djmnpg$?ul_u?`ɹþ}%TqgjigbG?BDEJPX=oq\C:/%(##"#'.8ëcX'-dHb@(6MVZ6))#,yzi}RI<;7;BEJHAPzl,=71_WXYdh*C*  Yo"$Vi`a_F*4S\WH)!&-4E^vƷʱzK&;4pa$ W6BEB@BD@>@B@?AAA??CD?4-+++*)))++*(*+)(+,/37877:988:;:9:<==<:;;<::8;989;;87::998870/2544444332,&" %.0000/26Lq>*-8vF)&$!%4?C=9434E[el|dRYgw|}|v==Hik/,*(%')'$#&&%%%#/ZSF>:9;;964420,#%]{NG@=fuuyukljlmfhmaggbdc_ceaaI6ftwxvre_hcjgbkhadedcZ^ihe\9 >Zt`SKGHJJJKMMNMNNNMNPQQRSRQQRUQoIxsmhaXTTSTUQLJKGEBDIE==GFED;42.+*,*&!5zJ9.*%g,;O,sL<52-(& !ZW)Pf'"!=k+* !@8"gP%#Kb/N968!=y;:2F&$ Edg0* 1ZG/NF+#!! )(Kϲ¿1.Vz~|migfiosqriaX[fgijop+A{;^ʿo}%Qƹ{hcee2"&$*+;W9 tlĻ\A4+! '#""$(-7μMHDMJGCAHMJMV`W{+"$0K/(Wŏ/*#0wybB:533>@?>@BC>4.+++*()*+**,)(**---28:868:978;;::;=>>=;<=>::;<;89<;988889885.*1433322232,$$-.01/.1<^g-)0F5*'%%4>@:88AGS`hsiVZf|}}~zt3>Dlj*+*%$%'%%$$###" 1c~OICA><;84320.*"(]|}LJA:gvuxukjlhohhlhdeeaaZbea`G4hrxxurlbiejibjfgbhfb_\hldZ7$=^u\SKIKMMMORRRRQSUTVVXXXYWVYZZ[ŽrHŞurngaYTTTUUQMIIFDDFHGA>DDCA9410-+,(%"9yI9.*#+Evi+hG<64-(%!;^[gO " ?@<=>=<>?@BD@90++**))()**)**''),/027996579779:888;;::;;99:78;<:97:7967688766.*1323223311,##+,/./-2=kO%(/Y))%"$4::9;?" 
7b-.% '`rpj1#& B_*  5}!Bq&{(&& %mbs2*!!-L]QT2 ""#+(Nд͵0+Ux~rlkjntxvvla\Yelili`$EƿvaU9dӴĿ{)TgRQTRTRQ[; ~t¸?%*.<ϼ*"### !&+:VjZ%Ij''Ga1!'+)!6gnVWIxZ0*%>*"#&+//22J)  +nl !7gjhb`c`]X]_H%!(/8Hb{ƺעŠyw;:,Cbm>( >}lf`[jD#>@>@A@@?@<<=@?ACA7/+***)))')***)''*)+/47::97899;;:98;9;==:<;;<><<<<;=99:9678999892-1434444410," +  $-/0./07Do@%(2k#(#!#18>EKSZ_dhm|paaoy{z|~|q/D8d[(*'#%$! "$%"!2`aUIC;41,))((+*.ZzJN@Afuwzyvsususqqpoooljhgfc]A9guxyxttssssstrpqopommmlid\0$E_u]Y]adgkosvvx{{}~||~~ɷpIͥ~wrkgcZTSTURQMIHGFEFHH?9GGDA940/,,,*&#?P;/)#n9 +#rD<61,)&'-^gaeaebbfU!!!6b/03?GHJYbE=O:!:W+! -30*Cgb5"? %*,;ϸ3$!$ %(6Rk\!P3%aȬ[)' 8glPLGM[X\cnsHOfmR.-&A-$%9UV=.,G$  p`!!GpY;?@>=:<<>9:>A@B=6.))**)('(())))((*,,/3578569977;:79:<99;=;;==9;;:99:<98::89;;98792/3543353310,"  #-031237Gl1#&4py &#"1=FKOU]cgjpqb`mw{|}~|yp2I7iU+*'!#%$!""#&%#"1bU>:.;94320.0-'-]|KN?Ajuxz|yuuwtrqrrnoponliec_@;jwwxuvtqpporromllmlknlkgd\.&E_v]]hou{|µpIˠ}vpljcZVUUUSRKHGFEEFIH@;HIEA941/-*('&# R.!_(!=k :61FY3,',V`c]WSOTW9%"#+)Rгɻ.+W}}|yrkjimpusxk[WW`lkca`%Jļy{@gйuVθzka;CMC?5Dj7|ãϷ;%+.@˷Ph[aҍCk1'Kƍ-(!:eoRxL;2%.3BlDYIEZru1F-'R`1I% + q^ HpXFAEHGDEGK>#")0:Kd}ôu3F!#FeJ! S4?;;Go965$9:=<99<<<8:>AA>3+***+**)(()**('')*.1676665798778868::89;<;;;<8;:9999:8789459:7688224445545320+$  "'0=<=?@[n'!$2xc!%%$5CIKMW_dfmwsa[k}~}~~~xm/L;hR*,(##'($"#%&('&#/eYBAA@@=932562'-_}=E=Dlvuvutnpqpmknlkmoijggc`^=>ktusgpkjilkjbdhgcdccffh`bZ-(GatYUgzzuwtuqropsտпɼqHyɢzwqmjcZTTSTTRNHGGEGHHEBCA?==:0"?q  HA?=<9;67$4Hc2,'/^E9822/02)"!#*'NҾy.*Vw}vrmjjmltntk\XW`jjiia+LŽjzA?;1,(***))**())**'')+.4589878698788888::89<;><<9::><=;:::778:78::9:;:44999:2764662,,/17:@??<+""%"$/;CQT5*&7T)+*'"*^.!_` !8p By'8'1zVb1-',I1""('#! +'Rӯ.(V~}wojmqtzzzq`ZY`kqun\'MĿ{>iƻǿx#[žzmbH-"+Hdl;}þ7&+,AѦ/PA]YmN4K*,wʳp()  >fpXý9AR{lX6#K/TB00;r^D  |Q! )MZNPTOKMMMO> "(/;Keÿk*\!VD qIa." A@*!';><99;<:=BA@91----.0,00**+*)(&'*/6979:;769<97788:<>>?BEEGHFEIHJJGEFGHJKKKKNQROOQSVSZXWUQNNLMMOSSTUWWYXX[[ZSROJIHA8,! 
&5ET`|R4332.*"*?GLLMQ\cjnytdYv|~~~~~}|n+S6 vR,+("!"%%""#"""" 2T}QGAB?@<51100/(2_yCK=Bruospmlkejpjmild`^bcbZ_\>Anvxtfmcicielgjegee__f^`[^Z+*DdxXO+$'*/3584122̿pHp•~xrnhc\UQQSTPLKGDCABED@9IFC?70-+)*((%!6I9/)E  + ~I?840-+'*FWXYZefWVO  $"6e20;]]][\YV[n:Aa+!GQGLJGIISt( ;p/VHKY\bcp>2v]V//&(ABCB<95;G9%# '&OҴͿý¿.'X{qmkpvyzwmbZV^jtsl]%Kþoq>jБtz}wun~x$`zf>.L^\j7{,&,+Bө""#"@cB~UHgnc7%$KhDxF%(  ?hqU.5;ik6M3WX.44-Q\D + F! !BKHFKIJJKHH2 #(/;Pff%e%J<> -e|T/#(;>;8:;<<@CDA:7<@>=@C??A@BEIKLOONPQQQRSRSTUSTTUUUUWXWXZYZ[[^\]]]^__^]YWVWVXWXXXY[\Z\\[YZZYVVUPD9*"*6LbvN773430-'+>ELMMRZcjlyvd^y}|}}|}~~~}zg*Q|. xN,*& "$"!!##"!#!0J{IC?A>=702321.'6byBJ;Epurvrniljlpiqlkhf`a^b]\\62/++*)'%"3IKQ@Ee3Vw{4b@ '-,AҠ!I?G!ߏ*&}o7'' GioYnf]abh{/awk7Q6_d466;h\H! + 7!5mpeliac[Z^Y6#)1;Njc#p~&`=i_XYlO$)9>==@@ABDDEFGDEEGIIIGJKKKKIGEFHIHHHLJKIIKMNMLMORPPSTSRUVSSTVUUWXY[[\[Z[Y[\[^`aaba`bbbbbcaaedbcaa^[[\]^]]]^_^_ababaa_\ZXUK@-%.D\sT?><955420$/=BHJHNX`gkyvddx|}}}~~~~}yh*Uu+$nI+)%! #&####$$!!9U{VLDFA>602311/*2exCI>Koxyzwtsrptrrwqomlkhedeb]&-q_U6+%$.80+,PC05'"!#(&Rӣcɮ-/X|~{vqrqs{{unbZYamnokb"V{qCoлïʻw#bçynfcc_^Z[d2 vpb]_\QMPRQV\SC&&+-EѺo'4:?zi)&pn'' KdnLZMTq$-6T>_O?;U9Rv]VN^XN" !/! 
JqVFLNKKHGPW<""  (-W|}MQLKA:5034331+5hsBJ>Iqxy|{xsvuusruqopolhhgfb[=GnvvvwuursssuspopoononmigaQ'*KqpYYn{owʿtHe΢~zvpid]TSRTXRLHFHEDGHDE72.*'))(% 4L8/)'" qLvRE:40,)%&=?<=9:25BF!!$1]65Gx{jVsv>7^1%\' :m3v;+uVT6-'*O_]\W[MPZ6$ ")%Uұľ-/X|{xtsprzqe_Xcnpnhe YĽlEs̑}wsz%lۼ|qigdbabab2"úĶ4&*,Fe1;=jY5&%`EOU&' KckKUdR9G>sZ?0HwDW>A~mOES!%/# WqI9>=108>DU4#'((('&#')<;::=DCGGHJKLPOMPQQPQRSUSVVTUWWUVWWVVVWXYZZWXYZ\\]_`^]_`b_`^__a``adccdeeddddfgfgiiiihgijijikkillkjkhfb`^ccdfhiilklmmomnnoppolZM:+ $4F`wzxspslqqmfc\REAABCC>CNWdgzwaju{~~}||{~~|zc&_u).{G**'$$'(&'(('##%&!"Cjd]PG?952451//,$7gs?I72.*(()'$3N8.)@|_xWC;60-,'%795;6;99CP#!$0b63365/-+$0A/ Aa-$7E?=:7&:F "7o'L@<;9,!;H(+pU\7,%%47893+"0?-"!*(SԪϺ,-Y}|xpqwz}ytfaWgomjfb"U|oFrt#lֻ|snjeddbad0"»0%+*Ih\4@"،('e!'$$ UioVk?|skdbx:V8W{GGR!$)"Nuh`_P/)Da_P)$//.//120//-;0LuμiBHEIMPMk\\)Y`2=!;hf6!"3CDFDCB>9648?FJKNPQQSSSUVWVWXZY[[]^^]^]WOU[^__]\^_```abcbccedcdefdcc_`fhgghhhiihhijiijmpqqpnnonoooppoqsnolic_XW]fiklmnonpoprsrrsvvssvfYB2% 7XTO7//.036210-9LTSQRNLLQZ`t~vgks|~~|}~}}~}`$_k'8~@)(&$$'(%(((&%#%&$$Ho}x_G8631331--)!8fvAGK5C`c&)T7)(# \kn`˜QwhJ;T9aX;;?kSHQ , !8\^Y]U4-,)'#"&%%''&&%"%%#"Gq|vX=<543110.&=itDF8Rsw|{zzxvtsrqXjllkkjiigc]8Onxzxxwttsrrrn[WdnnmmmmiecP#,PwiV]p}jǼkC\â|~ǻmXXztXLGfSDGFF</CCC?720*)*((%3lB9.*)7.-,/--*QM}jZKA:41,)$01(*'.2/21 #,]13;jtvpga]]Z4Bg,#!!^w&"!5s2xw1-|aP9-%(E_^XPKJLL2" '+Oĺε,-Zu}yqpvxz{zti_[fjgika!_½nBtЇcþu(nŕxm#"(*Rn/%|}ta^agXHNOi*%,/I֎ X5&;AB3:F&1кP(# ZjjpJ.?pEFa\TPXn7P3\C7:;fQHO 0"!DWJGJG@:>@7.*J^_cacfejmkjkf^Mʴ¼Ǿ~Q<  "6޽?9DMD8A?.btWLKMONQNPQTQTQQJD67;MTY]_cefgggghhikkjjkllhaO=>BUakqijjnkoklrnnrnmpnnprqqrptvwr^_hsrpronmmrpt}~}~|xsh\LHKG@DJSW_]bbedhfb_Y^fekkaA& Bzmcedfgcdgnkj{vflu}~~~~~yW(df!.n<,*&##'('%%%#%$&'#!Go}ypZFPQ:53550%=jn?I9UpvyyxyvsqpqnVflkkiijhec_7QrvxwvuuttrqoiQXellnnpljecN#2RmcQZr}xlįkC\ʸe9<\HGZRBEFG?1IEF>810-,,)'%  0pA;0):UtZWK?;31-'$865:68;=CJ&!#0a52.#%% Am0$$$!#1l '  ,%%n\Q:-'#)***&$"#/'!!&'NԳy̫,+[x}~|||}zvuxyyxuqjc]emkih[d{njFsϳԤ¾r(q̷o(#%&(,Th.(u¼''-+M؋$#"0++;%hf@ON)*WqU&# [oqqeC600A;:CCC@<5Us9L*]NJiEGK 0# 
%`pb]``\[[ZO1 Rxmzidtrt|~{cs{J>&#%%##9qmfszvVPNORTUTSVXVX]]^^_^XNA?I[jnhnnoprrsrmlooopoqrpiV6*@m}||~{~|uusurTBKkaZ^`]URYXUTS[]#!!! -^UINNONLPNOMFHszv]hs|}~~~zwW+ib4n@-)%!"')'''%####%"!KkyxpTKZkW<9<:3* ?nuEF930-+*))$ ,H=/)('  j~`K?94/+)+^nj`hpba_h+ # *X47RLKEADJRh"/s #>GIKZX^gz:(p^V<-');GE?88:?D5##'"Nӫ|˾(*Tt~{|z}|z}vswwwsqpke^eppjfZf|mGsνбn%o޷|q]cWN`[^l/&sg#%++QӍ)/+*'!x)+4&ἝQ'4kwc;(% [zpijU7[_B?=E:3Yu+F%5e5AB 4!! ScXUY`_]ZPC)  +0 "'1:NoĽso}an|}F":0PRB+%Dٸoj]Z^]Xzq=cz}}~V}7p?7$$'*.(#/4.(/35r'1RURPRQRSSSTTPEvzv[ds}}~}~~~~}wR(la!4j:++'$$''&&$"""$$$$HjwmpeUPNKHDBCA:.!@mlAD=Yvxz}{xustqnonlolmkhhfdbZ5Tqxyywvuqspokefoppppomjec^J%3Wx[PYsnlsҾƯyc?U{ԣZc[Z_s^RLGLvi>BEF@6JF@<950-+++*%!-S9,*-=--366:Va[\YH- . Kʾ®xB%5!#(>0*"Iٺo[k\Y{{q-oqŮTvG9)$%//##.0% %./p$6VWQRQQRRRRRQQGy|sbds}~~}|~~~~|wP/q^3f6,,'%%''%&%$###&$ "Cg~ki\URPLFGFE@;0!Bli>B;Ytzz}zvyusqmlmnoomjjifd`T2VoxywxxuqssqlfjpqpppnmjkfcI#5Ws_U_r~hwȾ{~znkhet|~za?Ny͘]^XV^ueNMIOmACEHD 3EEB<7421.*))$! .?7-(AjxhPD=5.,,0O7 ->"&,P& # .S66Rs"&/iI%O=5a,%!t[#J/#d"7n7(!,~Y!(`>$Vda;/*1P5(/JB $<5$ '%Kֱм* @`kgee^_efaVV]`]\XWPI?BNNGH@yƾl>tϪİÿq!tղ~sn^NGLCVn,-xtmXN<-!*#"$&,,R`$%2:%|BDwR'-|yvp&(' `}rguME1kJs,>#evig~w3A9 @x !CPV_d[MLST?  1  Rɳ¯u=++ "^xz{|zzvsqqoomnnnnlkkgcaW2XowyxxwtrsrtpgjnqomoolihdbF!5\v[S[s}fjʿzxzn]VPUV`gwz{~~yb@Q}Ϡ`\VV`x{^QJOVADEFA2CCB?8300,)**&"+fG9/';||,TkUGA:4/+)+;6,03037=R(!# ,P56IS *2"'M8 7l0%"fP&A"m"1n0t?,")`=$SbX:-)*@,"&.' :5$ )"SקҾ!2KVLD@941--/.,+)'(&%&((&&&++"yļr_?tϻǻo#t~²yK(Bem*.q|ynYH8"$*,Tѱ8!j#34)h!#)),L~̳:*' [{rdDC7Z>j;f^PL\_2=!=83l|3>5 Dp",lphYN;.,7BE%  ' $Qʹtw71*#.?:3,$WޭBNZQ+*;1(|i+st +ôO54'&"!  
"&,|uE e.:UVRRPSTUSTRQPNxzy_ex~}~|}~~|ytO2tV;a5**'%%&$&&%$&$#$$",Y|vg_QOPQNKJJJB6#Ju]ED9^ux{||yvtromoqonnoomifedX3Xqx{xxwustsstpooqonmlligcbG:axb[_wgȾ~zsaH7@BFN\erx}~vaAOyn^VVay}[DMwGBDFEE 5JC@=51//-*))&$ %VH;.)9:;.YcvDH?72.-))Ee]aghcYe[(!# .S76>NLOLLCGUc< .e.# <>9<:425>n.m &B6:>CFIZs?(keR8.((;:<=<56:I7#"'&TӻЯ&0OSND=731///*)(''%%#$"$%%(*,!uƿt?wːurvqshn%r~m4(+--0\f&/iz$%,*Vl!%&#h9y2+lI,(4y2)& evoa=>a6c^ 5nw%'-._՘jkhec^dFy/1J!;/$ &*$ yvofSyÿS+_K8>3V?13#LaV[ik,8( +f> 7mhRSUQNMKB:" +" #núþj*P%8SK8$(u܎R~_}x<.2n5T&%-#(!jEyp}qmbg+`S4 BYVSZȕJQNZ~{v^hu|}|}|~}}}~}zlC8LFX0)'""$%%%&%%&&&%2ay~vhQQSRMLKIGD4&SyXC?=fwy}~|wuttqqponnnljjhed`P8_wy||yxxutsrqpnsoomlkjjfb_6"Cfl[Teyxrź{yuWDABD=APct~{gG@~xymh[WUUSSOIEFKICEGEA(D?@?942.,*''&#! cL=,*&M6 + 4 e^(hcP:.&(FcnbUORWV5*(MIJĪ%2RUKD?820-*(''%#"! !!$%&)+c&""+p:sжi$x|tnqcLO]fd\8q{Ϳ_"',-_Զ}|Ak,-f,$("#(# q_LL`uiB,3KH/2eI51!3% ';-:$  +a6@t_@7:>=<<72 + "pǿþh%]$1JF0$*z׏GXSG1/=1:}L3{[G}d>%'r|srM~vtowsiqyecNun? AYTSKImoIRMV|zt^huz~}|{}}~~~|yo7!*D@A@:42/++)('# "M8.)3rq ]dMSG;62+)*)$ !#  "! (P449B8:<4,...# <2%":QIEF=?91) *f #PWOFC@@@4 *ma_9-$ .6544/+.-#)'Qկgqʲ&2SSLD?940-*(''%#!!"!#%$%(,#`0DHF22A6+n|5qР{h#vĹsfAMLJLD^a@qyò}`R:0'"'-.^зf."&]yHc0:ӷ2$#(# q^OKD?-,16:P_8z3!$801p+A%  s5 3pl`c[XUTQK9  #|IJÿd!cy%413*#*ЫvZ{xG8{'>#-S}jwmszdixfR=0')"!FZUUb[8=NSMT~wr_ivx}}|}}{||~~}|zu5DEd[($ "!!"!"!!!!7byzkM30/*)()).5,%RyZH?An|||ywwvsqqoqmnmlljhdbM8dtxxy{yvsrrpnnokmnnkjjhgd[8'GmpUVfyrȾ|zwrfdZ[_q}jH3xaTQUgi_TNGhjBEFDE!*EA><941/-+*'&" iE<.(,kl`XZUNEE)^nVVH;52-'&"#  .c01+ =0%!! +u "!'ohW4+$'&QջxȰ$3WUKD?940+('&%&# " !!#%"#(,*[Pm%}w/p;q˅Թÿc'vt_%(Z` Bxov`H@@2-(!",CSWcE#'./cX"$ ]vHe22E $&! t_xiL@/)-/54NNaXv=8/3v|n)C#  + w4# Tj_ZYZVXSH/ % &õbmt$04?,$/~8MWh\Q]Pe{h.Eg{j41;kzy}p[]r~zspolfe^[VL >^cB%+##DWUVOXyWOQKY~{rbgvy|||}|||}}||{o2GFbR+$  %$#"!""## 6_soC7&#+*&$#$$$&!!U}QE?>nx{|~|zxtsrqpopnlmlkigd`K2cswzyzxvtsqponopnmmmjhgfeZ3(GhiXVgxrvþ~}sdfms|qL3wԹq3*9eNAgs?FFDH#'C@?<8320-,+*&#!^J;.(%" +  fpVWE940,)%!"  
+Z+0) &o5%!!!+s &pkU4,$%#P´ƺ$4QVLD@82/+)('%$#!!"""##$$&(-5Αl{aw=@DCa\E~nyxyy}Z#'-/e֏$%!.uLa/9{p6#&  xZ}zywH-9759U:B5(2c=AV*@ + +}8!#E_UTRTQPLE* , (įTPGSQZUZ\tp&CL5*%1χfyds}z8E0"5}7ck¿$&hFF6 #&#FUUV^|HRNZ~|rdixz}|||~~||||}{l2F>$hU-$"$*,)'('('$ 0S^}Q**&"(,+(''$""!X~QE@Eoxz}}|yvtsopppomnnkighf_L5evyyyyxxvusqoopmmmlkiggge[0'KpiXUiy}rºxr|t{oO2wɡnICLo}q\QQmu=DGE?' 'CA?<840-,+)(&#!}6E8-'$  f\MVF:21-*'&!" -R*0+ ! b2&" !!+p !&rmS7.%&%Oȼȳ!4QTJB?82/-+'%%$#! !"##$$$$)*,c4++*(" %.ws;tοϯ¿^#vɹsjZVGFRDebIt@"&,3hVZt!R`vrRa+?no:$&  sUžAB`SD?L,E5.q]+=  3 9emc_]`ZZTK5 0 + 3ÿ~Y{g*4)'&3ˤi_~|}2E+#9z;yzy}ts`hý"P$%#$'"HWTUX{eOTL\~zu^hvz}|}|}~}}~||~{l5K@,jN*$ "'+'(,,-+($ *=N|U<3)"&+&$$%"! #U~UF>Jlw{}~{xtsqqppmmnmonjdgf`K9ftyyyywvuutrnpnomlkjiggge[.*NtiVYl{|xļmN,yp^][YYYVPKNjr?CDFB'*FDB<630.,+(&$# u}XI<-'$   `oQRE81/,*)/KW5"4F;   -Q/0,$$P0 Z2%""*+ #*" *k%=?&"drX8.&"1K:-DF-&'PɘϽ%5XWKC=831.+(&$%$" !$#$%$&(-'T"(,+% $*9uq:ṭþ`'vǴq]*#)$-a_Mk¿xK0#&,-n@9"6ljX\,!9/Hy5 %& oSBfAF.Aa9ip:0; - MscQONKFKD<0 : 1w}U~Z(=:3+$>`@a{v(P|!*>v:sgIy,0;.)*"#MVTURTv|T;QTJ[zvejw{||}|~|}~~|~yk0N>!!*hOwm*0bxrYVnS2,)99Gt)&$QåƳ¹"4VVLC;731/,(&%%%#"!"###$##'-$H>o?v̕þ_(zzŤw^".KX=KaXKf¾pYMD9')'!&+,o4Sа i%|caT,!(*(*%!#& +tPvjRH>DIPPV?W_JvLi7B,9fl}x=+3< (RqT:7?<9887. C 1µÿt|PR+?=8+%AB:JII' *JDC=852/++()& p~aH9.**Hlq+2U]>ZrKOF:4/*+3y`}'  /K03=o`oh><:m5 d7%$e}yP{ki^m " )e*~ejU_VYLe# VpS4-+Kwjoa;&$NžĴ¹#4VVMB:620-+(&&$$"! ""###$#'+¾f\nVҬ!2aiQ,"'*)%" $& tRNF@5'*,2/9@bU8t/^5B),?2; #Fqj]\]ZQVRD0G 6¿{OF)1*(%&G׻|;eys$X8ɻFort|~r,!,[MR/-,!$RZVZhe?StGRKb}xhixy|}|}~~~~~~~|vk1WvBI_.+$  &+5?<:64.*#.VD78.&()%$&''#!)Y|QG=Mrx{~}zxtssqqqnnnnkigfdbA8gty{|yxwurppqronmlllkihge](-QrfT^m|p}ľuO!'wvd_XQOSRMKFFZ]>BEFA' )LCA>:51/,,*)&! ob@;-*1wl:v{z7 UpFMG940++3l^`v&! -}J220*P|mJ3Pl* Z3%&  ay{|Q:rdj $f'yoblTWgaNjJ6.+@Y'p1##Pó¹"6ZULD<951-+(%'%%! 
"#!"""#$(* ǿT4-/2*'%'+6wm9x˭ÿ\&۫xY1937::\UTdrdS\A%&*+sT Iͪ #f`oN, '+)%#$% #rQFEB7*)-4Hu:Qd/@>s4F)?}K;CsW1: !%!-\cZZUSLNH;' B >½wJ A&4=1""KܻfWy~zq] =̵}NrsrrZeg~bnun"  @ +*!&RZX\ih7[jEUHh~znmvz|}|}~}~~~}|yf*WsBVo>0'$"$(-6>B92/,)".YC59,$&'#!!"$#(YxMG;Nrw{|{wsqqqpmmmnnjfhfbaC:,*##Jľ̾¸#6VTIA=:3/.+'%&%#"!"#"#"""$')!üB"$$$  $*vi9zˊ¾V-{ɢrW!-ZSWhSB73,-7>GOW_dZKNJH5#%*/y{+$]VtF/!(*)%# $# +(ŸkTSAB5)(,6N:6/()/v?8+,5o/mvI Zw]UI;60,,1[e3!"$  %H447`N.:717Eo? +"h0'$!T~tV*"d"%bgZ@-  KeI5.+@Z=) AǾ¸!7TVH@;81.-*'%%%""""""!!"$%'*#E;NlV$oi=x̰ξ¾Q/}ubJMDHVG[UXgr^~rkb[gT:)!',/wҺtOzG/$(*(&$ &' *ÓPQn\@543?9E8lslj~>C)Gn72@hS4@ -% "$"$&!(1,9 + ?ºt@&7(1) CPD0ECBCHJOQJKLIF8jHǬhE{kXVTxchkj[_\\`VQRUG{ve0Q6B%&%)WVTSNFloKHSTDq}woryz}~}|||~}~|yc-ds:OX<;81,(-5A<32.'))%0TsB39,$%&#  *^vFD;Wry|~~~{wuuqlpoklliihifb[B?lwxyxwvvsqrpqpmllonliihfcT&4WwbW`q{pvsƷuQ$"y̹j70ocK:Vt;CDDD&%JDA<8411.,*)&#! qzIG<.*,?fkQ(:UG a~nbN>72.+/Afyd<*! %{C56Iv: #|6'&!ClyiU>)$c  9[pYZhbJ-AbI5/,0gr{|tV4 :~}}¹"7VZKB;71/+('&$$"""! !!!#%&),"º}Y\i?wɭŢ|¾N1wZAN;9C<\V eZs¶pQ1#'-0yέGO/%(*)'#!$' -~XT47,)8F43 0#-/+12(!)66&  = I̽¼p:*0!!Hb* B3"p~Lȫ_?eze^}OOylopnqg]gn_[a^Sy^,19& %&*XWVY^o_JTRGqzxoty{}~||}}}{yd/hm3CuQEJH:/*1@IA632**,&._s:0.+$&(# ._xDE:[wz~~zyvusqppmmljhigec[:DnwzyxwvusqqqoonnlnmljjigeU 5ZvaV^r~yj}tƹtP("yŠydZ^eiaSKLcs@EDDH&$HEA<841/-+)(&$"!mw>H=/),A1+,5;:6IA ^zgZM@73.,/@lhRna6# %R646G41/+)&3a8 e9&%=}P?`b.$d  CuZ&Ch}a8AbN41,.ixPivC"'jymnvsnfc[[a\»º!8WZLD;720+(&'$$##" !!"#&)-"úyb]W\ULOPLL}h?{ȶM1¾Ýt]&,"4`S iZvȽj^A#',1xD$'+.,'%(6AH.$()(%#!$& +0mUWý4QI=8;j/(,,Lq@20 .|%4Y[QSO90HLN42*9  N¸ør50. 2<8. Kg~rs{CxtRƣ.YefL|_^mcKUyopolmfcrodZa^O}Gz~J)Zf  !".YWTZjMRTIp}yquyz~~~~}}z_(cl5O|SU[UL7*0EWH5.0**+"2dw>10,'&'$" !!/`tHE=Xxx}xxxtroonmmjkjihdb[9EovyxxxutpqqsonomllnmjjlhcU#3\vaWar}n~q÷wO&{`^[UTVVQHDH`bCDEEH("EC?=:50.,,*'&$! nSG=.+4x}VRp]UNB73.,.H*/ZM  %P64G< c:(%!:ixrbE) "` >exkZD/%"@cG40+:udI, %! !KokmklZ3565:1745CG}вȰ!9YWJC<63/+)''&%""# !!#"#&),%B!+xd=|ˬĩÿL0ۣwsM=XZGNaR q]qϰqnlz{xngXOVaZUpO#'-3z= #! 
.AH.#((('#"%& 0kVY=GS93Lq0/1657AsS0, 7q$ !Xsc\_R9NZS[I#B>Yp0^°\WV[pm2>*#8HH6 U=\HSWjRZ3h[ʟua?&;XkAV}mku|oa_mla]`iUq "!"!0% /XXV\pM?XYNTLq}}rvx{~~}}~}|y`,kn3Hd`^UN:,-3A?/&()*) 1htC3,)()+(&&&#/brFD;Yux~}xxwsqooolllmkfgfaY9EoxyyxwutrprrnllnlkljijmgcS&6^u_Ves~|ohµsP'|`_[SNRQRJEGLJ@BECB) %L@==;6/.,-*''$  onH?0*/A5-),.&#%&Pm^UPC82+*/=b~a@%! 'Y533M;=|:'#2' j9)%!R{kS5'%4  \ "ZW2*D =aE60,>u[=+"0V+0pxdcXTTOMTYkiɞ˾¹!:\VMB;52-,*('$$$$""!!"!""%)+"~l`bZ3FV8+|i@|L4ƽyf[^]`_TdSt`lĢ|pekrqehY!(/4~<% 2=A. '(''# #$ 5lT[K?8*H6>Z?qpHv+..MogG0) Jf" )cg?:@?DWE=OJ"H7 F19iɸÿj/D%%1I=. tOW-&/q6$\)!^ ḑx1Oo}?atljedaluje[b_Sn6- 1WXX]rgRrUPQMt~|sxy|~}~}}~}}|wZ+ld(C|^hi][=-*-46+%(**'4nsB4.((,-(%%'#!5cp=B9_x{}|xvtrpoomlllkhigaZ7GrwywwwutsqppnlklljmkkigebJ!:`v[Wbs}qzoºsR(|]b]TRRRRJEECBA@DDB. %ICA@<60--+(&%"  qND?0+2XXQUZZZWgXNqe[SE:4-+/\uW5(! 'i22350iK (Bt@ d8($"7[ZNQURVhf !W!0WQ[afepx#>cG6.*&_/:˾|zprdđȽ̾ù8XVMC:50.-*(&%%##""!!"!##%'($i851-+**(%!  lVB=0,9}{tuO K}mbVE82.+.=U3,(079Cf*  #f54BAV9&&"+ryqq~oja $]!/dFFN_?Uh@fH5.)(isKPjjHvu0ȿqebUO\Uɇ¿{pngf~ļ 8XXMC:50.,*&&&'$"""" !##%%)'<*/CSHUsi?N=ҳ~Z,)(07BoZccv¨|) (,5̿!>2B.eɟB%(iI513+&PvjnzW\QN8[0e0''>QJ`ZV;1' L`Wpd]cdU4>Z[>S6 3,' wɽlehXe(S%%4>-( n0,-*-6Pg& *Gpñj:^<zAH?DMLQ\k;prhmhmvxupb^aZ[ @@<50+*+,-*#6ipAD=_uy~}}{zwtsrqpmmmmliie`X7GoxzzwvvtqpppmmmkjkliiihebE"qjP_bI?930//00.+4gf?C;bu{|{yxuspmmlkkllihd`Y6Hpxz{yvutsppnmkklkkkiijhd_D ?hu]Taxr{mʾxX&~ʉtbWQRRMIFFGGDFDC@.$KC@<62.,))&&$ \{>:>0+2K   K{FCIC:62.1FP+/YT9_~  $`122Rz|{{zw{J @8%&$b=W{N |[!&fg`^kY'_F=m}I5.)Fyfbe}IErM"#Kwke]WTVYYUOQPSYTӯpɽiy$:\WKA;50/,*'%$"!"" !!"!#$%(ƹF0uuI$ &3hGI>[IuRY_Dn[]aMC8, $!!%!*+7ŭ.!3.#~E'[q?.#$+R(dpe_k{jRA;9@W\M8E\P,01#"%)(+D5+$  OO'$")(!%1,%a% "L4(ǹ¥~bc~#1G@$ |jaA+%&58sb-[nMGOcfo:urpyyrnovrihgbVJ!=;>[.N$$N +=.9ZXWV\t@PTN{~~|vxzz}}~}~~}~~|}|wT,vW"VuUJ<830.*%#$%###$% $BlZK96951.//1/-!7jgDA=cv}}{xvsrollmllljifdaY8Ntx{yxwqqrnmlllikkkkjiihd\B"=jr]Vcx~vysȾwW)~|gVQSQNKFFOTDBFCD.!HE@>640.+*('&" !W}15;.(,gTHIGUPQm` Ku@HHC:5..4fwsPD|B  $b22.c~SPMKFiv, +=;'&"Q\:'(< &$!S  X@((  
>mE3/+FkG.%#I{pb_YVWYXSQSQTZXСjvѳi"9UTJA:51/--(&%$#"" !!!!""$&$!ͿĽbG%"-$*>gGǚzJBȣoSvnex[dXJ>6,"(+5ҫfeu!?/!~>%vѰ8$ c}:&dsbZWSMB602575302.% ""#$&&&)&&  ]M!""!*+%j$ &W2+ɼw}afs!%7A;$ s]$p_j 81HFXYr'#rpm`}8yuyyv{{xxskgmc=!N`,%)Q % :XVVSFBu\HRQNz~|{x|{}|}~}}{vP1~X(ajH>4200/-)#"###$&&%GwrRL@5352/-.02/-"9ohMB?dv}~{yvtrqommlkkkigebY1Rsxzzyvsrqoqnkljkkjkkjhfc]?#?hk[Vfw|uw¨xU*}_g^XRTTQKGG_kGCECE/!GA@:441.,+)&%"  `D==.+2xQ ClBTLD<5--0W{af`r;Owf#  %]336w>L^CG? +<8(&$Jvy[@(!XB_voWL5, =nB4/+0bv]?*"$Jz~qb_XXY[YWTRNPZVǸѼmqΞ~!:WTJB;62/,*'&'$#"!"!"""!$(&$<$(:AO\jh@ɻFI׾kDijONlsWgXM=2+'&$'-;J L!p)%?#p5"*mI6$% XsZQQOHD:0.12100/.+$!#%%'&'&  jK#2/(&,-&),6:)p" ?41ǹ|\nb%FZ?% bpl^AG,#(zvmNip8qvglvtyv`W_6dbOq* yaZQJ;3/,-.%%D:# ! #S22:^g5 =7(%$6isTlvrJ% _ :ij95cvA ?tI7/*/ww?YA#Ht|{pd`YWYZZUQQPOYQݥzfas|˸}":XUJB;61--)''&$#"! !"!!""%(&#fiywpje?ȳĿBF^Pm4!,FrWodTI:2*!.bt+'*8ت& $M l)-w6$p~.#"0ū?% `xMKHGA2,,0111.,+)""$%&&%%#  pH 7aUNNPSLLMUR1|Ž ,X34ƺx[sU*ED-!$ezC5& _$~ievmprrtxz(SJC>Og|Fxtl{uvswicdc2%_M6!G04?  ?ZWWRVwb_nWGUQQy}z|{|{{~}||}~~}|{tO1~M,e_?;;76:96.)#  %'#(Oy[UJ?9:8457547>%@r]E??iy}}{wsrqnpnmlkjihfe`S2Sqy|{xxwtqppnmnllnmllkjgda6#Fpr\Vgyx|w{X+}îa-,ckJ?Sx?DFDB0!DB?;751-,,('%"  iH7;-)(%DwR>hYLM;3.,+' 3~t $T/0+%'! 3) V6%&% 4olTm}qK! a-[sg`eV<( AfN5/,0^pU:$#Kusf`[XYYXRQRPN]Vģrbmίɾ!:YUMC:61/-)('$#$!!"""!"#"&&&$ù6 0{a>~ʱGIλo~W 0eauPfoZl`PF0.965.(!&--5y2!(*9۠@P(R̕J&.n5&uw!& $j=# {JKGD=.&(/01/-+)& !$%&''%$  p<?nia`b^ZVUaX7ć #D7ƺSVWRl{UO(8%%! ']B8:cu)+;7763++uCle^skbjosk*Oz) D0@ -0$!@[XXYhgJTQTx|{||{y{}}}~~~}|zwI9H-a[FIMK>C@?40(! "$#,RrYVME==<985548:&Cp[ID>ky~}zxurqmnnmmlkjhfd_S5Wsy|}ywwvrrpmmomllllmjffc\7#CjpYUk|u}|ħZ(םmURplYMM]DBDDC0CA>;640.,+)(&"   e>9=0*(% ?M  8k_OK<3/,(%!R5?2  'Z22:yxx}ur{vT s<&(% IxbJ0i$YniL1,D`O4--X{[>"#8#"Iuof`YWXYWSRQPR]ZȮ}­ͷȿ;VRJB;720.*''%"#"!!! "#!%&)$»~q`O3*7GN3Ć3p*;ðſ{NH3JJ8! 1_yyxkc^OUT]wcdW]`^fl~r<40PN|nNz\ld_orbOX`Ur%(ckJAo8 #'! @UWY`ERSY|{{|y|y||~~}}}}xvG:H3gbWXa_SPLG?;/'! 
!"$!/T}n^XPH?A<94468;9&Er[EB@ky}}zxutqoonnmmlkifd^R2Tsxz|yvtupppnnnomlklnmihd\6'FnqYXl}~y}ƨ}Y*ndaY[qxVMHO_DACD@/FB<:751-,*&&%!  !!eSD<0))0$|ssxHLyU g8('$!]oR;0.-0KUf&aP0,6AL[t( @_|G2-+S{onngo&$Hw~jb`YXXWVRSQQT\Yɾ{Ů=WTME<630-*(&$#$# !""$&&#[@)*Hehhfh^>ʺĿyL!> -9D/  5SmTOShj\Xdtk`jQjb[[SWKscg{3.0OFE0lk`agc\[QLHG9/(%"#%#5NiXWRGAE=:569;;5$EuZG>Epz~~||zvrqponolkjjjgd`T1Wrx{zxvvvqpoopommmijljkie]2*Ty}acw~}|é{Z*w|db_VWtPLHId>CCE>. HC=:72.,+)('%"  [HD<0)1p|zi .{LTNJ;4.,*'"3u  xX54=;ibRQ H6''$_|{wwUqV"brig^\L EdD3-(Ajlca_H$Ju|i`_VWXVSRQQQT]ZƮwǜ:YUMB:52.+*(&$$$""!!! !##%)((6!$ /16ejmmK>{Į@SгthpvnouI$_vYɩkty~rf-"(*>M$!A?k;Ƭl#$!0ƫ6 kt[LFB=5)"#(--,*($#$'&*,*(% 5"!Cp@!+7>-';J-#x >îÿ{G";!)++' ;ONSZaxk'/s[swkr^{ ,ŸÜ84`z`fiY_io|ux&P_2-spA ),!EXVW\u|FUNT|{{{{z}}~|yr@>E4rqjimja\TOMLF:3+$%'%#&CXlWZ]VQUKDACEGB9%EvbQBNn{~{zxtsprpoolligggb_P0Yvy|{xuvurqpoqnlmllllkjjeb54]zqa{|{~vĨ~Y-w‘}|b_^WXo`JLMz?DEGE4FC>;53.,+)('&# Rr/@@1,/ikT[ge_XT3 1mA^ID:2-*+,(#=<3 ~_225f-&0.*TM I@$(%-1El4(#$tZ Cc2 CbG4+( "SH1!Iuyfb_XWWXVRPRSR\]϶u|˰i{Ŀ ;ZUMC:72.,+(%%%# "! "!###&((+=&#&$ !+7noomL>yǧIJCVʷ~yxwvv{J$gNc\WL?AENSP=539=C`Z*"(*Aת$# `El6Oy!")v5 wmYKEB=6+$%+.-,((&"%()),.+' +)!,RN839:75=J<$*p E`QAg\`lxB%1"/038DbqH^x4uLUWnckZ$=#ks<\ ;|]rpo_dioadqjuu+\~;%?A;8 /0#B[TVXpbOZ]hMTOT{z}{|{}}}}~~~~}ys?IR@wwolvvka\VRTVFA5(''%$.Rpwge_YWZUMJKMJE='AhnTU_ky{zwvsqolonmkkjgea_K3\txxywusssrqoomnmmnmkijhe^AGijZux}wtŭZ,l|{ą\ZUVku~ZIJy@CEHF1ED>841.,*)&%&# O}/:<0*0uM( 0_GSA>82/*,Lxrolejoo3 `016J <=)($ 3I~p*'**;FfR .OwH(8@Pg) Ub{B3,*Kebdfz}%"Et}xia]ZVYYVSPSVWgeͻȹ¶rfgvW{yyyw|E+kGK96*(( $(4OK&!(*>؈!EHa2*u""4k4]j1 qYJDD?5)##-.-+))("!')+-120)  ' 2b_V\^[RQUdG"2l +X>Mýu=/11B6XDfA"(5#jYF*XU@?Gl\6+!'4g{ AyVfVUr]ywW}|ey/u~7$0(&-- =WVUSNPQRORSROV{{{}|}|}~}|}~z{zo:941.+))&%%#"!!O(8:-)/Oj{S2 3dRKAF82.,.Q{xw.  
#|e21/;1.v7' 18(&%!b~bcL$#ozksnni[!bkxC3,)N~wztnmS #Kuzma\WVZY\WT\dhriѾ̻ .2:CO]VGTkkeUK'#*?ڕ.Lsga_{~Pc>T""9nkTe1  yy\JFEB7)##,/.,)('%!()+1574+ !$ PoWPTTRNJNVM%5nV{eTλü|u:,- *I;  ]Mzj[IEBSAbdcdwKNcjl`I\{.s|nw C|xp|]MgSw~dv5QJ&:/!*(AWUTSUUUTVVSRI]}z{|{z{}~}}~}}}z{zlA)$3Fe}omtuqib\XgaH5,)*'&%+UpzfdTLRTNFHLKJJB5?~ʫű9Y}|xxuxD 5k[ǫfchsowvi_\)!,Cǻб_ñ{LY>u&" O>*q6  fVKHDB;+$%*/.,*)'%  #(+-0462+ }#Xg707MK4.]%''S»^q7.'!.D0' +gP9!!wqoo|oWPL[zp8vv6>k FXMJWrzT{{d\m/hxG;.4 "-* !E\USSTUUUUUSUK^}{{}}z{}~~~~~}|ymB&&Ics{~xplkbU:*)''*(%#(Rsyg]O@KRKBHOMLI@59;<=L\ix{|xtrpnnnonkjijic\H3^sw{yxvurrqopmpnlmolllkie_UGF^~ptkìY2d|xwaWTUROLFGDBEGEB8EC>7771-+('&&$#"! JD@;0+,Kx55g`  +aURCH;2-,*+8t63L a636}}i6 CK)(%#$"!WI$AisA1,$"$($Mqyha[YWW_kz~hWoϾκ! >ZRIB<73.+)(&$##! !"!!"#$$($27! (($$#/`a[_HCѵy¾:[~{xusv>6sWåyu}x{|va[U%!*EͳRUSKGB*0A;6225euPW:Z, ,z0"$aof 4h&F^QMys]g@ya!#$0 !*'"H[TSM=>:CAISSEazz~}|{}~}}||{xq9 ,Pi~yxzY7-*$#$&%"(Lnxh]PHQRJFLMKJF>69:;=M_jz~|yvsqopommmljhgfa^J4ewyzzwvutssoommonmkklkihgaQ@Of|su}qĭ|[/by|tmjibYSTXSQJFHDCDFEA6CD<7552.+*(%%#"!  I08=/)(9ZxO& ,^SJ4E92-)')7X"&+ u]42+Ty+ 6dxe( FB**&##8my["_T"6[V):mr@3*! [SLC<72.,('&%##!!" !"#$'(3|wc_^51]a^eMB}:]ʶ}zwxu{= 9c\Ӽ{{~pdOJC:2&$& !,H̴PRIC5' %[nO[:#_#!=w( pZLID@9+%'-.-/.,,+&%$"!#)../2511. +q$ !Xg^WTXY17RT:GV(:\̾½om,Dv !7/ jnp{*$cTLIjuQ=[yV <ðrgϠX ,uXXkUNM=L\][l=d|z{xvw{yt{xsn^Z:q'Lv% #1,"KXVZqUQSIa{zy~|{z|}~}~}}|{{|xp0/KduD*-+%"""$!&Hksf_QIPTMJMNKJG>6464/(*ssJ* 'r`YPmjvxN BE+(&$,jpEjXaU%,q`_i aaaiGDÊȲ¿:Z½Е~|{zx{; :]Z²wqy|m\N "+C̮KLG=7) dmT\=9F $ Tѭ- pUJEBB:+%).0/.--.+(&%##$(++.2674/ 1k#!8GHGAI@;AKK5!QOR1fǻÿl+Fq!&:I:(sA= &QG))}cQ[8;hG CïW][[impϱ1!gP_eRdpzJ|xqttqj]K. 5 "Q[UWTcwp]PSSDg}{z~||y{~~}}~~}}{|xl- 2MgjG:90$# !'KowjgZNQRIHLNKLF@6264;J]kz~|zutsqomoplkiicb_G6ftz{zxywvspoponnommlkjigfhG@To~s|iİ|\3^uyuqm}F9dUNGDBCDFB6BD>541,,+)''$""!  
6q4=>/(-J<%&%*C *xqbZA?83-+(($  rZ4152FK +=?+)'$1|<CiYQ!Jg +!z&THfHlǽþf%Ll"*-#% o@7/(#!Ce/Qlwsl[M;[A GƳwT"3=~Ib|M`qiKhfvw{{yzxfA:1 CdoG %TZXWRWKGTSCl|{z|~y{~~~~~~}|zl/$;b~gO?;8)" )PnqefYKHHADB@?BE@6341:M`lz~zuvtspnnnnlkigea\E5cvzzzyxxvsrrpoopmjkkiihegcB@Pmp|aƱ}]3[{zuo7NqoRQSEBCGDA5EG;62/,,,((''$"!  6r=::0*,e% -toPE:5.+)&%  qZ520Fj*#TL' <>.(%$-niAgYTN&9o$8h' =cK2+"$iPsk.ýT8^˽ǟ¹?XTLD<74/+**(&#"" ""#$%($7#/-,(  )%2`bccDJһ6gӟ’~~zxy6 >]>{\WN8E]agoy"!(,Lf#fɾQ-gi[9*^c"GdB)# ROHECA5'$(1110/11-)&&&%'+--.011.+ JZ! %aeQQPTPIKPUDZ9MMpǽýdSa &@C9& !?rUuw[eK\p6Pʶwu,H?j]WsYlGdfoaXox{zf;%aZ)/FCJ +$QXV\reQRQDp}{~x|{|}~~|~~}}}|wk,#7[wdM?74+%$#!'Istd[K?4$ UYlVPF2&10+,24!)-LP"vؿj u`pY9Kъ*% _¯x% '}MQKGEC8+',44564695/.*('*-/./01/+' H]!*g]/6;Q@%5DPAb400qû¶½` \\%-B3 iGF&A]?sEFNb1RǴý<o{sukQruqhuxz|yf20]ss5Cxx2 'U[XX^v>39`ORRDlz{}z{z||}}|}|}}|z{yl'%1QrcP@70+*,(#(IwlWQF=:<<=<<:999331.:K^m|xwvsrnmonmligfda\C@jxz|{wwvuqoopoonlkjijjjhh^BAZs{ltŰ~]2X~||͌g[TUSQPOV{dCCFGE:F@:6411.)&&&%%"!"  4w<+;2,*(0_|P--Pp  ykeOE82.,**/z~ib i\50+ *3 @O.'$$GyqhcdYUosbG%00.4EF=Pz* ;kC/*(Htkcj(&tͳxA=[ͤzɿ~w¹|!A\UJA;73/.+'%$"#"  !"#&)&7±\suqLHgeGMhdlpFM¿3c}~zww6 ENZ½wnS"*,JS("AƫA"^_uW74|0'#Lՙ# 'wQSNJGF:-,.589<=?A<51/-,.100010+'! M]! -gcWTY[K*M\Y;n6 "^`8!s˵ðgqqtsqgq^k[!(3CEHFA:532156554210+" TY"+ef\ZXYO.AWL/t+ $nW%w˷®`diadkguXuH)EJ6 !kp&Xq0^rQ"_ϲtsr`$(&*# p*zzxyy|Rwn~xmtp'!J#"<=1:H -_%RZW\opbKRRDozz~{zx}}}~{{{{}}z{vc'$2WyfQA;62-')%#.Mn{ZLD<::::9963677.22.>Qcq{zxutsoonlmlkieeb[;;iuz|yyxwvtpoponmmlkjiiigc`EA[vkssq²~^5T|}dd\VTURMIFZPGEHE;?A;:62.*))&''$!#"! ']?4>4*(.[5 '2. 'uvhg=G:2.*++/J@==@Cpc jk8/)!5Yyj* QK*(&$!&6s>4^LfU.&>5?X5 @AA80+151ARcq~|ywusqqonllkhfdc_DKGDH^. #rxg`:G;1.,+,6l XT8/-+,L|~Q" MH*('&#AIdV#Cg^& =h}E/+"9\6ļƧùy!DWRIB<630.+'&###"!!! ! "$&(#>¿ne2"3KA*9eamX2M¾6u~|{2 T/@ZKLMims{{a>).Qz! )qHyK8&WJ(%&#VV  2oLJFDC?3)'*15:?CDB;2-,('+-.00..,)#  _Q " ! "(*$&   $ɸ½eo{Uz? 4LKB 0UCgrf2*RX3`q_bhUw\[ZX\O=BelƮXVX.V!}zS{yql{sz! 
Ld+VZW\|DSxCPPKm|zyz|{{||~{z|}}}|ysa&'4[tcB:8773222)(@]oJJG873,273?Sdr~zwwtssqommjigieb\8Blv}|yxzvsqqonpplljljihjhhaA?[ztu}rų~b5JxzcgbZUSTQLFHYMDDEC@HD:740-+*))(''#"#""  -xV3;4,-czp* {k_7@=4.+**,EtfXK + dV700[P  N@*('% "Apjc8! YT#D]k8 & >cq@3+"3t[qD&1<Žδй¸y"EURJC=620-)('#$$!!!! !###&(!7ñSM2<& %),(6\`uoAO;w|{}3 [*du!&.P֛U5)0LFzG9Cg3"%$[pVio# 3eOIFDD@5,))0457:;80*)(&()++./,,)(" dG"))$% -Ȳ`{lm^}Sv9#*'$ 2UonklW{ZXqƬ6:%yY|yzt}3uS0$ '.%*UXVUcbKTPIs~zx|{{z}|~||}~~}}zw`)'=`}V<41/,('&&*@\j]ZK:7BKKHAFIHC90394CTcrzyvttsponnkjhhfcZ;Bjv{}{yywurrpnoollkkljhkhe]GD]xyw}zĵb6J|{mc_WRRROJCKdGACHB;A@8741-,*('&$$""##!.W4;3,,::(!"#%"+ ~eZ7B?5/,,+4lK2cn_ \Z82/1:\xrMA8 M:-)&$!4CId50^ziU MV"C:852.-+(&%$$#$#" 0[<;2,+T|wux}{zz2 yaY;CC61,*,2tsT;\4 + W`9325 $0*)Uq"J¸v!CYTI?9722/)&###!!"!!! "#&*!8ƸfyRKTUgccme@M<}z|* \>B:74100'" L¿ż¸vATQG@;731.(%!"#"! !! !"%(*#<ŨTQPS73./52C]dkc>M¿C|wz) ^EPj[N:.(,"!,V͎LSK>1(!={J4S3$&&Wl|])! IzaXUROM?66;@DDD@>:51,('&03210//,'  }?+31(5#!FN[I-ƅ 2, 2ŻÿtE%x-")?E& MIIRK>77<>C.RUOogQd}j{zy}2z[SN`mxveu{ph|tv#o}wP L~:.SZURQY|uoDSONw}yy}|zxz|~}|}~}||uN4`{oK.*&! ! +C@FYfr}yyvusronkljhffe`V5Gmz{z{yyvspppoppmlkjkkhhgf_Z\>i~zqŶd9E~~yunPXtvKQWBCDEFD<<=;:830+*('&&&#""!  )kS>;2,,e pycjJCF6/,*.2q|U1;5 [Z751Iij=&@T UB-('(#  lW @\pF2)#"O˾rDVPHC=630,('%###!!!"! #%("CĤ5.//("":^glg@TùF{wz" `ByFC4-!%1ZɊFNC70#6H1,d<'&%A:0#""Smc^ZYYJBACGJJHCA>83.*)*25431.-*&  +=!,XS!,ʇ /}}09~sA'y&"((  +T?E?46?>7:=*]iSbkIHE@8:z}wrpz~EItXUJCDEHFD=9==;61.**)(((&"  &bT=65+*S_/ |ioA8A71++.8wPGQTz\ + Pc:508el@;cn0 ^E,,((%m\  @S\A1) "#OϻͿ·sBVOGB>83/+)'%$#!!!! ! "%' Dýl;!!-=GR`hleBRҺKĽ|yx" ]=kGA4-  "(0UDZu:$D^XL 55}F0W*%'&Det@  _jb__^_PIGJQQNKJHA952/-.313333/)& + +6!?lhBn|8} *e"Kqlikt=0q!",4 +a9H/%(21,-8$\Sqo ~BW :_{izyb{nynwc9{l% #h;6UVUavISNNw|zz}||z}~~}}|~}||zuP#+dT9)('&%$! 
(=uznlf`[TMLPTQROKCEQKPMTjt|yvvssqomkjjjffdaT/Ot{}|{{wuttqnnomlkjllkihigFDpzOrxlŹh:8w}wqkijkabytcSMGDEGHFC>7A=;73.++)'''$   ){Z=85,)Psghlpmnx6 wxfi>6@80+*,7}W +K_;6/Ap->czneG [F-,('!)ADFECBDJO"UP# ,' =XuC2*"'3./,)*1GO#"P̞o}t"DSOGB=730-*&%#$#  !%'HŬO8**'"+E_aV`ikeHYϽHſ|wv" dBhF>.* *0W?%? <2 uG3Y.'),i  ]jebb_^PMILTRPNMHB61.+),121121.&! + 6!1uR[Tzm@p/QAȽÿĿp:2i!"-;'" +e@LJ5!,69 f +!´qOnz½v?k@|pkkjxaxtZ/ln5! -QR'8SVVUWdKVaBPTMP}}yz~|~}~~}|}}yvS.5^R/,(#"%$" +Bwyljfaa[VPPUSQPICFbhRRPct|{xutsqooljiiifeeP0Moy{zyxvvusnb^Yahjkmjlkjhb>6ayc{fŸf=6vzvrnga\YX^UNKHDDCAGHD?7@==72/+**''%#   *[@64-,_kllgdWTQ% +xta]B>B6/-+*.5?i1#;E Sa<72;Uc-|yutp#VS NbcjlkwD 6[G1($Fw%#P̰ɠr"HUOF@:410-*'%$!   !"$)AȤ!!7bjkeL^IĿ~{wt! e9kT}pk_QRZXD).3?CCNR2!(1\3&?&-#}B2;n)$&&yI" PcX[X[[]NKHKNNLKJB<2,)&%)------("  + 6 <9/`HnqDe6_$ Gʿk86i"#5E?, +mLA5+%03 "ONNQqT !c|~l@{tsnato~[snQ5fZ& #  'fvq'!2||vqmh_XTTUSOKGCCB@FGD:6?=;621,+*)'%#"!"" &lS?75-(` knWM<:B70-)*3{u ^d@84od4 "4 XQ/+'%!(86ZC7T"NS# l|D?>A`? 3YB/(#8hQUPlmT6%M½ʴ¶s#DZOF@<60--+''&"""  "#$(!DͩZektojhaSEN_mmhL^ľHĿ|zu"lHngp|yV",5_׾5$Jn,&G6Ȳt-%"NC #$!]re_ZWRLA?CJRQNLJ>;2,(&&*-//.-+'$  !s5BtpsS}ga^IMW "A$ O°¿~]PW[VVPi0.|~xqlhaXSTRPOIFCCBCGEB;5<:950.++*'$#"! !! ZQ@65-)MX1! +_l`VB5A91,)*-ORHDNLK6 IZ@82VJ:JSNV^~o GF*))'#Kxo|lfqP HP$-YVJJTW- 4^D.'"Errr""Rҽ˳Ƹ·p$BZOE@;81/,(&&$"""  !"$) Gϣ@Vmo_ZjkhJaDÿ~{ut9ff{S$*2_Q!,+&-(}@3~@%&aC' r~qjec\RB59DGQUTREC61,*)-000-+)&" ${4MOr&&'WN Ba) TĬ½uf,Ga#//)$ n9`\;%H@'#'O}_8+7ARVTSRJ0)('ApflSPy}|xvtpqomnliggfd^O3Us|}{{zxvriXH?BE@@35:;Zdc^-/Tn^S[rr}gŻh>+z}wnkgbZSUVRMHDCGEDHFB<4><94.,-*('&&$!! ! dV>73-+Lwzyzyw}; Tok`M>D9/+*))%  +G]B83czc`cj CB.(''#-|k@"C[`C =I#.6Salab? 4`=.)!-LH?`|h/FD$"P̷̮DZp"EXPFB<830-((&%$!! ! !##)FН)!).0HkgjdJbDǿ}|v 4^PcZ^[fc`|vJ&*6aړ##Y))w@2wvB*&+̺E!mi`^``\ZF9:>BJQLJD=3-'((--.-,(%#!! */.'$&]:riq~L^^  ^;yg+Kb!#:;4" |+*8=@<72.*0O]cYEfgXF7)oVC9EUNpPjxrlggssnsKru{}wh1"Hwb# !!"! 'LZXY^sd_OSJY}|{z~|}|}}}}~||pyOP?Fu~~|wwunqomnkhiggc]M0Yvz}||zwusrlO:@IH65=:50.,*)&%$#"!!  
aY=73,+H{nhcf_VRN# aqllP@A9/*+)& + Rm@82,"a`&!QY B=0)('$-Q!#!#}+cU]]nwmes|vnV=#$)3ay'R#+?.g)*&#M{K h}mca_]ZI>@MJKJGC?8.)'%&,.--+*'%$$ ..R;}GhY  fͼza%R[ "460! x)8PRH6+.758OPL/*}~42}VNPp|Oy}~{d+"!MiE  'bl""KXUZ^U4B^YPQLW}|{z~}{}~~}~}~~}||l8EM7;$  JxH*)*0R^ZXVO7(*$@u|BJBIv~~{wuoabmllkiieeb\J0Zuz|{yyvtqhVKABGB34VQfifbS*1[p_TYttt{g¹i=*{~yr}~j]\TWgj\VbNGEFGC>3>=:72-*()(&%##!" !eeC97*))'!Mh[ \gxvK=E:1,,1H@945;A?ZK Cd?81,U;Lt|r, WF/+*&$={wxyysswu( ^Xmν}`#UP%;B9" {u1+*)$!"'45@KLH*0|D\Yl@=\EyHUcit]{o'!&Y~Y !  '1. IVWZdrL>EOPIU}{z{~}~}~}}~~{|zp6GL<8$ Fs3(,15I^[ZYM4)-$EwzBI>Hv~~|zxuQLJgnliijgec_I0Xow{{{zxspVEJHMK<5:62-+)*)'&%$"! jdB46+'$"R[wf  Wit_B9G:1/,8W 7U;60)jb QK++(%$.TTWSQIB@C @F# - "# +2fA2' !#\x9-9'%Pm$GSPFA;62/,+'&$$!!  "%' Iƾz'(4>Vhlmjd=cǶǺ;Ŀp5do{ddhkoL#)4`ÙbPp"(v;1(y-'%-ݶ8{saa`^^`RHKORQPJFA9545558886784115 ;q'l>HnD Y`<sμgIK~{aWG"-4" pLL;1,(,192MJ|=sS(0|Ygk`t::{eQ|=QtAWWZwhcrvhp #HfO !!  8rIVVUQ^MPQJ[z|y~}|}}}|||{{xl3MF>5"GuR*&-669HSWM@<50%DtwEG?Mv~|zve<:72-*(((*)&%"  cY:.9*&#!qsK~ MdleI9H<2/.3J?We{s5, + A\762=I04AABN`j ?>>=<765 Ao(n#1=sVAyC 49*{ͼ}]dE"029 rDF>:349=@2WYLTP}OFZNPFRFr#8˴¿/Xhthqvit#U|L   JSGFWWWWX}XQSI\zzz||{}~~}|}|~}|zk2L@=1 !BcF&(-39@AJOE?==3&H{wIO>Ry~|xwL9ALmmlkihfcb]F0\oy|xzyxrq_V[WAD8EP[Lhff_R'7cs_XaxueĻjC&~|yvm^eVZ`VMJZEBDEGC=.>;850,)'(+0;=>W.  ^[?.6*'$Vd|c +=]uO=H=2.,,*#:F + +Ec541e|x\ ;@.+)(#B~|r( DF! IG +4rx=2*%?z|r[WcL &#Oÿ̿¹c#HQMFA:320-)'%$#"   2ͽfjkb:d¿¿~7½~zld6tLOY\bWFFB>F;>JM;AJB(%+7bװ@" -}.l>(Ń#'%'`:".3tongdijdOEHSZ[\[_[NBD:9:?FHIFFC@<; Gc$)%XP7L bs= ̼}Xg?'>=3 )a*&4753-(-'cîkys}gb{=̲qennnSXW\Z[bd+a{{||k~~ HM? 7}F!LYVYesVIRODa{{yz~~}~~}|{}~||yj2UA=0 +?hJ&%')2FGBAILnijkiffab_E3\oxzwxwwrqi\ZaTUSZRXCefe^M'9enYVb{uqtoĻlC"}yuvoj}TYg]QP[GDCEGE:2@<730-)(*-AsZ!  
+HRG/5,)*%MbX%3' >ewH55F3#η¶|Sd:#$%# ,R:BLF>>>=-h|}²IѲ^~tjX[W)=SV4n(fxdmiTpo \tR; $W]@!IYVUYw]88>NSQDb}}zz~}~~}{~}|yh+U<912A`@)&#")>D=066:@;&Iz}pCI-.((' )Ed&"*:W% FL H^NIBBUM +/YiE/+(Qb&%Nÿ˿b$EXNH@:62-,,)'%#"  8wlmib7c<}xfe>Yg~lpibdi5$*4f=#UO)y/f9-hf&%#)Ŵ6 xrokkj\ROUQIMHE>7359@EDCIGB>=;87 ]U!T$*1.1ptn1!2 V+)ɺ{yQa4#$$( 3YJ/(% (.5)pHIGHJCJZMҰpy[ſfv kpflb\fhvyx(]e@+ >uu<"L\WXkNQQKh|{|y}{z}}|}~||}~{yp/V75.(Af9,*$ &4DC/0-;B;%L~}oFF:Zyzj_f]X\illlhfdea[=6duy{zzyvsrqlWPLI`akaGcff^L%;in[Udx~x|yr¼pC"}|xpwQTXhYOlEFBGFD; +B<620.,(),6mUtL  GXK25-+?ud\eHA@$ PPT8 ^P8]V-)% !\E/ɴ{M h/!%27 3N@701%%3+}`^bz{}}Ob}QqRӳ[}tAZw"rejopx}q(e|?! ?P#NZW[jcemLPRBi|y{{}~{||||}|}}|}{wg*Z88,! 6]8,/+&&/IH($$5G8%O}qHG=\wz]gme_[fkllfeddaZ=6dt{~z{zvtrrpVbN@PQhZHceeaJ!?jn^Sdxv{xxümB }|uumlxWVWbUSrF@CGGD7 )<;:30-*(*-=fd<  +BOC02.+,%>P JJ~R9B<1.-7[ + 3]?3.Zx -=/,+(& ,A6741.('@# 3?!!3HZddU& +0dqB/*(Tj'#Pÿɵ_%HUNGA:40,*)&%$#"!  (~wxqmlhc9c¿}A¿ywb _TWzzmUI>IIGA=LQUIJK&)9kض+%*Ĭ% #w3c<)!')&#!-EUA%'}|{zufVSS^`]]SI<:9;;CGDCBC?<327_M o_b_O.pzz)+â# +6*2ɸvG"f/08& +9A0CCF9%25#o}\_yhrŸ +UѳjgbyF+${æB#szzzo|{h 5)!*+./-*'0B='&RXVZn;@YOPQDh|xz{}~|}}~~~}|{{~}{we*Yu5?/  +/N1+1,"!)>>-J8(R~kAF:`z}xbmifZEIhkjcbed`V:7euz{{zxvurroWh\JHQgRHaee`H%Bln_Wgx}v|{|ûkCz|xOPSPHHLFEHHFB8!(A;941.,,+-7w[d4  +EOD*6/*.BG{5:=HiB :Gv}U8@:2./1JD+*%%)(;> + Gd?1.\%,,&*] />.+*(&?STQSSQcv- 4="%W{O@3 +.bxF0*)`CFB<6/E>($Sϯľǻſ`%KULF@:52.*)&%$#! -{xkoli_>d~FĿzyaQORvzpi^OZ^^ipw|F&)7n2UA!4p:b>(&'&%#(wP]/0y~qrtxwqolcYQTNHDEKJKKLD;>FM +mE"$?`vy)+ǝ$ fyL9ƲvF$f,+E) AI<.,*)!%, mq|T_u +[ײV(Wy>:S*r}{ez{~vX$".;?BCC@=XR&RVV[onORNGhyz{{||||{{~~|{{~~zwd/^q4;-  ++L3+11$#/@( :7(R{pHJ9b}urkaXLLM``ZMXde_W<>ev{zzxuttrpiah_QF\i^WdebZC#EljZVhyz~wýnEy{yMTYNHIOGDEFFE<$=;841,,++-5n$gn  +COK26.,;{x> ;52.*)%%#"! *~ttjqmme=c}G~g RJLo|urjdomw7%,9lf"'" `i;b>&$)&$! 
/ Gt*ExutrkbXVUVY\chikmon# rCH) g#0Õ" Q38Ʒ½wB'd(# + E{MR@2)$!'7:"¤qVdbsuKEE`L dβu{*ttnssxux]{vtpN'7:DKNRQW[a^RUYE(4CIJKJJC#0/ 'OWVWI\yJMSOEm|zzz|}||}|}~}{z{|yu_+hs24) ++K3'-3)$#'56 $+$T|gIG:d}zfsf\RIWQJDKGDbe`Y6>fvz{zwvuusogfgb\N_ijfgeb]A%Fni[TgxrhymrƼnEzzv~vrPWny`RhICCEGG>%@;:40+-,-./Vifb  FSJ<7-+-@1(()'# D:jyY;;90007VSQPOOvh  + sS65.%5ya" '6,+*)& (oL%2T$+TV 7?"Uc@ "D. *ev@2**H|tkbgS'"SϱuYVX\^Ƨƿ`,KTLB?;62/+'%%%$"!  +}qunspng=gyM¾k!}JCrAUioiagpm|W"&-#%(&% 4i<&!J{}wonjkqtwwwsso yE R|{o;ǔ. :Ƹw<*]% + ]t6678358CE5!kGr CVH>5+'%!  `Pt]::91-//NoJECQcl.  + fT:3*$8RS?. +)50-+(&!B*7yp% 7C! 0rwtf{A +([j<.)(@eUQMVU\rV'!SͼηžX&KTKEA<72/,('&$$"!!  ,xt}twvsqa>huPÿh !~ICCPHGK99H?=EAFj;W;#$(%##;̿( Hwssttsrpopnnj {v9!t|?ɖ4 >Ʋp51_" h35;=@:32. !ucfmp tϲSCv 2|V~}q|zqh>=TQ`lqqrppnmjbZO.K]adffbZ!+'+U_X[{ENODnz{wx{{{||}}~{|{{|{{wY*kk(<* *GQN==4%#' 'Xy^JC=gzuSpdaCO;Hdmd[^gb^U;Aix{{zxwvttndUif_I[jkheecZ>%Ith]Xiz}oĽoG }|xWUUTZLRAEFGGC?" )=<:850-3bBzhya!! >XM:3-)$  hWdpUF@:0..-.AYyzP1/  @f=5+# +,:31-*'"&=@>Ju$"7/ =D  :~}Y8(=' *Xb=/*(R|vtO%Sѿ˜h~ּžV'JTKFB=72--+'&%#"!! ,~vroqyvogX>ivJſi!y>E~Aug\RVVRm{x~z+%.@{Ξur`:+ #'"#'$&[z\FC?k|jYhc_UePZ^WURO]a`Z7Aku|}{xwwtsndTmfTHQijigeb`9'Mrd[Wjy||}oſmG }t}aUVdy]QnFCEIGB:# '@<;83//1Lm@[`=Q8! +CoQ44/'#  Uoyb@?;1-+,8mdihmos~  + +9aB5-$ + *910.+% 8)E& :@ ES& +'Yf:.)'7Sb::>YN&%Wǽ}Y$IRKEB>82.,*('$"!+|vwsnvrje]?joK¿dyHKrHvmklhk-%,;@IB4.~zY7:897LV !ʰ~E;g:wfkp{M{~vr||shg3Oql}~n(7CM[efdX(AA .VWW^^bHQQExyxy{}}~}|||}|}{z}}{qR*og*6) );6197) $+*032**_tZED@m|cceaB1>LX_\^YQX_^Y4Eiu}|zxwvtrkecobSITmljhgc_3(KxhWWhzzemG |}t~ppVUY\RPTDCCFEE9 #;;863/.5{~vpwK  BtL43/'$  Dxh8@<1.-.G{yoT   3L?6-% ':0/-+%":vy~z}ou18D! 
(* #Vl=/('Wb'&RƼrѤľW&GSKE?=72/+(&%%#  -|xwvlnnkja@klSd #EJqFvkldeh1&+:y۬&%RW3cU7&$(%#">̲{( wǯ}xsptopsuspmige__Q t2.sckxUMo $I5+n-A` +knDH*$!&('7z{dvob^TK!Ȱlt]O("Uɲ_Ovɡp˿˴ľR*KULFA<51-+)'%%# .}yxwojhile;jgQe'8IsRcF;7>@LVWYXNCB=3&+@ۨ(' # RQ3jQ7($'&$#^gcSasfa{Owsi " 8]x}[Yw&3TWTQOPQQLJOQOIyzww{|}}zy{|}||yz{}zvM3ue)#;) #@A("!$#'/352#*[uVGDDr}xdmkheUSOKFD@JGUa]S3Hky}{zyvvuo]HKGG\XbighiecW/*UwaXUk{thɺmI y}|vtkimUVjhNOLFDIIC<# $:8862125lsdXdV!  +/YH75.'"   AsU@<;4/06{2bO 5G<6/& 3<00/+'$!P$KA,q2 5@ *1/2Q74-% +*91..+(#Ea"[, 5> `wgc[fL #DQ=.'%NaVc\0kK$#Nеn»ϧʰľN*HNKCA;52/+)(&#$"" 0~zxyrjflpb:neT[$|7PpHhD828:<=ACKQOX`U %*==%cGAfD2!"''% !$$ "|~}|}{xvrrqnn[ "b2gym_6h^ /8ɿe!9C +}X?>(=I8DJ9( H{{{|TQak.#uwtwLGvlj~zk`qJ}zvvsv" !,342346/>MK4USSSK[~xKQLKyzxz{|}~}|{}}}{|||}zsH2xa&*A* )Y>,-312.!!37/55/%*cxTMCEpripRiT>TeYEGIKSab_T,Inx{|zwywsocZZX_mnljkjhfa].0Uw_SZp~siǼrKo{}yNVX\UQSDEDDEF; ">;:843/0OPtXdT#! /LB:6/($   .QssWD<<3//1QRJuW + 6G40+" (?1.-+(#>bWacfilt4/;! ($XgR7K + "CW?.(&Y[2tsE&%Q¿˺sxĬžL+JPKD@;51.*)(%$"!! !!0{yzxqifbab?pcU½]$;TxPk]d'(+<ɚ ,EBqH4" #%&$ " !(|yvv_ (^-$,(hJ 3^q9ɿ|aA; +XGYQJE776. R~~v\;B<6@a%!l[_GjZqI]CI[jy[Ssjlw}?zxyurwq~$!@P@DS< 6wvlQ.245785%.KJ41TWX`dNCKRLJyyxz|z}|}||}~~||~}{zpE5v^")B, 'V@,'*''("".4489:'/evQJCIuonrRqf]Xkb;@EFUbd_R3Qqz}~{yywrmW^\\ckmljkjhfaX-0\|bVWptjɿĽmJ ry|ytoef}QWbj[LgKFHGGE=#!<99852/.162wiB4"  +._G<4/)$  + .LhiMB<=4./0IsUizl 5G5.)# +.B./-*%# :ol{ZNG: -]T&O̽ͽƾI*KRLGB<61-((&%%""%2~xvwulgdgmcmH3"$$$$ !%|{xy] 0X-$)'nF . 
>ɿÿYK9  +E=E0& Xuv|nHAa:Ifcuy/~uhej`[a^ZZ[^^\|~"'*BlL$@//7 1689:75"6†2UWVXYEEPOMPzw{z{}}{z|}|{{{}}{zs@7|S,E+ 1g>*&%"%'"!.359::(2foJG@Kw~]loVlocVmo@:9@Rab^N0Qpzz|yyxsqe]\VXXjlkkjhgfcQ+/\vaWTnqn¿pJ!cy|xrkivTTaiZVrLEGHHD@& >=<:51.-/.3rht$  0kL;40($    *Ju}M95@5/-/_c:#4*  +=G6.(" + *:/.,)&$6}c"?( /=" n> RJ + "Pp>-('em&"OrxнƿE,KTMHB:64/)('%$#"   (0yuvulidgkaJwtfjoafgZOk_D;8DJYa`O1St{}}zyxui_cE>IQjmkklhhfgW)0[t`VZpon¼rM#iy}z|QT_d\TYGFFHHD;$ !??=;72.+,+,^xLfQ  +0hS82/("  +  *AYlN94<2--.ek4%36?^  8A70*% + #50-.*)#-XWIU[h}7 .6 OrVe+ + %OZ0+&$f:+Y4O]&"Pt]ÜžA(JRNG?;4/+(&$#""! '2~zvvvkneboh6mWVO%t4Kg[ùb% %++("$'$+A¿9}<:fH4$#&$! *}}}~R =W($("6 4% BƽÿyW]3 +'y<nhUcafHC?\vF%$P(Lniw_~m^aco:zqxuvuvomebl &FMYy_/"1797893'^pH6UXVYb^ZP\OLPAP|yx}z{|xzxy~~}}}{}}yq9=T1D+ J][M0Uux}}{xtrXjjIA>Vllokighf`V'1^r^U]onržtM#`z}wvuSUafXJ[IEHHHD;(%B??;63/+,,/jk}j  +/eM740(!   +):FQC74=2/.2q +3851*% &62..+("8{{tjaT' .7"  ?S;.@> + IO0+&$DI,U_&!PĮhaѶǢvݮľA)OUNGB<5-,('&%#!  +>~|xyxoqfln9qQTʿ~I .v5FhUW!%+Fι:T<6dG3$#%$"! .~N + 5vV(#&!0 #+ SƼľxQ`0 +0<u;R.@*  DrN*! "! %==6:6+ 9o}tFGBRzsge[tVVFDGQX`_\Z[X^YJ0Tv{}|zzwskmV3,/@Vklkjiid`R$4dp^V\snwsȺvN!_z|vrf\_[UemZQzQEGIIB<)"A@;8510,+,7WfV  +-SB31/&!   + (=HRC7282/-,ATkzMHej  0952,& + -B40,*&"&##! *5# Vtm{quZ + +@P;,%!Ub!(#RǪjqwѾԹzgŽB*KPNFA;51-,((&$"  /C~}{ztprbbxe=s¿N[ɿG .q2GbDNH1%*=х7$($ `68^F2"%%" ! .J + ?zQ'$% 4  XƻþzL]. + 6z9sJ}sR:Lv (oKS2 fQ%"t"Uk`wef5m*=[^We^[VRODR#'g; Bwn`;#.25465-?X=QSSRD==EeRNQJU~xuzzz{||{{{||||}~}wn6EO1?)!QmS("!&<=760':p}qFL@QwsfdMZVSJJL>XONLLM^aVI3Xpz||{ywvsoXOMOVYhjiiihc`S%7dl_WZtrxwǻqO#ev{x~SSgk[XnKEFIHC;'>>:811/---9nq`yl  +'F>3//'"   %?HWM?.950-.6SP.c|i.  1C;4,% *@30-(&!/@# 4=8gu3E + +@I7.%!>idjGAV= '$RȗZy~gaƼśjgƼ>*LRNGB<61-*'''%" !""6 C~z{}ujigacS8nÿHZĽB1r1K^C=3( '.@ݞ7"$&"%y59WC2# %&#! 
&I + GW"$%""1  aŻ¾wII* +;u7qWDOj +,qtL.8c%_\NPaY]befdbY|aE}Xdhha@3HY]Z[½K"( !0)).81*#*+./.,'0K9%@SUUVSLSTdJOAR|x||{}|{zz{|||{||ym2GQ7B.$ MbU)#0E@71*#;s|rKI?Wx}{}ttononlhdif`YI2Zt|}{{ywvsssvtqpjklhghhe`R";ep^TYwvwzƼrN'^w|vzx[P[aWR\EBFFFD?+;<973/-,+-2Uo]t\!  (Q?4-0'#    #8C]Y@2?63/.SzZ\dlnp~  +4K:3,& +*E0.,*&"-:# !]v^hJLf2 + =C5,$!Dhp\SeZ )&R“oZiϽ†doĻ3)IQMFC:40+)'%&$#"  !" "3F|zwqsxqihqomY;p¿H\¹?:k5EY>=3( %-If(%"!46v}oDJ;Uy~{zwspnmlkjigfe`\F1Xr{}|yxwwtsrromlllljhhhf^K$9qt[W]vsv}tN(^xw~[Ubs[RmWAHHFE=-=>;7401/,,;}LZV!  'Q@50/'$   $7AWN<0;71.2w{zymq  +3G74,$ 'B0/.*&",8$ P7:_f< + MM8+$"Psn;!)!Nfgɽڝ{ļ/-NRLHC:5/+)'%$##" !3J}{qmlec_d_esqnV8pÿF]ø? :f8EN<<0& %,HT' $KMEG$:RV1@UA1#"$$! &:spuodiw|{zuro0 PS&#&'= *WV4#uĺ½sA'P# + Ws4uibcrnUH[uk -nO}Z@'Au*]gTQS]qytHH{mw|a@+$.Icib½6!",k},-;3="")(**)(NwW- EWUTH<:EX]JE6`{xz{{|{zzz||{{|}~|wn2KF4;*!,^zc.MSNB% !AxzoFLA]}~{zvtrnmmlihihdc_F4^tz}|{yxvssrrolklmkiiiie^L'=lh[VaywvtN&]z|zoldNU[aSQhO@IFEC<-;<<7531/-,2fj;QE! %E@6//(#   %=PeL;3<70.1Wa*);0#  '/F['[²3-ASA4 !$$!",RվzmcaMDK[TRQI>;:>D + LzL%&&3$ "&$}źq; ,O# + +Tk-!~}|i2=f^ 1eUQ5-xU+5)aXyxrpYq~@OhbflA00)-2Q`]/ ")( 37O<'"'((()'Azl6!BXTWjQ:kRE>4^zzxy{zy||{y||}||}||zm2RE.6(,\rf#ETWWJ#"$C{}nDI>`|{zvtsnnmlkiifb`\C3]qz}||yvurqppolknmmkkige_L%?okZP]wwnƾwQ'[z|{wrfg\SUXZJKnQEIGEC?-;<><52/.-.6p_vo%  'NA9-4'#   + !6CRD;8D90,,/U]  +*:1-+& $85/.+)%+;  G~}}~R  EM6*''ugFJ"+#QaºЬŻ3.LUOF@:51,*'&%$"    !'5H|zvqU9týA_~9 ;g/HW?;/&  &/E~#_ʽ$|.OuR 8~|-fccuy=R8MY]6KhTP9GR[)"=:^{X4M?)!#())''#!DZH""GYTUOdY:?80bxz{z{|z|||||~~|}}}yvl/S=25&@dmo"3][VVI+(*%D}jDH>720...3nl&<8  'KEJ23(#   + $77HB<3@8.+)+,$(46-#  +$61/*$ #82.-*'#.N}e2) + DI7,'&hf_A"*"M˵sVGKwpû1,QTOF?;51-,)&%"#"  !"'4J|xu];tþ?e9 >t1DX>8/$ &/H۶R9QûO=*?U@0##$"!!5= + QvH%'%9 <* .Ĺ½l4.G" +R]((jL3FG >{{n-hZpwhkqejqzrrnp7\s1UfSEjjXmHET_!&$/9vBB$)NJ*$')*''$  c: IVSTSUg{u<874d~yyz{{|}z{{}~}z|~|zwl-S829%Dm9*EIGGF;-*#D~yhGI?e}}zwtrpppmjhhgdaZ?<_sz~}zywwtppqpnlmkjiiihc_F CkdZRc|~zd̿xT(Qy|yrmjdYTTXYQRhVEGFHE9+<<>>630000.\jzu(  %PFJ+0("    3:PPD496.)'&# + &<2-)% 
"2,--,&! -f7 =e3K[<;." &/E˘xy)AOB1% #$"  !vzwsuwxyuttc`gcblqiegUTYTWR XvH#('>ʎ %D3 :ĸ½l33C$ +_Z&/< IcX`SDP! '3e2pf]^p~|wsttso2Zc+R]:F}vavL3Ng*(Hhi3L% AF?3%(*(((% IWTV^iD7566f}yzz}|~}|}|~}||{}|zwd%V774#":[_4%.464-*'&!E|zcBH=71/2@YQiwXeb  + =DE,/(#   1:KRE7850+(&!  &81-*% + +2+-,+&! .? =pfauu` 8A6-"$+"Mú.-OTME@941-*('%$!  "#)< pƿ}yzsW;u;d~3 Bc1G\:;* '.Iո_( )Y&AI?0# #$"! &nvljjgb`MGMROSY\VRKEEBBGDDIIIDi~}ywtrspniikjhe`X9;gv}~|zxxtsspopolmjmjjigd_9)GsgXQez{ydyQ)K{zwpkhc[SQ\`aphZAEEFA<.8=>:6202U~wpqs"  ;A=/.)#   +  27BED:<7/*&$    &:1-*% + 2,-+(%#/B"Euh]SNNA1* + ùg)7; pT  ?¶^EIHKI?=O+NXm}c@gU6t\]NZ]~.PZ(Lj^LncWpKVQh%)85dm?0!1((,1)%$ (niC GURSLG;:71/,-5e{z{zz}{zz|{||||~~}yu\*[/61 2Q{|oXEHW_`bcWPNJA(Lw]FD@l|ywvsqoolkkhdd_U4?hw|~|zywurrpooomlllkhihc`6'JohXPf}w~d¾vS+Kxxyrlhd[TT[ajmm]DFFDA;+7==:62/1ANF`Q\h   893--(#   + +01?E?7?6/)&#  $:2.+% + 2-/-)'# .I!*6 + >C2(!"^Y4"#+$QϽ,0PTMEB<3/,('%$"  ""(.qU5t¾7dekxuqt* +C`.?I;8+ &.Iڌ(&%%DM@0 #$$!>ysaimmnlfozmjmhmjkprqilifeY hi<"&&Um 2e"E¿~e&;< ]L HŶvycFf Pf8ABiz>KR2t~_yUocf~+X`?_S9Aud`o2?Yr# /.Fr9B6&')1Wl:' 5GW$!GSSSOI@8520-.6j}x{z{}{z||}}|||}}yr\&\,3,@_svqld`cb_\[XTOKF&Pw[DFAl{yvsoopnljghec_W4@ex}}{zzusrooopllkjljggd\1+NufXTgt_¾vU,Izy|slhbWTSQVXNPHFIGDA:+<=>:42/1DVPohj(  130,-(#  + +  +.1=F;3=6/*(%  %<1.+$ + 51/.+($ +F=;31/09eE9xhK( 140-.(#    + +.5>C:/:8/*&$    &;.+'# !2..-+($ '7[]Sa>:ER + GP4'!MinyJ"*!Q÷¿)0STME?94/.+'%$"!  !#*Cw|pc;w¿1e־qeo~jjk% Ab1@H32( +&/KB/;³-|v;B<- %&#7|}yvneh_aa\YWSS@ d[6#&%aȃ,PW Tÿ}c!=2 !hG ^ȶ~zmPmVdagag886,.0LC=xwutyks)l`b~zwS_vpkfs!,'G;!4EO>)..D1%& 1:?$DSQRQJ=70)'(*9n|wxy||{{||z~{z{{|}ztZ(kx'0, 6Skkhcb__^[XVVTPJ?+UzXGEFp|{yvtpmbjiifdb_Y4Dkw}~{zxvvurrpnnkkkkjhhgbZ2*PmaULn~qc«xV.Czxyrlg`XSQTRLJFCCFGEA;.;<<;61/0AfI[^d  .52-/(#!    + .7CC<352-)'$"  %8/)'# !0//0-(# %1_5#/N FQ1%!9ruR/9p=")&MɠqiYQX*3PRLE?:3/.*'%$##!!   
!"$'+{ysd=z0gгtl_gwzpnb Dc-DJ72( %/L<(Wի`pt >C<-!$%"5||~}|vzurotqlhe_[STO9 iY3$'$he 69Rʿÿ{_>0 )q@cǶH@z\acM.Iyy|rqqtu:>ue*p`Ehq`QiS]YPec¹d#0E'I:' (,1psR' *rm\%GVSQKB;6,$!!';n}u{z{}}{{{|}{{|{||xtT)hz'2.!9Vuqke]ZZ\ZYXXYM@2'XuSECGs}yvvspjPjifY`b_Y5Fiw}~}{yvsrpqqomllmligjgcY1-Vz`VSnpgyY/A{w{smf_XTPUSMIECDHIFB;-9:996201ILU1S' +171+.(#     +.6A?=143/*&#  + #6-&%$ + 1.//.($ %3CX3) R\2'!!^a? $vO )'RϷqtĿ'3PQLD?930,)(%$"" ! !!# {wq~Z;xĿ-gŬwuflxzhX +L`/?J82( %/LB'YԨ"7s!FK=/"!$%">vurqrtxwwqkkihefhfe`Z[WVXA k^2&(#qL 7K9Uʿ|^@. +,f<nɸ~A3?FB@BFj eXe,Iü\$!I[TXV<$2(&!!'-4OB6% +17#KVSPH@;91(# "op.e( 5G7+.*$!   +-7=@>26<4*&$  + !3+%$# 5/2//*%&0#"(6EQ + NQ3'$"NU;=EKQWR!*!Pбÿ$2NOKF?840)'(%"!!   !" {ĺyqxT:wĿ+gŭubo~ydR ET297/)#!At|y{z{{|xx{}}|{|}}|xqR-mn'80"6Wqqh_VTZYZ[WUM,'$&ZwRICDy|zvvtrWHkfUKcb`V4Hmx~~}|yussqppb`fkkkjigdcU.2Ys^TNqsyr{V.:wzsmh`YRRTPLGDDEGHEB:05>:8522/19Vc=#Iq  +5G8-1+%!   + 3>HF@43@6*&$   !0+'$" 72300*$  %1AzsQ  MO3'#%g{S"(OϾ¾¿)4SQJE@83/)((%#""! !" |zlrO9x,cŮq`fylbS@ +JZ.]J/1@JýC  ?6%:L>/#(+/Q5,$  32)$JTQPM8.,(&#"%Dt|xzz{z|{wz{|{|{{{}yoQ+qm%80#+Ound]XXYYZXXUI'$$$\sJFCH{{ywuspNEjkGId^ZS/Hoy~|xtqqqqmSSNXekjhhc_Y+4Zp[VQupxt­zX-;z|tnhbYRSRPNHDCEEGGD9.8=;8621,+.+(!'+)  +184*/+%    + +1@SP@758/'$#   "2+)($ 1120-*$ &3)9*LxJ + QM4)# +7.*$kP!)"O'3QSIC?830,('&#""! !"|}tfK@wÿ'eɭrpzmYTTPOA +Q_5:=4/%  %0NپČj!DJ;0"$$!Ly{yuqpnqstsld^[VN4 + 3yZ-&&%< GlDlȽÿwR <( + Kp8ʹ{ +pmssrw})Pmtuv{|nqomyk,G)>QA(4G9(7FOñ8" (!F/$:5@("(-@kA*#:Q'HSQLC72//0-,-Gs{xz{{z{zz|{{{zy|{|xqM.po :-"&>h`XZUWWRSTTPD0(%'bvHEBM|~v{ywtrSNgeDNbI[W-Koz|zxusqm_H=BAK_kjggd^S)3ap]VVtlzv®yX0=;60/,*+)&$%$"!  063-/+%  +  + /9UcB954-'%#    + "2+))'  +!2/./,'!  $2 9{X'0AF  IE3)$,2-/,.1851+''%%%"! !"!{xnBAxÿ&^ƫrxg[cMJWY\mn PU/9<3/$ &0MdQUYVLKJAGd#CO:1! 
$"V{nhnz|zxnnsvxwvtne]^ZS3 + 0w\)&&!#5.+ oǽþvK!>' ;U4Ƿp vyulVtx,Urfnpeejof'A8GMK50,35AGT­1 #HG2'&%',7iL(!&45#+LTRI>7333420.Cy|yz|z{zzz|{z{ywyz{voJ1pl#;+!">iaXXUWWOMOPMIA<.'ayBHEPz{fJ]rwrpXZm\KXbL`Q-Lpy~}}{yvtqmf\=;61,(%%$"" !!!}}yuL@z{$Yϫ_m^Zhdrr +QU1=?5.#  (.PA$!",`!?L=1!%"]zqlnvuihlfd`eiknkddf\WSMSQ1 + 2n^)&&(+  tǼ¾tJ%?% 2C/Ĵc p|yR_q<4E{k.Z^gus^c}b&|XDJORNB3AEEHU+!(6SI3(529+$*.6F3%!WU,OVSK>33222/.+Eyzyz|||{|y|}|{zw{|zvnF7uk 9,  6Ufd`YWVQPNNMNJD2.gxBJCV|j;KYSjroV[lLHSKO^M.Ln|~}{xvtqnrmK=Ndjkghhd_N&9gqYTYvlv}[23vvtohcZUSROKDDFHIGEC:/8>:860.,**('&%##  +><0.+%  +   +/6?E;0,/,)'#  "4-.-(  .*)+'$  &2&C;.()$  + <;2)$'}HEh96|T")#Rĸ6VTMF?:4/*'%%%#  |yvsJ;yy)bӵfX\> +QS7?A5-# !(.T9) !3^"?G?1#$! ]zxuz|wppje`cdfc`^]d\ZUQRO. + 6kX$&&!'  "yƼqB&;$ EU/Z s|wqInbPqH}}|f1\[^R^U-O1@TY[^RTPKFT"#"!*?huc$.IFK(%*3SL%)<8*-NSRNB34211/+,Iy{yy{{{}{z|}}zzy{{{vmA=zg 5)  2MGZ`]YUSRPNOOOJ42m~tCI?YxuVK_KSbgTYlPOOAP[M-Ln||xwtsroiaRHM`ipjihe^O&H>1/1.,'#   #6/.-(  0**,*%#  (4 + 6;5'$&\G1EZF$*$OЬYVPJKT¿7RSKG@94/*(&$%$!    ~}vtrOiiYMXxprò|X2/xztoid]USROJGFFHHEE?805;77610.-+('&$# '55.-+%!     -2=D>1/40+&"    ":0.,& 1--.+&$  '2  /4/&"$O`[`YadiL$+#Pϸ¿6PSKE?740)'&%$#! !z}ztqoL=zľxX[SgdRUW][Q OK2>D/)   &6UԽC'Z#>FA1!"##!U}{xtgjfnskbhe_XSJICGOPND9:>?: FnJ$)&4! 'źp8-?! BO/ #=injjR3Fdfcswz|~|xvqlL+Z[VE]>/_0W4)O^WT\R41`*&=!+..5#(*-7Z5$2B8.PVUThU9:74452R~{|x{{{{{zz|{yyzyzyuk4A\"2) /V8.^}z[J@YM:NEPUXZQOZT]G1To{~~|ywvrk`KCR\G_afjhec]L"@mjZR^ymlyX21}wzvohe^URSOIGEFGHFG@74398630/.,)'%&$" ! +(<:.'&#!    + ,4FG=007/+($     720-&  + +3210.)% '5   + +2.&""fzruL$*$P¾|6SSKE?71.*'&&#"! x}ytqoK67;<>< OvI"'%9  )ĺn5+7  FC. (9 jefffbdghkoqtxzxzyurojf@+_WO=\4=?1d_VV7&$3MIMj)&@axP$ocXN[zrhz^5)yvwunie_WSRPIFEFIIFE?95299631-,,*'%%#"!!  '==0&'$!    + )0DB73/1-*&#!   42/,'!  11110)$ )8  +5/&#*Un[l1$(%PϹtn{8OQKD>60.+'&&#!  x{ysppJ9z¿x&r¼aY`dZ^cix| MF1;8,% '/W̡[>R(=LA- !!"!f|rmkpmnkg^WRNNNV[ZWMFAEGB@! 
QvG#$&<ß  .Ļn3-0 D@, ,5$rgccb^befjnrvy}{{wspidd<(cIPv0X4Ot!7ktxpZTHHZd`k&!*Z_]h;0*63(*.>H7' (^y]%0NSTNBBQC9:974S~{{{}}|}zz|z|{{zyxsi3GI"/&6J9KYXa]]XUUSOJE39txi@G;a}jXT>\\N@>HKWN<^[Z^I0Uq{~||zwsqkTJMTTFY_[jhdd^M'?pdTQ\|ve~\5&yuxwoif`VSSRLCEFHIFFA8.189840,*+)(&$#$"! %570(($!    +)-:H?911.*%#      54-*&  ,/...(#  '7    + ++5/'!$gvG:_xfC#+!PάYZ^YYdv8QQLG?70-+''%#""  ~~yxrooG9}ÿ{+Ŀm~ YM584*"   )3Tؽ4,$%$;O)=S@) "##!Puvvzzwvp^`_]heffe[NOTYXVRNLC@GK== NwF"%&FȚ .ùl- .- HE( 9))xpiea[^bekt}xqhfl>.hN[|>d5XfOSivqxturlnb_q'*7%81 %-'*1[o0&  i,1QSX^qp<78761Yx}z{~|xx{{}}|{zyxuj0KJ#1$*AIBnjhg]VOKKLJHB3;uxkBG;avxejdLC>ADCSC9UWZ\F-Xr{~{zvsqmYTZWXPX^^ffeb]I$FtbWR_}zi°|[7%{xzwnif`WSSPJEEEHIDCC;02;974/,++*)&#"!!! $13,(&$    +,-2A?:10-)%"     41+&# +++-,(%  &:   + 091&!4L=558}¿}* SF575+$  +'6[ټ4+!! XL,?N;(###!MxxuyupnbNYXMRSVNHDAGKFBDBIF;>77<7 Ld> '&Oɑ   6¸Ŀf*.+ +F=( C· $xrldccho{yspv<+kTaok=s+WrV=+-3Rerr_U;Kux'//$$530<-'*-AR/&!<@45QUWerL:872Z|y|{|||{z}z~|{z{yytg*LF$0$,JXgtnkld\XSOMJHA7>uyjFG?gp^eopUgPKLE@HDA;BWZF1[q{~~}{wtoTYktXLVXX[agfa\D'HjaWKc~|{dð{]5(}wwrnidaXRSQKFCEHHDDA810:963.,+*('&#" !! #24+*+'! +  + (*/574,-+($!    + 0+&$" + **,.-)$  $@    + .62%"d@#- Mÿþp9SRKG@93-+(&$#! t}~zvqprE>}ÿv*¾} NB377,&  '6[ݳ$ ?J,?G;( ###  Lnoouqnhfed]_e```ZY^]XV]\UQPJKMKJB +MV9!'%Vʄ   ;c&/* + MB( D-|yqlmu}y|=2mOjxFHC}\~~pe[]|n)8{o/C9 />H4 ')3b|G& ,* 8SUTFU^UO>7656\|y~|{{|}{z}z}|{|{z{vg-QD$2%2gwkihhkhbXSMMJIF6AxhFI@heNd{fSrYWNHTPE=4@YYA2Yq{~}}{vsjQXjoW75296530/-(&()'%$#"!  #/2,+,&!   (),470,.,)&"    + -,&$# ++,-+%"  %=      -51#ISGDOtv?$-"N͸ÿp9PRMG?82-+)&$$" r~|wunke@=|q vNB285+$ !'7_ؿqfXTOENwC*EP9)"### >VZ`cbb_YZ^VZ\afeZ[fe^`bdd^WSTVUUI Qd< &%X| <ÿ|b2) NA# N1}zw{z|76m@fn3;w>v/ozmtkrvktu~sl_Ŀ_&9G"-#3>,,"(+1I=-$ $!#(ESTRQQNMIA8448a|w{}yzz}}{||}{z{{|zta)UD$2%#Bfpcdfgihd_TPPMLL3={u^>J933966410,('(*(&$"!  -40)(#    +&-/793/.+)&#!   + --)&$ ++)*,+'"! $8   (2." `}ZbA%,"Nΰ¾p8QRKE?93-*('#""o~{wsmd^7?~ioľr QA242)! "&7cǻC)IP8'!#""6PMMQMPOJFF@HLNPQJMPKONWYXOPXYXYZO +bd9!&&cx @¿}f4) %K8! 
G{ 0yy{~x~23tJib*u3='(!#!#3ĿT$%*,3xt-" ,]|XSRRQQQNG>616d|s|zz|}~|}{|}|z{z{wb,V<&2#*i}rjhgfc]XQNOOLJ1?y_EF6as}}zxupbTU_`UT[e_XHUebY9)Lq_SRj}we²_9!xtusqke^VQQNJGDDHGEA<:5188851/,)(()'&$#!  +3.'%#     +(+08=3..+*(#!   + 0,('#   +,+++,*$!  "6   '.)!)/*'d:%,#Nþƿ¾lBGOMBADBCDHTSX[ZYYXYSRVN l^2"'$ij F¾z`1( +/R;! O~ttx 2xszy|+1uQsuH)HwEu2>./$!  %;@#'*+/8D'" 0\PQPRPQOOKC936c{t{{{}~~|||{yx{|zzv_+Uz6,5$ !!Xxwj^YWQPPHJRNIH0Cy^HD250% $(7a;,KG6( #$" "IreQEE8=ENYQFC>?@A@DFHEDB@C<;KE f^3"'$qg  Iþ{`1' 1M9 Jg]dmle_UCKfoi@8|~px|wp7wDqv@*+SpOo/GH52  >ſ<+C96:e{v{|z}|{yyzz{|{z{xs`([6(.$#$" BlxsgXYXOKI>=C?<:)EuX@EArXamuGKR?BGRYC7>OQQT^Y;:bw}{xumdi^VLAUWW\cFQh^V2+Sl^SNmxpõ}_7!yusuqmg`XTTUJECEIJFD;851;::40.+*)(&%%###"" $2E:0-%    &,7E@8./+(%" + ,.+&# ++++,+(%"  ".    + +)3.& "u|A75(,!Lϯqlw|¾e;RPKE?91.+(%#!!  n~{ywrqpj-Bþ_Z}~c D9140& #'7c»8+FA6( $$# X~j[XKIE<<7:A@ACFEDD3 `U4!($y` J¿z[7% 8E5 + 0J7@K\bemk`_~[6@u}xs}wyp!#:~Ra-8hjUjZUǿ61lm]NN]iokc1)*,9wg+#2`}TURTRPLONKD76i~x{}~}{{zyyz||{|{zt\*bv0)," $#" 6ah^]RTSHFI=78997*H{[HE?sdVMXA566<=j^F`w~zyvoOSccQ8BS]cfOPb`V1)ToZQOopynĸ~a8{|uuplg`XTTRLHDGHIGB<72,:9972.-*(('&&%#""!! "1N>4.$"    +%,7I@822+($!    +,+*'% +()(**&"#  !.   + )9/'!!E\WRbskU%,#Nc:SUNF@:3.+'%#!!! qyyxtpnb3Aÿ]\ysS S;26/& "&5eü9-D=/#!%$#HkfeVSKJMJIKHGEC?0  YP2#'#}L Tȿ½xT3" + 8E3 A<97;8+N|wUGF@uV0:3.&"   +"*0:>914+)&#    + --+(% )*(*-(%$  -  + +*3-&"#nx<$-"NY ;WRMF@:5/,($#" q|xvsolg<@ÿV\|toM ]<24,& #&7ký8-E<0%"$$$LwbZWTSPMKGGGA:0 'bO0%&$C  #ZǾþ{S0! :?2 9dZ]VIDHKV^[cK4auz}vx1  Ixty~vwutf%:zyM{_2(IsjV||vcĠv+#B]mvc]RE9XĿ,.]ytWEDJP[]UG()()(((&7 5fyUTX]{k_ELCBm|zz{|z}z{zy}{{{||zpV*fs0+, 4^a@2.8>:897;=975*Q}zWGDAvxqfpaAQTUGLT>W[VTMca\7?jx~|zxsm`e^TP?F]_`Vcd`Z,-[iZQRrsvrų`<y{wswukg`WRRPLHFHIHEA:71'8:862-,-**//&%%())5. !/=80.)"   +")/94.+*&!!p|wtplie;@R[|qfer\ J8.2,% "%6j½6+E?2$ #$$!Xsqkf^ZUQXVRIB=951-  +je4&'"@  WǽÿzQ2! @?0 +MLLD;89;E7C\VDXb_W6@ix~}}{xtphme??EVW[^\bd`S.,ZgWORrsupõb;u{xuxsmkbWRSRLGGGGGGB;:5%797752),((R/'*Fdk}|*  .@8.-)#    + &009;3-/)&&"    + ,.+*# +++((*((%  !0!   
+ +&/-&!#vu',!Mϱ¾Y ?YTME@93.))'%! h~vqlcai ^ǽýwI!3 C;. "=B=620127L**7>(/m]+);Zľ2Qbf`]THEGGELPO9 )*()*'$ ;8lqVT[qDVcOOMIu}{y}{zy|yzyzz{||zyxqV-ji1*( GxrFE?;>?;;9chLP_e_T3Ahv~}{ywvq^ltcNVdNU]Yac]O.0\jWQTsnmuö~c;w}{wtrojbYSSSMHFGGGE?986);:9954?3#$Lh#',_s1/&  -64,,&!    + -9126/++*(%     + ,-))#  +)**,))(%  ,    (/+$##"^i@mnE'+ NɸwL=Gx¿R!;DB>;*VrKEADuf`OtJYVG7LG@_cd_W4Ejw~}{xvup_blqbR`NS_Uef^S*/^iZRUuhk|Ŷ{a<s|{ttqpibYTSQLGEGHGFA:83(;<;<>{]S0)+QoL^W  ,42*'$!    +%.,.50+,,+("     ))%#  + +/-),+,($ 3    %.*"#lG$) Oο|~¾K ?URKF@93,('&%#! msq{plmnnyd2F½LZ{}|xuE K731(! #'9p»//F>1"### U{ofceca]ZRGAE@=. +*dL')%!8qļ½v@%/ VI. 1AQB0'&'(,3?D5DC8=00*" \oqVq}x|wv|bgsW(DbXznU8#*QoL]tvm5du!1tz)9xK(Jo_þ'))+(('%0)!;nlQUTK\mc[VQOPUvwz{}~|z}|zy{{zyyz{vpK/oc-1(" Pud@>:87:;99<@@>5&Y~rJGDD{[Q88BHKUJ=76C_dd_V2Cky|{xwtn\W`riFOMM[Vge^S*3\eWKYykgzĶb=oyyssqmd_WSOOJEDFHGE>:7/(;<==L]r:"(+PwYYO'54*'$#    !+,/41+.,)(#    ,,%   +-0*+,,%! 1      &.)! XR8N{x!&*NľI BURKD@82-)'&%!!nun5HH\zvmm> D91/(! 'oiTVSRQPRQRQQQQxwx{}{z|zzz{z{{yyyvn@1s^**&! Bq_<0/0-358<48N\IHCObcc^U2Fmz~~}|zwwqbXLd`QTKGSVfdbR#5^gVOUwqf·f?p|{usplhaXQPPLFDGJEC>:82$8==@Ut,Q1R#(+NrL:H )54**(#  +  /..1.*-*(&!    + 0/'#   + +.3-,-'" 4"     '.("!"deMB%,"QKAVUJD?93-'%&%"!ntw7G¾J\yy~wqll: F91.&! (7qɿz+ ,H;,%%"Jz{|{tri]ROIETMHP\]ZQ;?#  .gJ$+d7# &(-+he9N!ú½q<$( +A9#  "'((&%&&)38;3/:?77@:5' +!irrxsuvotv^_kH*@junnihrquwwn^G/!#G^^Y\^RћJ("2X}vF'6s¾z'$.$-++**)$ "AjJ$?ufYUSSRRPPQRSKWywu|{~{y|zzy{zy{yxwvm<5s](-&! GqaJ=333369H@4" #gyuzrw}rp\^nG-?ei`^[\cgf_]RC6,!)EPJA<*ͥf:#)H^e_7"7|n&!CL+****($ '}w9c}{vwspicYTSRMGDCEGE>893$:==>Aj[QMm5&+;Z_d<#).('$"    + /205/)(&&"   + +*,)%!  ,0--)&"  .     */+#.io63~@$+&LŃ[a\RrA!@TPHD@70-*'$   tŽ{}t6D@]y{}xrnk8 R3.-&  (8}ܛ.SY0" #!ds)!0C9+!"#"C|vqmcZLB=?=><;>7-*-,- -jM!;X""(&6)_C:,ùl4&% J<  +)$&*'%&&(-1.&&5GC@@O?# +$o|khf{~ii}^Wj=,fgQL]fu{xnb\zyϿźjAe|zxxsoicWTRQNEEEFHE?964#:??>=A9+.+@o:=7.U[ZH#(,')%!    + "15350()&&$     + ),'%  +(/.)+&!! .    +)1,$"_rooxy|C$-NĖ{z=$DSPIC>92-)%#! ~Ž{{/Aÿ=^yvy|~yupkj4 "M01-%! 
%:~ߛ0sYENVVV6eP"nn'/A5) """@}}~vxxqpceed]MGA>90),023-$%11:$ +1bA P^("(%=*Y+j$*ÿj.&# +F8! +,#(1.'&(*,/+$&1>799\\! &nogb\maZgs\Xi8+7><;@JKHB=?7.*(&#%-0/((=L),Rf]_RQ^w^}kkU$ B~\R\gG'CRhIDjL+ "8UVRRRQQRPOPOBO{|uyx{{{{{|{{yz{zxvtl6E@TtuvWbmo_VGDFUxiFB[gb_N2Us|~{ywueYZKNHGNCBI]ff_@=84,)*Ax:hn=t^UE "(.'&&"   + "/6461++''%    ))'$ .4+(*'!  +/    + +,1+%!]xqnfeE%) O˰{re:!ERPLF?71-+'$#"!ž~r~~5B¿;^ztsuv{}{wrojo8 + %I1.+%! )>~ޘ(oqf?Z}*mm-0<3)#$#:vptwtikokce_]YQIB>>B==?KVj}~}mj7 + 8mC DL=&#''Fɧ2REx'1}l* )! A2 *"%./,(%%%&#!#&/.15C. 'tnbljoxbOVhp\[j2)226>GHA:764,*(&$!#*,)!)=,@xkI&NkL%5;B:AELPPSLE>3jznBK;Kcb`O2Vu{}}yvyvlqURT`gre^daTQ28gbO<>?A_LBiC135^@DP"!(0%%$!    !/5051+-)'#    ''$! /0(')&! 0  +  + +(.*$J0%+MͲi[cbis7$FTRKE?60-)%%$! {ù۩ƻn-D¾:\zvtstx{wtqlgoB + %H1,,%! (95ke[ftrFXBJH#pi%3@4)$$#:srsvvwti]][YWSHA;;865:9B^k~{vgb4 =f?D.+;P7$*&Lǟ7P%J!6ſ}Ŀh&&! C. 1%/654/)&$$%&)1650%#' 'zuW]ljgRCKco`]j0%25:DC<401.*&'$%" )( !#'(&.Q9%,Xa9h3%=]NĿ?&/sQOi$&,,**+*$ $>UTSTSRQQPQPODV|~ywy|zy{|z|{xyzyxwxf8C~R%" QwhSXYUSPNRVTP;1+-kzoFOLQqg[c[^Wabged`]I7czvqqmiged`b\UXXSONNKFDDB:JhgP8;\jotz{zzzrmlfeb]^YTPLHE?=8750/))*ŸjD^{yutspib]URROHFFFGA9781$;>?>ClZ.%'V}`eF!*.'&&"   #22/3/)+(%    + +))%! )-()*(#" !/   + (-*$$+Jϯ6$ESPLF@82,(%"! ! ùզƳ3H¿7[}wuusx~zwtoo|< *F0/," )89IgzjZZRHJJ='!wg(6=2( $&#Avtx}wzsr_RQNKKHC@C?66;=CJTSMRUWah2 :^8BVqbN,#)'WȘQ';_8ÿ}g#)  D2 +'M&084FU: + $]Ualm|eXQatqk^n-$39;>81+,-)&$$$ !+*!! #%$#.r0 0Eb8.%Jſ6$Cmub:(+**+(&" %>XVUURSTROQPNCW~zwy{{{{|{{yyyy{xxve1F|P '!!08+  6]ZYXQ5*(-i|m@Q_Q}}vynoe`ZVN>BXWKHECGC@AA;>BBBDGEHLNPSX[Y\bP:3<@?:<975333/./02203.)+//210330235´iEZ}{vstphc_WTSQIFEGFA:592";<<>Fl/B"03QjcS9!*/*''!   + "4/,1.('%#   + *+'$"  )/*(+)%" !4!    + !-)!%,"Oζÿ¼3%EVPKE@82,'%$" !!""Ǻ}ǩ2D5cyttsu~|xuty2 )B0/*#  '8ߕ?A[bODxe)8=3) #%#CpppsleRHH?;ABC<581=3467EDDKOQUUODA`gW?7YU! + AScjvqf]o~siZh&"0551,('*)$"""!" 
!#"!##$E÷-2t:&'Rƾ1%"'++))('" '?WTRSRTSSSQOL?W~{z{}|z{zz{zyxz|xxtg.A{O ")""#%4UZP2!&();\a[VN:5<`w|qBTH=Pdeiklkiidf`c[ZRQPLBDFEDOSSWRJFEEFEHFFIIJLLNOPSRRRSTTWY_PB6:>ABEGGHHKOMLMMLMNNNSRTXX[YZ\VXaYµiDcyuvpid`XRPNIEEHFA:887!8<>@Hj~wZlkb_]ZU$ #(.+($!    "1-+10*'#"     +%*%"   *1.,-*#! !8"    + +!-*"&/ L¾߼;ƿ2%CTPJF?72-(%$! " "$yӺwý|,@5 r|xvx~}s* *=0+&#    %<ߎ17AyyrrtFF6yc&;=4*!#%#=hkjmid]SMF=?BBEA?7=FR]x{w0 >]8!"%*8* hΉ$K ".@ƾǡ«thog' 1P7 EHJMRRSVSRPPPOMQSRTSTRRNNLPQONLLHIHGLSRVX]`bbegfegijlihkomlojjkknmpmmlke¶jCX{wrnid]WRQOIDFHF?;862?>>@BtpjjzJXQGGDOT  #'+)%#!   !1/-11*($!  +%,$# '.*),(!!  %F"   +"-)"&,LĻ¿ɽŷ.$DTPKG?72.*%"! " !#m؊fpvvmXck~}p)C5(t~qpy{\" +>/)'! #(9٥\PF=)"'*%#%%y^%9>5,$#'$9hhdgkkb[XXhka\\^_bakoux+ + ?X7D?2?8HR>LPXe1 pu1=5 Fžŭxb( 7H/ 5X]stwnghcd^BA6He[GQ_[ + -wm5YgynbaG\ntOEQ_ +*'$$#$%"  "##!" "Hf&<;q"58TĿ !*.0.+))!.FXUTSSSTRQQON;`{z}}zz{zz|{zzyy{ywwse/I`K^\TOJKHA@?EIJHIMOQY]fjkldfe^^_]_^[_dbhinnryyUZVZYWRPSPLKFHEEFACGFIGGLLMLKIFJIJGJFDAFJFHHFHKLIMMDPUQTUY]bhjotussusqtwwrqrxywvusruxwsqusttxuqsķmFZ}ytolg^WRQMFFHGF@;;81@A?ABKD'')4g>dq``a\L# %)+)%$"   #30-21,*'#    %.&%!  -,*)*&!" $72.)#"! ! !# ĎjoiaS`z~zttt&G4(y}xmcbfqoR ,9.,(#  "*9ľX#7=6-# "&$5ad`dj}yZ_gxzzvvw|/ + G^4evY^MVfHRQge+ seOiU_bIſ}Z&  6>( =Z^muztmkeha6B>FULUgff( ._f5Vk~]^cGPmmKDOZ&##$$"$# "$$"# !$Lk&LS;S=;Y$,-.-+))"'&7I[USSUUTRQPQM>c|{}~zz|y|}{zzyzzxxwte6]lgnohaa_]_\\b]]`hedhnilqloonnosnpsorwrpnjljkn^SR^_LKJLNKLBAFLRNQND?GPTSOLSXWXVWTNTYabiggliomqvtuwtqmkmknsxyyyxvw{yw{{ttx}vrswxvuvsquwvrikorsrqppɶlE Y|vtnij_TTVSFHLIE@:;93>@?AIkO786Aj2=;,W`S3 &,-)%&#  + "0-.52++(&$     +%,'%! +-/)))($  !:#   "-*"%-!G¿)&ATOLD?70,(#""  !̡~qv%F3%x{ysfb]_^][P,7.-'"  ")<¼P&4;4-$#%# 9[[S^isxudhp{{v{|, HY0]b`RHSeKKBE@'{Z'{c_Pu2QƽxU) /0" \}lcgomhc[`cJNJRVVqob`% +0~be4RukQYY8EfS:=HN %##""!"! 
!"!!!!#Wn#Ya# <6`þ(,..-,*)#7T.""#).7BOZUTSSTSSUSRM>i~x}y{z{}~{zyyyyvxut['@mmpqrmqqkpoeieikffhab[W\XRVMPMKLLGABKCFHICEDEABJ[_WWVY_cgcXY^cktngdbgmrurruxzwyxtrtssuvvwwxwuwvxyvvuuttsuvutuuuwuusrrmpsqmprrpnonlopomkiklnliihjŷlH O~|rfqjQ]iUE<::95?A;>D|J$*@r98Y! &+3+'&#    + ),163,+(('!    %-%#   +*.**+)%  #@%   +".+#&,K''EUQLD=71,(%$#!  ſoclr&G++y|xpcccc^YXP)5,,'- + #*:ĿQ'"481,& #$" CkoojcdVeLSqwewcvu}{% ET-.))&)$#"%#(+#SHf^xF]ŽzT$* 2-elbcggb^YafYUVWWbqnee# !4vcY>Z~MANODPpJ07?E %"### !  "#"!"""fy#$INBL(#',iſ,1/.+*)&"!#$$')-0;Oc_WTQQRQPRRPNVw}uyy|x{{}{{zz{yxxuua92FUXXYUNGDLJNNHJCLQLKLJEKSZYVYYYZ]^^_cflgmlmmsw{|}ztszytutryvqommqswxurrsrtrsvxxxzyvwtvvutvutsqstsrsrtsrrrqsupqrrqnlonqrnljloqoonkglokkikmljheb_a`¶kF N|{owoe\PLE=8::4:=:;?drROMKZt9&4]HYp- #'70+'%!   ')-64+)&%$!    &.&!  + ),(+-*%" +;"   $.+$$'/!I¿&(GUQKB:61+('%#"! "}ֺüyvoe!G-+y}yuqoic`] +'3--(*SAI_H[ltE&+<ĿM("7:2-& $%#A[W]hic^f7OR9]8k>Fq{\Ml[xuzxvt DS,'("IWpgiKbüwR$' 6- ]z^dghfc_]_``YTNThnkfe #2cNaaaQEGNncB3/*!&###"   "# !!?n.,Y~H" .o½z)(9143./.*'%*//35489::CDDHJJOVZ\VUQUUSY[\Z@[uu~|zv|z~||{zyxwvvsaI[]a`\[XZkkgkhbebhjhghiipmkjkolonnqtooszxxuvursuux{}}zwuutqoprrsrsomnjkkswwtpijkqrrssrstvutststrrstssrrtsrssrsromlljlnoolhkoqponmmlmonnjklmnica\ZWXZmF Jz}rsjphaMFGD;99:3:?<88:4+*,)-FXN%:SV7 $(10*&$    + '),35,)'%"   %-(&$& (.(),)$") <" " +#2/%7Q*8.'0#G%(KVQKB:50,'&$""#" "zոľ~z~$I*.s~{wuqpohac_ (1./)0ҩ2dyv+[%+@M'"8:5.' #&$ LpcdhjhfsORWLUeuG~[eZzyvqrri GP-)(!EVƀq8hüwN#& +*UgaijigfcbdRHCCFXggagn $6I=3TyxwpiQ6!&5gZ2"&#"!!$"!!! F:.?NA#!"#0Fuz7.%&! 
!"$$%"%-3;CLNUWXV^\`bdabcegdigmlnqttutlmjiflkjklnqsOQso|{z{||~||{{zyywwu`I`dkkjmnnqqpqqmnopspononolkmmnnqppkmpporqpnpqpllijmoorsrtuvrpqpprvttrrokonrtuwyusqonmmpomlnllnonkkkmlkmnmlnnnmikmlonqqnmlnprqponlmpppmnmnmjgb]Y[[[XV÷nI#Nwssjejh[HAFB<::;25?;;730/,*'+2mr#%$/5 $$)'*)% + '-021,)''" &   $-))% !_X8/# (0,+/+&#XU;=+  1 hC <)  ";:(WbX!'1$G¿"'HXQJD;40-'%$#!"!!Ը¾}r&L')k~vnnnaae^_TWWe` +(1//,*HwTkR*xc9$+=ÿF%$==51'"#%$ Elipmltoeg|yhyzt}zwwyn HL) ('!A;X{ik' k¼wK%& 1+ MVYfgece`^\B/.6@M^]]kk '88Qj7,3:GWft|{y}rn_8%&#!!  !$$$"!"!)K?..Bjh3"",HX?EpBSQOPTQUVWW[\`afmnkorqpovuxututwspoqrqpopqrsqoomnnnmmoopqqWPyowzyz|zy{|{yzyyyvudVbhlmnnomnpqqrrrpproomoponpqppppqspnpqoqrqqrttuqppmomknnqsrsqqrroqqtrpppnoononnpqtttutwwutoooqoommkknljkmkgfhkmouppnljmprronppnlnmonlmonlg]]`ddfba]XȶsK#H~||}ytkk_i\UMMDACC;5 4<9971.-,)(+4icBNM[n. + I]W0..%! + ,45<5,)+)$!!!b> CE)  +#1**)#_&a '0,-0+$" `r  0>-P =B(K~x%/%D*'HYQKC;3.,(%$$"#"  "͹y{r&J%)l}mqkbfibf`Waf_ +(2/-*"*8+%$#.$*<þE&$9>51(##$#  ?SNbfgox|}yw|xx{|i DI' &&&<plaa>$q»¾tD%$ 2+ )cBR\\]\WN?2+'/;CYSC_[ 6.VGA`p}}wrG%1.'%&&&&'())'(+++*-.-**'**17BVWA7<=:1.07&0%C¿6!GUQIB;4/,(&$#" !"Դijnmpt}&J%$l}msuwtnrrigbW '0.-(#!%(?ý@% 9>5/'""#" Mg^_dffhnwwqmoononpz~\ >>#!&#*6BC3%s|þu@'% +2*QdKKMMNIB4.005?END;@C"',0DPjzquwsx}}}yqkiQ=CKJIIIJMMNQMPUPTRWYVY\[Z\_baaeeeO;9<49MYhlcdklmnnqpqsqqrrqrrssrsrsrqqppopqrqrrprutuuuuuutuvvwusswvttqsttsturstutrrqqqpqponnjTZ{uyyz}{{zzzzzyyyxtc]eloqrststsrqqrrrstwwtrssrsqoopnnmmmoomlllmmkkknoprrtpqqsvuljlosrrstrnonssrsopqtursrollghikmniijlljjiijikihgjihhimonqnllqmlklorqnmnoojebdghjmlmmmjkjʵnN%G}wqhj_]ZUKFCA>93 .:88752/+((*0h^$,#&1# %&*,(%##),4]  ,,,660.)%#"#x   2.+)# U@L $,,,*)$"dKV  -!X%R^ /-">B"#IUQIA<5.+(&#$"!!#ɴ}npbp}#FĿ%*s|xwupjaT(2.,&"$%<ü>&!7;3+&! 
$$ Reikoigd[Zxlfbc\fnuT 93 !'& 55+|Ľz¾v@0'=6#,LVTSODCEGCBEDLPRTZZZa[]ab[^^``][ZX[_`edgcdhhffhjillnpoonqpksrkjjkmmqmonssptnmnrtspqp]8/95ChloqllmnqrrrutuuusvttvxwvvwwvttuvtuvxxuvvvwvwvwwwvwxvvwuuuvssttvtttuurttvurtrsrqppqoomX^|xzxz~|||{zyzyyxxsb^fnqrsqqqqqrrsstuwwvusqspqqljhigba`_aa]__dijmqokimmkntvwutuuuokhortrrsspomkprupponporsuttspqqstnqprmmnnmijlgjllnloomkknpplkmmoonlnolh`beilnmmmlljjkkųnN%F~vodZRORPIIFGA<;70 1?;9630,)()+/_x=&!"" + +$((%" !!ud   +&)+33-,*&!!k1$!## "&&% p:z  +%**44,*(&"&VU q  .*%#$/ + $,(('&"!:- +#$ +$)&/$HF#EWOF>83.+'&$#!!!  "Ͽ~xzy$E$/r{yuneN ,62.&" %)Cü7$!GO62) %"Jh]bklml}xnzzwsrt{{uK-GK?D3,)$&'%#(,/,6BOXSwr<00)+,10/009976tB:dueREIE4+,----=LMV^cmhbbiobRTWO]eghgikklossrrssuuusuxvxyyzyyyxxvuttwxuvuutuwwvvuwxxwxvvwuvwwvtsttuvvuuvvuvxuvuvtttwxxxxvuuc;Iamoqprrqsrqttutwwywxzyz{yzxyxwxxwyyyxwyyxxxwwyywxxwwwuuvuwvvxvwvtutuvwuutvuutsrsrpqppooY`|}{xzy}{|||zy{{xxxt^Rdkppruwvwvtssuvw|z{tqje]YTRSQQSTWWTSQRSSPRRPNT[aehjlnkjegksrrpqvoleennppqssrpjjjgmnoponnllnnmmonnnlkollkhghjiimllnkjiggkpllkkookfaaegimnlmjiea^cjhfµuN&:|}zxtqmhb\WQPNJEBBEC<891 -::9630+(&'**)'&"!"" + !%&%! *PK"qd  + +$*)31*&$# !B')   +,+%! +#,&&''$#  *# + $)(0'I¾½M#!BUME?83/*(%$#!! !!ƻ|yuxqC"(q|wrj^@ +330&#!7ü430OrM<. #&$##8dTPSitnhhqx|}|wxymL-/1697B?ADCADJLSXemy~|}siqlkjcggedejklmnkhe^bjqpyxgbbamqvuv|~|uklrjii[^db_a__``aded`bdffigehiihigijilmqqsuutuwxxwxxxyxxyxzy{{{|{{|{|{xxy{{zzyyxyxyzzyzyyyzxyxwwwwvutvwxxvtuwxxywxxwuvvwyzy{z{zwvqmqqrttwztsrrsuuvxyzxxyxzzwxwwxwuuxywvwwxxvwwwwyzyxvuvvvvvvxvvxsuuuvuuvvvutvusstrsqonmkooSb|y}|vzz||{{zyyyzyyxrZZknrqstuuvsoqrtvstmke]Z\ZYTSSSVSUSQRRQNNPQOPOOOKMN\cgjkmnmieitomgotrjefosnlnpppqnllieffgihihihhikjlkmmmmnlhjjkjgidcacjmkljceecc_[XY_^`_\ZWVQOQQOLLLPƶqR)?{zvsog`ZTQRMKFCDEB?991! );;7432.)''(('&$"!%"  %&& #= + +&++.-'&$"  + ,)"  +%-)''&%"! '" + #*&5&LýJ(DTLE>850+)'%%#$! 
r-sĺ}\D")e~wsmaZTLGDMID1*-3<,$ #(4t¼S?>;RGA71111,*(+-,4@>CDBGEKEOS[]flisz~{}}wsrh_\dghhhedgnkmrwy}z~~~zzwvttxxvvwzxuwxxvttqqqsvwwvwvssqpjjmmnhnllmjhkmllmnoooonlkoplklllmmlmnoponooqpqoppqsuvuuyxz{{{{z{z||{{}{{|||{||{{}~}|}||~}~|{|||{|}|z{{y{||xxxvvttwxwuwwwwxwy|{xzzyyy{yz{z{||zyxuuvutuxxrssuxwwxywwxxyuyvwyyxwvwwwwwxxxxxwxxyyxwttuuwwwuwuvvtuuvwwyyvwvvwuttsrtrqrmkmnRkysx|y{|}|}|zyyz{{{yrTYjoqqqqstttsvuqnhc[XSSSVTZWWSQOKKONNJIGGGKKKJLOLLHHOZbfijjlkgakrmmjlsphcgnpplhkoosqpmjjgfcd^`]ZZZ\]XYZ\Z[VX[XSUSVSRWVSVPJONNNKFHJHHFHHHFGEABDA@@@@@EзoQ)<~yzxtog_XSPROLEBDEB@:75!-;>9542-+*'''&##"$)$  +!%'%    +(+),/('%#  + +'" (4/)+('#  +$  !&%2 R¿I5>WOG@961.,)''&&$""!!!"%/wr3.4BNSS^WYX\bgi[SF651-28@MV\TSXH0<01**)*-/.3/:AED>833+(&'"+GZgpurx{tsslllrutqcUQNLLGNQJMPUY_ahrxxrhifdf`_Z``_fdcfgichibdhhkgkheknoqtwvyvxvvwvwwy{{}|}~}~}}{yz}}}|||||||{{|{zzy{{||yzywwwvtuutqqtrsstqqrrrsrtuuvwsrstqpppopqppopqpqrrsrrrsruvvvvwxwzyz{zzz{{}|{}|}~~}|{}}}|}~~~|}}}~~|}~|}~{z{ywxxzxxxxxwwwxwvxvwxxux{xwxwzzyz{}}z{}}}{zyvvurppvrtstwwwxxxwxxyuxvvvwwvtuyxxxyxvwyyxxxvwzwwvtuvvuwuwvutvyxxwxwvvvvutspprrnnpmmlDo}qrzzz{|}}}{ywy|}yvqPZgijmortttrrqid^YTQQUUUPRQNMOONQNPOMHIHDCGFDDHKMMHEFLP\a`aafjc]Y_ea^]_d]XVUUWVVWXZZYXQVQPOQROONLMKJJJKJIIFGHGDCGHGFHDEEEBBDFDBDCBAAADD@ABA>>>===>=>@͸tQ(8~zxtsng`YSPQNKEBCCD@;74#.;>=:60,,+'&')()$%0+! + ')&$" + +'*+/1*('$   '%! 
+ +(=2,,+'# -'" +#&"0 RŷDA9POG?8332--.*'((&#!!%'&13BP_iudlsuniZ\QKLLMPMQVZcgks|~~}rjgecgkfdfinnqrpuw~yxqje]YSTONKKKNPIJLMRV]em}znhjjgjmpptvw}~{{{yyz|zwvtrtrturqsrssttvwvz|}~}~}~~|~}~}}|}}{~~~~~~|~~}{{|}|zyyz{zyywxwvuvvuvxxwuvvvwvvxxyyyutwuttssrstrqssrsrstuttvvxwwxyz||{{{|{z{|{||{|}{z~}||~}}}{|{~|~}|}{{{z||{{zzzwvxxzzzwyzxxwywxuwwvwwx{yxwwxxyx|}}|~~}~~}zvwututszypqpqtstuvuvxxyvwvvwwwvwuwxxxyyxwzyxxxvzzwxwvwwwwuwzwyxy{wxwuusuvttttssqqqrrporErspy{y{{{|||zz{|}zxmTWfilikmonnfaZTRPRPONNLIGFGJMMNJMKJKIDBADCEDBBEHHHJHEFEIONOPOMPOMKKNJJHIKKIDDFGGIGDFIGEHHFEGGCGFCBCDDDCBBDCDCCEEFEDDEDDEFEAADB@@A@?@BCB?@@>=><==;>>==ǹrQ(4{{{uwogaXSQQOLFEDED@<93!0AF@=70-.,'%')+.&&0*  ',,,' + '/112,($#  + (& 'D4.+)%!%<($!  (!&&(TþƻNSRXYWXVVUWSRQQMOU^___ejqu}|||~}~~}{z}|~~}~}~~|||z}}yy{||||{|||~~}~||}~~~}}}|~}}}~~|zyz{yyyzzswwxvvwxuvwwwxxxxxywxxxwuxwuuuvvuuutvvuuttvywxxxyyyzyyzx||||{z{z|}~{{|{z|||{|{{{}zz|{||{|{zyxz|zzzxxxyzx|{yxyzy{yzxwvxwxwwwywwwxyz{{}}}}~|zywwvw|zqrqsuttvvuvwxzyxwwzz|yyyyzz{{{{zyyyyyx{zxxwyyyxxwy}zyyzyvxxwuvvxyvuuutttsrrrqsJxxnv|yz|}{}|zz|{{zvnQMQTXUQVSJHKJFHHGHIJIGGDDFIGEHFFHGGGGGFFGFGFGFGHGGJHIHHHHDHJHFIJGFGEBDE@CECACCDFFFDCFEDGGDBCCCDECBCCAFDCDDBCBBDGCEEFBDDEDCAABC@?AB>ACA?A?<>>;:<>:====¹tT,/zwzphaZRQUQPHFDEDA;;5-ANA>81//+'&&(+/(+,'!   %(58) '-/-0.)$$ + '' '9/*(%"$<*!%J*$%/7M[qĸ~qigq|qpwulbYQ\pu|{vrvw}}y}z~~}}~~~~}~~~~}}}{}{}|~~{}~}||||{zyz{xyz{xxyxwwxxxyzzzzywxwxwxyzyyyxzzxyz{yzzzyy||}}{{{{~|}}}~~~~~}}}~~~z|||{|~}|}||{~|{}}{{}|||}}}||||||{{{}}|}|{|||}||z{zy{{zy{{z{{{|~}|{{yxzyxxwzxxxxxxzz||zxy|{{|{{{|~|{||{zzzyy{}yzyzxxx{zyvyyyyy{}zz|xvwvuurqqsqooqrqqo]~vs|y|}}z||z|{}{wlUPLOOPQOJILKLMMIKMLIILKKKKHKGIJIHHHHIHGGGEHHFGGGEFFEFFHFEEGIHGHGDFFCCDCBCEAAFFDFGEDDFECEDCCCEFBBDCDDACBDCCABBBEHADFFAADCCDC@AB?>@B>?BAA@<;<>;;<::<=;=¸vS,0swtnfb\RQRQMHEDDDC;;7+:@=?=40.-*&&(,-()*&" $$&,+#/)+),+)%&$!'+$*3,$!"#!!  
(7>KTUTQ0*:NXbmsw|WREBFH?KLIKNTZ`hv{z~zzskjikkmp~{}{wurpkiehmmty|~~|~{{{|}~}~~~~}~}~~~~}||zz|{|{}}{}~||||{z|||||||z{zxz{{wyz{{{|}}|z{||{|{{||}}|}{{|}|{}~}}~~~}~~}~~~~}~~}~~}~~~}~~~~~}}~}||}~}~~~~~~~~|~~}}~}}}}}}~~|~||zyx|yz{{y{{zzy{~}{{z|||{{||{z{zyzyyyxyxzxz}yuyvv}}wxvtvwronqpkpomojjkkjjhhgfeedgm||mwzzz||}}z|}}|zvhTNLMLNOMKILJLMJHJKIGHJIHIJFGIHKHGFHIHFGHGGFGFGGHGGGEFGFEFFFEHJJHGGCCECBCDBCDDDDEECEDEFDBCCCADEBACBBB@@ACA@?B@CCDBEEFCCEDCBBABBBBBB@@@@B?=;;;;=:;<;;:;ɸuT/3{tjca\RPOPLGBAB@>:<6".376852/++)&%&'&$&('&"!"*"7*"'56<4$%$,*(#$'"#1APWT?*2,!'/6GWedY<85/-,))-1/.168CS]mrzkRJJE?CIIPOSX]iq{}y}}}~}~}~~~~~~~~~}|{}}}~~~~~~~~~}~~}}~~}}~~}~~}~~}|}|~~~}}||}|}~~|||~}~~~~~~}~~~~}~~~}~~}~}~~~|~|xyxyzwvyzzzz~~|{{xwxz}z}}{zvvwztvssstqmkmomjihf`_b`ZYTTVQPNPNLMMHOLGEGIEL]bbb`bbbbccduwz}styxz|z}|z{}|~~xkPNLJJLLJJIJHMMHHJLIIHIHHHIGGHGHGIHIGGGGGIHFGIHFIIGEGHIHCEEEDGHEEGECCDCBCB@@B@?BDCCCB@CCBADECEDDBCCBCABDCCABA@DCBEDCDCACEC?@BA@BDA?@??AB@>=;9;<;<<:9;<йuX37-5=;8;97/.)/,..0..1459ABGHJQQ]j]MLIKB==@<3699DCBMS]du{}|paWLJB6..()+&-*+,+169?@CBOW]jxzmib[[\^fhjor{~~z{{}z|}~}~}~~~}~}~~}}{}~~}}~|}||{{|}{{|{z{|{{||||zz|yxy{|}}{{{~}~~|}}}}~~~~~~~~}~~~|~}}~}{{}}}~|{|}~}~}|}|{{z|}{yy{yz{|}{{{yvuux~tprsqtttqqookjeiigfhb`]ZYV[WZVSSTTNPMKHDED@B>==>?:9998:<=;:;;:==?@AB@ABCAJ[db`aedcgijn|pt}xoz{|{{}~{z{}}|wiROLKMLLKLJLLLJIIIIKJGHGGEGGGGGHFFHIFEEEEFFFEFFEGIECDGEDCEEEGGFEEGDDCDBCCBAABCBCEEECCCCCBBCCDDCDDBB@@ACBB@A@?ACAADCAEBACC@>@B@@BC>=>>?@@?<;:::9;<9:99;ιxZ7#k~zhiiga^[STRKRRVZRU]_bclvrtsspokokmpolnqtu{{~|~}~}}}}~~}~}||{z|{}{|}|||{|}}|}||y{{}||{|||}z|||{|~~}~}|}}|}|}||}|}||~}|{{}}~~||{}}{}||{zz{|}~yyzzzywvvtuvvvvuswturqpqonnppmmoolllkhec_`[]^\\^`YQLB=B?9(&7@C>>AB@=;>>;@<<:;<:;:989877::9:<>>A@@@?BAEL`hdddghhjjltzvz}swz|{|}|z{}}|uiQKHIKKJKLHKKHEGIGFJJFFGHEEFGEDHFFHGEFDFEEFFEEEDGGGCDGFDFHGEGGGEEFDCDDBDBBCBABB@CDEBBCCEEDAADDCCCABBA?BBA@@@AABACBCCCBDCA?@A?@BCC@>=>@?@><;:;::=>;::8:ɸu\:)}~{||~}~~|}~}|}{{|~}~}}}~|||}{|||}}~|{|{{|zzy{zz{{zzz{zzx||{{{|}~}~}~|{|y{yyz{{zwzwvwxwyuusuurtsroqnnnkplnnmmlgfgkhihiegifbeff`a
b_^fe_[df]Z`bXXWTRPTWVUUXRGC>6::3!4<>>>=>98DJEEEECA>==?<=<>>>;<<;9:98878876999<<=??=?@A??DNekghhiijijlrvt|zty|}}}}}xzzvhTLJIKJHIIFIHGGHHGIJGDFEFFFFEDEFFGGDCFFFEEFEEECEHEFGEDCDEDECBCFDBCABAABCAABAA@BACDDBBCACDCBBDECBC@BBA?ABBA?ABAADC@CC@ACC@@A@?@A>??==??;>?=;::9:><:::7;Ÿu_<%~~~||~}}~}~~~}||}}}}}}}}||{{|}}|}}}}~~~}|~{|}yyw{}xwvvvwuvtpqtqqprrqnmpoklonkmoljlghdikfdehe^cffddf_bbd`\be^`_g_dfgabad``cc_^^^[ga_[_c\^__XWXTRSVVTSUYRFA:777139<<=;=;6BJDEEE@?>><<=<;==99:;87776766645778;::>>>>A@==DOfkhkjkmommnu|v||t||}~}}}zz~~zuePLKJJILJHGJHHIGGIIFHEFEEEEDDFEFFGECEDDDDDEDDBDFCADECBCEDCDBCCEEDEBCCCBCABBBCBCCDCCBACDCBBDCCECAC@AA@?>@A@?AB??DA@DB?BCB>?@@@@A@?>>?@>;=<;9;:79<98:::<¸x`?)~~~~~~~}}}~~|~~}}}~~}}|xyzxzxwwvwxyy~||||y{ywxtrqspnnlokooooooqooponomkjlhhjgadihgfdihfghhefhgdfcccagfe_f_b`cbbacadbaca_b`aadcebcddaadb`ba_[_bb_`__`_b\YUUURPPUTRSSXPE?957411::;;;<96AGCCDDA@@?<;<;:=;:9::6577777767867:;;==><=@><>COflknkllpppqy}~xz|w~}{}}~}|{~~zt_MKKGIKJGFFFEFGDDFECCCDBCCDBCEDEDDDECABCCEFECCDFDCDCBEEECDGDEDCCDEA@AD@@BB@AAACCDB@A?BD@>@B@BCBBBA?@@>?AA><>CABA@BDAABB@AAAAB?@@?>>>><<<;:99:9::8888:9ȼy^A)~~~~~~}~~~|{z~wswvspqnqrvmrqqqqpttvupsvvvvvuuvtuvtrsrqrrqqprsokpppknnnnnllknkkjlkmkiikjjlojiljfggfghihfdeccded`cicaed_bcc\^`c^^ac`^_d\`dd]`bb^_dc^deb\[^][ab\Z_]]^]bZWQRRSRRSSTUTWND>7663229;<9::75EHHGCDCB??><;<=<9<<:9888976677766688;<:<<<=<=@@Oknlollmqrqnvouxw|u||}~~|w~{u^LHFEHJHEFFFFEGEDDEDCDDCCDCDEFGFDEDFCABCCEGEDDDEEDBADEECDBFCCDDACEBAAB?@@>=?A?AAA@BB@CB??BBACCCCDBACC?@AA?@>AAB?ACBABB??@A>AA=>?<<<=;<=<8898::877867:7ϻzbD&~|~|{~}~{}{|{vyxvuuxuxuvotxwwwwuqmnonmoqpspnlprvwtptpwrtsttsssvtruvutttrssrjuvurqqomprqkoolgoqoimnmfmlkfhjlijmjfhiggjkjddigacffedhfcbebddib_\e__dfV[bc[_bc[]a_\^]f__bc\_`c`_ca]fbb\Z\^\^^^[^]\]\^^[SSUSQRTTRQQVOC>9731+08;;<:744BEHICBC@>>><::><9;;76776644556436555897:;;:;>@A?AAAA@CBAABAACBCCB@@B?>@?>@A???@?@B@@A@>?@<=?>;;>=<;;<<=:98889987676777Լ|cG)z}~~~~{{yyz{zxxzvvyywu{zuzvwwzxwvyusrussssswwxvvwy{yzuzxzw|x{yzyyvzxyz{ux{|vux{xu{}usv|sus{tquwwstxuortpptwsjowsmprqjprognoojoqlgnokflhlhj
ilggjiihhhijjlfehdffffeceeecfcgggadaedccc^aab\bbb__\___^babaabc]bbb``ad^a^\Z]]]Y``_]_`[\_]TTURQQSUURSVNC>:942*39=:;:765AGEDBCC@===<;;<<;<:899866233676557689:9;;;;=@A?=BCA@@>>AA?@CB=?>=??CA>@BA@>@@>?@>==>>><;;<:<><9797788677766754̼wcE.~~~~~|~yy|{{||zxzxvsuxxqsuswvuzzyy{xwx{ywz|}zz~w~{}~z}}{}yx}~{}}|xz{{||}yx{~yz{{zz|xvwyutz|sw|ztwxysswxvuvzqqryqovwuprurlqsrnswqmotqlrqroonlilnqpnmkmonmkkioljfikjjilgdiifghgdcefefffcbbfaacdcbdf``bb`aaa^^^`]]abZ]_`Z\_a]_bbW`ba]b`aY^]][Z^\Y``ZZ`^Z\_]UTRQQSTUSSSSLE@;853*29<=<:866DKECBCA>><<>?>=<<;;:;9887544687565787:<;9:;=:?DSnrrqorsusrrzxuyx}y{{}}z}}|}{xo_IDFGGEFFDCFFDEDBDDCBCCCBBCDCDCC@DDA@BAABCABBA@BBBBCDD@@BAACCABC@BAA>@@???=>@A?=>CA@?@??A?@?@BAABAA@@@@>@@>?A>@A@==><==><<<;=>=:<<99<:7675555455456534ȽycD,z|}||}z|}}|{}}yzyvswsmmlljkkd`^XR_SRUV`hnongmloiillcegiejlokloromqtvpuxxuy{{z}{y|z}~y|~~z||}}y|}{|}}}{z{}zz}|xzz}x{zywx}xvx{vwz{t|{yvzz{y}yyyxuxvuruvvstvuuvusrrsstqopplnorlprokmmjlomljmpkillidikhcgljfgkd`fieceidbeebcdgbbdcbbacabac^_^b`^^_]_`]Z_]a]``b]]_a_a_a[a^`be^`]a]b^Z\]Y_`[^`\]_^ZVTRQSQUTSPTTMD@<973&49=>;9974GIBDB@>?><:<><:::879:657775666654345799889;:9=BPrvttrpsuurt{~ytx{{yz|~}|~}}zp^JFFEDEGEEDDDCECBDCABA@?BCBCD??@??@AB?@A@>=>??@?@ABAAB?@A?ABBCBABDAAA@?@A?>>A@?ACAAA>>@BABA>@CA?A@>>@>;>?;;=>=>>=>=?====;<><=<::;;;:;98676653445588544Ȼ{dH%~||{~|~~{v||wwz{yy|z}{}{zx}{~}xzw~z}yw{rpmc\]UT\]_^YVQMKJILHJOWcdlljelhgahijeihkhllmimorrostvvyyxwyz~}{x}z}{~}|~||~~|||}{|}}}}}y|{{z}{}z{w{y{uyz{yvz~vuwzxvtvvy{ttxxssvwotuyurrtplrurmnqmhpqngmpljopjglmjhjnjjmkhgljfcikgdghdcdgeffhdhhdccbd_cdcbe`a`c`a`c`]_b_][a`_\b^]]b^_b_[^cd]^_a^bcc[`c_W^`YU\_\]`_[\__VZXTUTQSTUUORTMG@<;:5' 
289:99:75BECEC@=>>=<=<:;;:87985875445545655578889;::99??=>@BB@AAB@?@@@A@ABB@?AA>@@@@@CA@AA=?@=>?A?==@@>?@=>>@?A@>?A>=?>>?@>;>>>=@>?>>=<<=<;=<<;9999;9557555655733575351ƼzeH*{}}~{yxx}||zqrnrtwywzy}}}~}y|}}|t{{{rwvxmkrvllquoiorslloulmtwonw}soy}}tx~vz|vv~||zy~y|}tokfc]VWZ^][UQLKKKJJMOR\kjiljiglehfkbijmigkommnsposvwsxzyv|~|v|z~~~}}|~}||{}z{~|||~}zz}xx|~xy{}vwy~zvy|wuy|vuwyvttzuyyxstuwrtuusstsnrrqomqtonqomoromlnnllmlklmjknlkkmnjjjmgfhfghjgbegfdfhdbdfd`cda]cfb_di__cf_]`c__ad\Z_`\]^`WY^_X[__X[``\\^_\___V_a\Y\\YT\_Y[`][[][S_[WWQPQTSQPSVKFA;:97)!28:998757BGHFBB@?<<<<::;:::998696545455556646686:99788:=Vtuusqrssuqoyvwwy{y|z{}}~}xlWIEDCCBBCDCCBCDBCA@@@@@CC?@B@??@>@BB@?@@@????AA?@@?>==?=>@?=?B@?>=;?@>=>?=??<<=??>A@>A?@AA@?@A>?@=>??>?=<=??>>=<>>;<=;;>=;;;:9887885478655445633464330ȽzeK,v|k\Y\XUTYSMLX\\_]\ZZZQDI`z}~|~w~~suuvkmlxokrunjmmqjlmsllptkquxqoxyyuyz~r~{}y~z~{~}|~yqnid`]ZXY]`\SQLIIHHJKJR`geikjefhhbgeicfimggkoijpsrqtxtru{zt|~}vz~w}~z}{{}}~|}{}||{z|xy|yw|~|{|{{uz}~yz{{yxy|zyyzxxx{x{xvwwwzxzxwtvstqvquwusppxtmloroqsnknrpilpokmljijlkgimfgjkjghhg_dhcaeieacdcaceb`ada_aea[^eb``b\``bXY^_Z^_cYZ^]Y]^^Y\^^Y[]^Z[]^[\^^Z[^_]c]\[YZ[YY\[\]Z[\YYY_YWURORSRQRUYKF@;9:6* 
39;;78845AGGBAB@=:;=98::77::65573444233333545455776577:?Xyvurqqqtussyvtwvyz}{z|||~}~~xlVHBCDBAABBBA@AAA??@@?>?@>=?B==A@:AA?>@@@@>==??>?@??@A@A>>>?=>=>?>?==?>>?@?>@A>?@?@?@>>>>>==;:=;:<>>=<<=><;<=;9<=;98;::77874555554446643455220ziP;{mTSTRMMNOOPU[]__`^\^WQH\q~{}{v}t}t|mpnurowprmqiskpltlmoqkvsquyxryyyv|{w{~z|~~|y{~{{zxrmfb^XY[\^]ZTNLKIJHGJNS`nhehjhfgghbggjiijlilmpnqqwuuuwxxwyz{}||y~|z~wzz~}~}}{~~~}yy|{}}~z{x}}{|y{|{x{{vwz{wxx|vux{uv{stuysrqwqptvrqrunlmsplnplknqihmnkimnhfgkhegighjjhfghgbcedeehfadfbabeccaccbabaccdbb`^_d]`]c\_`a_`[]\\[^W[]^X]]\Z\][Y\]^Z\_^]_ac\\\\V[[[Z]\YW[]ZZ^^WURRQRSSSSUVLDA:9:5)"6;=:99845AGEBABA?;;=::;879:966676643245344557656787898?@@=??><<==;=AA>A?<=@A>@?==??@?@??A@>?=??>?>==?>>>>=<=<;<<;>@>>=?==>>=>><>?>;?@@?>><==>=:<>>;<:;=;:97689867654533543442135235210̿~hSGp||[NRNFJNMJORX^^__]^[WREMt~~~|~y|t}rwrsswtnntulnksjjkqjjjsojrwsiw{{rw{rw|qy|z|zzzxvsmf_ZVZY[^^WSLIHHHHGKJXiifeghggihdhfigikljlmppootrstyvuw|yxy}z~~yy~~~{~~|}~|z|w}{v{}w|}}yz|yz}}uxxztxz}vuzyuvx{tvxztww|sqqwsqotosrtnpqsjkmphlnnllonmoljmoplkjikjggfejgffggfgdbacfcceicbbdac`ca`cd_^aa^\\a_\]b[_`bX\``Z\\^YZ]_VZ[]T\^]VY^\UZ__YZb`Y]_]W\ZYRZ\WU\\YXX]VW[\WVSRPORRNPTSLB?;973&"4:=:88639DFCDDA?A=;;;;;9896767668730023454456545667875:?\zywvsruutqpx|uwyxxyxz{}}}}{wmOCBDBBAA???AA?@C@?>>==>=>==>????>?<==<<<=>=;<=>=;<==>>;<><>=<=====>?=<>>=<=;>==?==<>?=<=?>>???@@@?>==<<>=9;<<:;=9;<979;:::9767655457534642342122032/00Ƽ~gSSb{`OQPMLJHILOVZ\]]^^\[VJRh}xqtv}mppvolpsrgqgseknmfghqnjvurn{zxtwx}t{}}y||}~~wzsqlc_YTWZ[_\TPLJIHGEHHTYbokaglhagkibgjldimlhkmpoopsqpswttw}wuz~}x~y{}~~|{||}{|{|}yy~|z}|yy~|w}}}u|z~y}y}xzx|x|z|zxvyxywyt{uwvwwxvtpttsouqwrrqvrqmnomnoonkpnjnnjgkmjgilgeljgfgmfefie`fe`^bec`dd`_cb`aab^^bc\]bbY^ab]\\a\_^_X\\^[[Z[YZ[ZV\Y\V_]\WZ[[W[[_\W^_[\\\X[ZYZ[\XZ\ZYYX\[][[WVTSRSQQRQRQGDA;961""28::85639GD?BC==@=:9<;89;9687764563/002533454334456756:B^zzvutssusppx{xvyxy{xz{|}}~|wjNCACA@B@=>>=>>=;:<<<;=?=<<<;<;:9==;<<;<;;;<<;;=;:<<>>;;===><;<>>>=<><<;<;;:<==?===<==<>?<=?<<==;<<;;<;:9:;89:9:;9889;:89897755566334442101100111
/.0û|eXagW^p{rVQPJIJKJJMRV]^^`_^^ZQJ[y~{~v{sus}lqowrnsovhrfqfnkifnjonlqmosyrsvxuzyyx}z|~x{wxuspjb]WV[[[]^XQNLKHEGIKQ\cfkjbghhafffbihhakjmklkqqsqspstxvwyywzz}~{y}{|~~||{z}|~{}}~|~~{}|{y{||}}|~{}~z}~}y|y{z}y}xw{|suw{vuvysruwpouwnorspopsoilpljoqljjnjgmnghijgfhjffikfgcibfedda``b]bbabcba``aab`a_`ac]`b`[ba_][[]Z`Z\]^[\Z]ZY[ZXYZ_XZ\\YZZ[WZ\]Z_]YZ]_ZZ[][VY\YYZ]YVYYVVZ]XYVUTRQQPPSPRRFC?:760#%187776628FD@C@>?>::<<;9:988:87543543223531455542466648===;<>==<:9;;:;<==<=:<<=<;<<<<<;;<==;<<<==;<=<=<:;<<==;<<=<<;9<;;9::;<:<;==;<>===>=<<>==?=<==<=<;99:;9;99:98;:8:85667454235333322110/1//1./000ļ|gWdmOFWuķq\OLJILKJMLNSY_``]^][XRUx{{|z{yt|uyttuuuotrqjnkrghhqhinolgpuujqwxpsw|ru|zx|pw}y~wrnha\UTYZ[^_YRMKLJIHJQYkjlhfiffefgfeghjhkkpinlomrsuqsswwwz|zvx~y|{~||~~|~|}|}}yy{|y|~{z|}z|~}x{{}z{}~wwwzxyz|vtyzruwzuuuxstuvontuopqroppqmjlqkillhikjjjkkikijjjgffhgefhebehb`edcabba]aea_bc`_aa^^_`^_^``_\`\\^\Y\\`WW\\WXZ^VVZ[WZ[[UXZZTWZXTZ\XU_b[X]_XZZ][UY]UVZ]XWXXUVYYVZURPMNPPQRQSRGA>9860#%398556529DDDC???=;<<;:;<87:;76443234233233436642354226:;]yxwvtqrsttsy{xywwxxzxyz{{|~}ytiICA>??<=??==<==>=;;<>=<<<;;;;===::<<;<<::<<<<::<=<;==;:9<<<==<=>=<;;<;;:;;;;<<:=<;<<>=<>A><=>>?@><<<;=>;:7689998:9778669966653531244431221121113100122Ϳ{iSblND\xƳ|dWQMLMJJLKKNTY]]]^^]_VS`}{{}|{ptx}tmuxvgqrsfjmrgihoiikjkloookqrvqstxsz{||}{|yy}|{uslea[UWXY\]_ZQLLJHGGNV^bjikgciif`dhfcfkjcjjldlqpknsvnsuxuu{}{xx~{v}z}z}w{x~{|~z{}zz{}yxy~|uz~|x{|y{|x|yxyyyw{w{yyxywvvwuuuxvvswvxutrtqpnspopronorljjkhgjnhgjnifejgfhhdefhd`dgb`cd]]ca^^aa][bb^^bb^^_a\[]^]^^][][^Y[\]Y]\`SXZ\WZ[YTWXYXZZYVYYYXZXYWZYVY^\ZU[\YYX[[XV_WXYYWYXXWXYZX[UPNNMNOPRRUSIA>:760##499666638BCDD>??;;<:99897688554323111210212133225544659Bazyywutsttttywswvwywyxxz{xz~|yshKA==>>;<<<;<:::<:8:999;99::::;<:9;;:<=<<9:;<;::<;:;==:9;;:<<<<<<;;<99;;:99::::::;;:=>;;<>===<=>>;;:99;;997799:9899887777655444444324310212100/10//0.-1{fRdkHCXwʿº˻wi]RLKJKJKIIKLTZ\]^`^]WUct|{{zy}s}tyupvxwnqqskokmjkijimifnnmjknqmorvquwzuw{{y}{w||y}~tpj^^[SVYYZ_^[TQMKKJMOY`khiikgghiebceedhhfdlknippnmrtvsvvxvz|}zzz|y|~v~y~~y
~{~|~|}{~|}{{|zz}||}}|{|y}}w}{~zyy}vxyzuwywtvxwstvwsuwvonqtmjqtkknrlimohgijefjkegglhfchgcdfcgeffbbfc`d``aa^_`a__`ba_ac_]__`^]]]]_]][^[b][X]Y^[\WZWYY[XZY[XZXZX[YXW\ZXWYZYXX_]WX][Y[ZXWXYVWVYVVY[TV[\WX[ZUTROPNNOPQTPF@=865/"&499865428BCCA?<><;<;:989888876444431321/002233225435659D`zzxxwtsvtrrx|qvwwywzyxyzxz}}zyscE><<<=;;<<;;:;;;:9<99::9;<:8:;::;<99;<<;99;<;::<:::;:98:99;;:;:999:79;;99989:9:<;<;=?=><<>?>>?==;;::;:99<999::998886467555433233420120110000/00////../«{hUdnLC]sȿpYPPMLIJJIIHLRY]\````^YXq~zz|zvux{vstyslolthhlpjgkpa_lmddjpqhhpumpvyquvzvx|zw||~w|usng[YZXZYY]]^YUTRNLLOV[gilfkgfhlfdbgb`gijhijojmmrmnousuvzvuv|{|~zy~z}~z{}|}~||~z}~{z|~zy|~{y{}|w{~|vx{xyzxrvxwsvxtqquwrsvuppssonoqkimnkjlqjhllfjhjiijhgjhjjheghiedfgccfebacda_be^\_f]\`b_^``]Y^_\\^^Z]\]Z[Z]Y[[[QXZ[YZY[UYXYTUZZUUXZUYZXSXYXTSWWUX^ZUW^ZUXZXUXZUTTVSUWUSWWWUYZYUSPMMMPPPPPNE>:544- &39:753429CBA@?<=<;;;::888878942331211//0101111112145438BayzywvustsrrzvuxwyyzwvwyxyzzzwqZ>==;:==;;==;:<:99::9::999:879989:8899989889979898:89::98::::9<<;;<::;:;:899998;=;;>><<>;<<<;=>;;;989:879:8888::8668534656544324333232111010001--////0.ȿ|iWdoKK^v­`PPNKIJLJGGLOSV[\_a_^\\gxzzxxony|umtuukpkshjkphfkndelhdejnnfjnrlrwuqxtyzxvxzyzx|yxrmd_ZVX\[Z\`^YSTSOMOSZ_fohihmddhkeacjfbimkgnmmhknqmpsxqux|suzvvz~zxw~|{~~uy|z{|zz~}w|}zz|ww|}zy{|{vz~{wyz~wzxxtwwwvwvtqttxruuursprpqnnmnlllmlmjllkhlhgfiefgjddijc`dgbacea^ce`^^`_]]^]]__Z[]b\\]^[X^\Z\\]Y\[[W[\ZYZ[ZTWWYY\UXTXWXUWWWTWWXRXXWWZVYTUVVVYYXWWZXWWYXVXZUUTTTWUSTVVXZYVVUSPLLMNPOORMD?;765,&477433109BA@>?==<:::9:8975665211211/..00/./120011133239@gyxuswuqttrr{ruzxxyyvyxzwxxxvuq\?<<98<=::;;9:;989979:97899778878:678889989<:89:9;;89;:::;<:;;;99<:8;<889869:879;99<98;=9999:<;89879978::88789:97767644577544324213310110230111.././0-+þ|jVdkQQ[sĮý}oZOOKLMJKGHIJNPRVZ_a`]]_xx||v}v{zyvtvrunslqkmhlmjiifkjdijlgklmjnotqpt{suyzxswzytvtrld^]ZZZ\[]``YURRNQSY[`gjmgiikccejc]fifbimkfmlkjmoqnrsvtwyyvz{~wz|}z|}y~}~|x~|}}|}{|z}}|x{~z}{}y|}}{{{{}{|||z|x{y{vxwyvwxztrtwsuqtrrrsmnqqmjmnlklmghknhfiibbafcdggddfg_`bfcbcgdb`b_^]_``^]]`_]^^[\\
][\[`^[[\\^[][[[][[Y\[YV\VYZZSVV[VXWWUXX[VWVXVVXVRUVUTWXXTWXVTVXWWWYVVVWSSSWTTWYVYZXTUTQLHINNNOMNKB==944+&46542210;;:;:;;<98974222311///10/10011112200327Bi{ywturotstu}utyxvyzyxyxvuwwuskY<;;:8;;;;;8::878998;;9899789897:998779:88;::99::;;9:;7678:88::9::8999787868788:;;<:9<<;:;;:;<;;;78:88:;9988799556554434532134210003//10./0.//0//.-/.++ÿ~jSemSS^sľſƲn\URNLLKHGGHIHHJNY\a_b[Zpzwy{ytz{ultstklmreilqgbkphahlkcinmfkoqjnszkovyuqtwyqrrlc_\ZZ[ZZ[a^WVUURRY\^bgmklhmkkeedidceffhkhhjljkmnlortruvxtux|{zz|z|}~~|~y}~y~|}}zz|zy{{{{}{yz~}yy{{y}|w{~|wyxzsxxztsv{sqtunppupnqrkknokjklklmjgjilhhgeegdfefceefdebdcddddccc^[_`]Z_b\[\aXW^`ZY^][Z[^YX[[VY[]XXY[UWWZVVXYTUXXQUVVQUXVRVXVRVWWSTVUQUVVTVWVRTWSPVXWRTWSTSUQWSVTTUXVUTUTTSQKBFOOONMPMC=;624*'3555331/=CBB?=><89:;899:779632320/0..//.-/0//1101001225Fl|xvsqqosqrt|~yuwwwxyzwxvxvvwvpnY?<;9;<<;<99:9889999:9978867777768:75677678777779::99997877789:::89:9:9988888:<;;<;:<<;<<;::<:9:;8887677689669845655544552232232200322200//00-/10./.,*+Ӽ­mTaiXR_oʾȷ~|bZWSPMIGIHFECFHMNT[`^ZWiy~rux}usy{sfvutfilqdhmqddllhbfgifecifkkqloovovsvtrwuwqoh`]]][UX^^]_YRTRTVU]_`jmqfklkgfefdefgddikmdgjlilopjosvqsvynty}yyz~xyzy{xx|u{~}w~~w||xy{ux{y|yyyxz}zxwz|y{}{yz{{vwyzrvwxtttusrtqosopqqnonoikknjlmojfipikijfegicceg^adf`bac^_ac^\aa\[__YXZ`YYY^YX[\UYZZYXZ\YTXXUYZZUWYZUVWYUUSWSVUZPVWURTUUUUVTTVVUTUUUUVSTWTVVVUVUVVVVRSUTUSURXSTTUTUWTRVUURQJBFNMMKMPLD<:411)(1554422/9A@@?>><8998679966754331//0.././11..01112112339Kn{yurrrmsrss{|xtuvvyyywwxwvwwvqnX?99:;:9898987789777656566664466678755766777767776789986866899:977897799768899:8::98998<:87899::8798777557756884456742433442122111/012100/-/.-/0.-/,+**˼ƯlUbiZT~`mϾº~ze][VOMHIGEECEHKIKRXZ`_]i|}~zuxx{vxzztlzsvklhojmnogkjhiffajifbigmemmsnoqwrqtvvspnd`ZY[[[Z\a[XVRQRQV\\^fcnmqbkmjefefdgefdcgkmdikkjoopmqsustvxqz{}|{z~{{z|}}~{|{s|xt~|{|y}y}zz~yyz|}|{|yw}}{y}}{{{vzwxuwuuwvqrtsonqsinqpikmmeekoggjjfdejghhf``de]add]aac_b`bY^`a[^b][]^\YZU\YZX\[[YYY\ZXXYWXWXWXXYWYYXVVWVUVVVRWWWSWWWSVWVTUUTTSWURTUSQWUTQSVSRSUQRVVTQUVQPQUQRRSOQSTRSVVQSTVXRF@FNLMOOPJB<83351"(2661
201-99;;:97999:88977985566777755666765555555677467755677657567699877897886667898999999899<88879::97789996666467775446633324421122011/0121//..//.-.-,,)*)'ȯlTcgZU}^qʹö|trb`XSKGGEEEDEGFGIOU]`^^Zvx{{wxxwsvxynuntenmrhjipgcjld^kjhafhnaglrgjmvqlrqopkb^\ZYZY\]^^ZSPONOUY]`fpmpmlinjhdhdfgicfhhhikkljnmqorqrqwvvuwt{xz|}xz||~}}|~}|zz~w{||{}|yzy}x||~vyz{yyz~zu||{wz|zxyvxuwwwstwunpqqkppogmonfllpfeinehhhdeegcfdebcacadbcbdabac__\b\__a_\\^]Z[[WZ\_XZ\_VVZZUXWYVWWXWVWXVUWVTTTUSUVWSURUPTUSOVXURSUQNUVTPRTRQUUQPSUSQSVSRUTQQUULPRSNPQTORRSQTUYQQU\ivT7BMMMOOPJ@<735IbR0/474010.,=CA>>@::;:67877986364210.+-/0--.....-.///./0116Fp{wsprpmqqqty}xtuvvwxvvuwuvttrmfV?9::9:989887888677655744556675545344355434643665466556655666998788989668868988988::8879888688778878985676455566566534445433221111221120./0/00/-+))(*&#°kT`i\R{aoķƹrd]ZSJHEEGECCEFFILRZac[^kry}stx}souxvd}lzgmnrbhepdclmcaihe\gfkbfjlgjkwolppqkd][ZY[\]_a[WQOLLMOY]elhoqsfiklefilecjnfehmjejnkimppmsstpvwzruv{wxy}sx{y~x|y{{}w}z}x}}wz}vzz~yzz|x||}yzy{xyz~{z{z|yy{ywxvxtvwvststopppmrmpkonmjplnjkililgfgieddj`edd`bbd`bce_^`a^\\a[[]`ZYX]\XYZUVY[VWZ[QUYXSVWWSVWYVTUVUTUVTTRSRUTVSSRVPUSOQUTSRURPPSTSSRSSRUTRPSUTSSUSUSSSTSTQSTUSUPUPRQWUUUWSSU^t`HLONNLMG?<527Phs]A5630//..:A>>>?;;:9787669852631110////,..--./...-.../017BmsqprromrppsuyvttvvwwuuuuuvqrqmgU>8977877776687546644484354334443334334534454465545557875444477767666546755576797688799:8998786797688666867655555743543333112//21010.//--.....-+)((''%"žlR_k\Qyanſ͵z~{{}g_^VIGCFEDDDDEEGKOZ^`ZU^|z}sqwytsvvtj}kwlpmmgpglgkleghfcbbiageheghmjpmmroic^YZ[X\^``YSQMKJLQV^dpsfkpsdhjpfgiibcilgdhlhfklijmnmlsstptr{twuzv{y~z}z~z|~}~{{z{zy~xz{x{{x{}~z|z|y}{|{}xyy|{{zyvz}yxy{xrwwwttutnrrpmnptjjloklnmfhijgggjdbfidbcf[bab`acb]``_]_``]_]_]^\[X[VZZ[YXVXXZWZXXVYYVTWUUUYVWUTTUVVWVUURRRUSTRTQUUWNOTSOPSUQPRQPRVRPSUROPSSQRUQQTXSQUTPRQXVRSVMQOROTUSRUTPPd}jKKLKJJE?<317Qep{lE620.0/0<@>@??=<::9:878:7435300//..-.,/,++.....-../.004CcnoqpnmlroosxywsvxvwwvxwwvutsomgW;:;978898768973667555555365533442343244333444544332566434344655554654456456777755798
8988899767875674575555224443321421211011/120/0../0.-///0.+())&%$""þnX_lZU|_löǵú|{|y{kc\RJEEDCEDDFEFGFNV_`]W^n{uzvvtzxsst|issvgjovggesjahkgacehacch_cfjfglopga^YY\[\^_\XRNLJKLQX_jlnsljjqhijojjifdijmkhikmlkjjoomosvuvuwsyyxsyz~z}~}}}}~{{~}yw|w~zy{}{}}}{||w{{|wy{|swy{zxy~ywv{wsy|vovwvnqsrlppnhkmnhglogjkjgggiffegbdeh^dbc\b`bbba^]a`__a`a^_]^__\\\]WXZ\WVX[VWV[WWX[TTSUSTVXSRSRPRVVOOTUPQSUQRRRNSTQKPURMQTRKNUQORVOMRRPLOSPOPTPORUQOSUNQOSQQQOMOOOMSTQPVRPTdwTEJJKG?:528M[covoL52/10/;@AA><>=:86766864333101..-,,.-.,-,,,,--,-/-.101=JLYplmlmport||xsvvuvvuuyxsttrqlfU<99::89:97786447664442542353332322222210122212233446652343364445556435645445854456898657866769954653554455244442233222111111233110/000/./.00,)'''$"#!!oW^l_V^kž÷·Ʋ}~z~zr^WQJEBCDDEEEFFGKNT^\[WVfrrnr~wonx{irusdmmoajdpjghjfb`]iaa]f^efihkple\\[[Y]_]]]XQMLKLPU[aionjqsjfonnfjmjhefkimmlgflnlhlsqnnsurttvrvxzvwz|v|yy~z~y{~|yz|y}}~u~w|x~{{v}{|w}y|xyz~uzxyxzx{{wwxxvxzwsxuvqrrsnplmilknklknllhkkhghghedcgdfagad`c_acd][]b]^_a\\]]XY[\YZ[YOVYZRUWWSUTVTTUWQRQUSURROSRRPRRQNPTSPQRROQSROQSPNOSPNQSPKOQQOPSOOQRPPQPQQPRRRQQONQSSTNSUSLNOQPPPSQRTTORXg`HJNH>9748BNW`lsqM8111.??<<=;;998767654431//.,-,-,-..-+--+-.-.//--29C?BOVeqnqrqr~vxtvwwwxttwxttsroldQ;7888887688656434531442342112101310110/0011212013321./1232111322322432476444556666657656666654344456654212220231430.100010.12//02/..-0....,*))'""%"!ǯlW_l_R^lƿƹì}}}z}{}|phUIDB@ADHKIKGEDGJOW[\[VYjzupqwunsuvlnrviomulegngaegfZifa[bbggnqja]\[^\\\ZTRRRPMKKOV\dfkkknopmmklirinmmkjjnjmlkkomllnqpqrtsxvvtxvww{z|vz~uz~wzxx~y|}}z}}x|}w{z{y|txz{vw|~vwy|xux~uvzzwrwzxsxvtqrrpmpomfklogjjjdkiighfj`befbc_`Za_`]_^`^`_^[`\^]`Z\^`[[[_[]Y[WVWXTUXWUUTXRSVYSVVWPRUVPQSRMSPSPOOONNQPLMQNLMNOLMQNMNQOKKNNLMROMPMKKOQMLPPKLMPLKOQONQSOMMNJMQKLQNLNROMLQYvzS53422358>IPYhpt^>11?B<>?<::;:79:76753232---,*+,,***,++,,++++-.+,-2?MEMRI_porsqs~xzwwwxvwuuvxutrsqmdN:888797668645423443332001/0...0//,-..0../1/..100212000112321232233332345233344456755555334354443324434422331112343102002102200///./--/...-+)(&$!!#!űpZ_jcLYlþû~}}}|}zhUC@@@CGJIKHGEEFHMV\][
[Qktmp{xkrtygnr{footfeble_eeh[me`\eehnohaXZ\]^_\[WQPNNMJGJV[bjjqlfnroilnmhmoqlmllkmhokimqjfnrpoqppouvunvxyuzz|w{}vxt|~|wv{}}x|}vz{}t|}ztzxyr||}wxwuvu{vvx{uuwwurvyuqwssrrptoqmmjmjnhjiiijfiijdgghbddd^_bb_a_b^]^_[\]_ZZ]^[[[]XX[\XZV[STVVQSWWSRSVNSUVNSUSNQTSPPQPMQPRLNNONONNKMNONMLNMKNNLMPNLLNNNLONNPMLMMMMNNMKMMNLNJPOPNOQMMOMMKNQRKMQNLQPMVy\;0023468@<>?=;::789:88752231/0.,,,**++++,,,,-,+,/.,.03F`Z^fYbqoqsqqxzxxxuuwsvvwutrrqneJ98655733455553011112/001..0../0/.-...0/001/./000210//10020/000011232/1430112334545334543102546534542133233213/242//0/.01./0/./0/...---,-*)((&#"""# "tY`khIWlĵŹɶ}}}|~~xp|roRDB@ACHJLLKIGFGILSY^]WP_nsuvmrjxmrp|lrpqkmblfjjembneb`mllnd^Y[[X\_[YVRNKMMIHLT]bafjohckrnflomgppqllkljjdojjnokmrroopnrtutvuyxyuyuzu}|~t}uz{z~z{}~yz~yz{|u|}{{x}y~{zw{tw{v{x|zywwvutxwvtutqrusnrnpljklejjkffihdgikbagf^aadX_ba\^`bY]\]X\]]XZ\^ZZY[S\[ZVXWZTUTWTVVURURYOTSURVSSRVTRRURRPSOQQOOPOQJLMPLNOMMNOIKLNOMLOOLLMKJMNLKMNIJMNJHLMLMKOILOOKLONKLLJFKOMJLOMLNPMYkB1123568;@HV]nshLBA??<:<;879877865442001/,,,)+,,*+,+*+.,+,,,--/4JecjolmsqqrqswzyxwvwwtuwxussrpoeK;96677565665332102320021..0./0//00/../.-//-////00111113442131/0233421442122200220,.,,,+)&"$+0342243001//23/.0.02/-././10/00...0/////-+++)((&$! 
!!!þtX^kbG|Sh¹ʹ}z{xy|~v~|q}~xylYH@ABFILMIIJJHGFHNSY]YTYintwtgvwvivrvpnnphhimh^ihjbbdmojc]WV[]\][VTSPLHKKJPV[beciihfhklmjmomhspqppjonmhlkokmqrmprtsruwxquwwty{{vz}x{y~}x}{}zv||}{}|{z{}y~}~txx{wvvePmx~tux|wvwxtssyuptvqmpupjmnlihjkfhhjhfifacdfaadeZaabX`__]^_^Y^]_X[[[\]Z[[]ZZW\YYXWVXSUQSUVTQTVRSQVPPSUMOPQNPQRMNPQLNPNLMMMGLKMJJKKJLPJHLMLJKNLGIKIIMMJHKKHIKMKIMMHKKLJMNLJMLLKKLIGLMKKKLLONQO_yJ2332668:?IQZirmSCA=89=:66976674552/...-+))(*++))**)*+***++,-.5Njhotrqrrsrrs~xzxxvsvwurwwustrpmfI;66665555552022101100/10,-.-..,/00./..00120024555979;89<9731,+*,-++)(($&$"!"" "(,/132112/011///00//00.//./00../-...../++*)(($  ˿oY_naExPb¼ǻ|}z~}{{~}wz{}qvaH@@BFLLGHIKIFFGGDMRVYWWbrysbto~fsrvompokiiffcnkjcgkmh`\XVY[]\WRPOOMJIILOW^ecdfkeefoigmpnhmpsmostknppjklthosvmnqtppvutksvwpx{{vy{w{{~y|}x}{}zx~yv|}|x{|{uxy|v{yzpxxtvpYDQp~ttvzstuyqrrxusuyophPZjmokhkikjlgiikhddhbeccabbhaa]c]^__]\Y^WZ[]WY[\SX[[SWXXQWWWQUUUMQTUNOSSPPPRORQRPQOOOPLMNMMOMOOLJLMLJMKJIKJIJJLKKKMMLLLJIJKIJKLIJJHJMJJJJIKJKJNLOLMLNKJMKHHHKIJKKJMQMNSjZ=33557:;?FOYeooWD>:;;:8788646455310..-,***,*)))**--+++)*+-..7Rnkqspstsssstzw{wvvuvwtruwuusqqneD94465445332101110000///0-,+++,+,-.-./.120/,*//,,+(&'(%$$#"!&.0/02/00//0..0/,-*&%-0.-0/-./,+-.-,,+**)(%  İtV\nbFzVfÿǼIJ}~y}vz~x|z|x{}vzx}qystJ>@BFIJIHJLJIFEDEKQVXXX_svj{hjuiumqommnielojehillc_[VXZ\_[VRNMNLKGJMRW\dg_bhk_afphfjpnhlnqioorkmlsklmrfoqvomptppvtxvvwxszx|y}{~yx{}|w|{}z{|yz|{x}yzxyv{z}y{xyv|xxiYSIRyzwuxvwssryrstuqqnrZ;Gmomlihfiiiegihcbgiabdc[`daW\^aX_`_Z[V]ZYZZUWY[PZYXUWWVT[YYSUSUSVTTSSRTQQPQPRQQRRPNOQLMPOKKLPNKLNILOMJKJKIIJHHIKIJKKIHHLJHHIIHHKFFIOIGHJEHKKHIKOIGKMIHMLFGHGFHHIILMKLVnhE3666689:9878856653233/..,+*)***+)*++**+*)*+++-6RoprrptststsuxwxwwvwwwtsuvutqppogE84676456552122112100//...-)*#%-.//.,./...---$*-../---.,//++**)'#  
гuV\kaAvYgξ̽ï®}}yx}{wy{z||z}|{{}zs{vv~t~qT?=CDHLJJJKLIFEFHJMSYZV]nt|lutyinqxnkpsf`lpkcgnlb[WUW[]][URMNLLJJJPSVZ^bd_cdgbgdihhinrnmnslrlokolrmnnqouruusquzvutxxutvx}u~~~z|yz}}z}|z}|}wz~~w|}y{}|w{}|vyyzvwyz`VUSIXr|stuzpqruppsuommiNGBQfghjhdfhebffeacegbbdaY`_aY_]\X^\[X[X][YXXX[V[VXVUWYXXX[TVWUPRSVOOQRNQQPMOSQMLPPLMMNJKMNJJMMKJLKCFIHFIJHFKJEGHIFFGGEFHJHFHHHGFFDEFKFFGIFIJJFHHHFHKJGIHIEGHGHHHIKLJJLUsvG773679;?FMUcpweJ<898778756552121.-,+)))***+))*,+*+**++*+.:Vqssqopssrrqw|wxvxwxvutsvuttstpmdD95754456552101001/./,,,-,*(#$',.-/.,,...* +&*.,+.-*--*(*+'!!!ɲq]_keF|wYgżķ}~~~v|z~}~yy}{yzzq|~vuwwbA>DHLLLJJMMGCDEEENU\\ZSnxntpuflptjlsqhglijgjib[UUXZ^[WVTNMLKKJLQXW]_c_abdbd^gadgllmrrolrrsgmnrkqtpjosvnswwqnuvrqvsvqvyyq|~}xzywzy~xz{xyz{vy}{u|}|ty|{szzyuzyxmuxxYUWVPDOVVbfpfqpslosupnhZHLE>Vhihkgigfeeddcecefc__cb\`]_Z]]`[[[\XY[YUYY[UW[YTUWVSVXUNSUSOPTVLPQRJNQOMPQPLKMKJNIJJJJMKKKJIJHFILIIJJFGIJEEGHFDDGGHGHHFGGGGGEFFHGGEFFHHJGGGHFGGGIHGHGGGIHJKHFIKJKJMQu[:7799;<@FMX`lrlJ:::98766644331/--+***)***)'***+,*))***,/7XutrqpqsrsqtxywxuxvwvutuwvvrprrmbD96644555442410..00/...--,,'  ',,,,++--( + "(*,'!$'(()'"²pX_mdBxec¹ļõº}~zz~~uy~wu{zuy{o{x~xtr~{dH?EILPONPPJGEDCBGLTZ\UU^swmtnwpppuqmmokgijg`ZRSZ\]\WSRQNNMLLNRXXU]]_\a`d\b^f^dfjmnppnmtqriqrqkpoqiurwnrsqnpwrttxuzvzuzy|~||x|}z|y{x}y|z~~{x}{s}|xwy{}v}yzy~xxpytmWVXYUPHE:A@?AGLMKNTSQQMQQIEAFikegjmbdgd`dfc`dfe^^d`[^a\VX]bYW[[VVXXLZ[XQUXYRWVTSVTTRTRRPQSSORPMMQONNTNNKLKLLLJHJMKMLJHGIIHHIHGGIHFGGICDFDCDHFFGGDEFHEDHJDGFECEGIDFILFDEGEFGGFGEFEHHFCGHDBHIFHGMU}hA68:;=>CHOS_mynP<:99777642230/.,*(('')(((((()))((())(),3Uqqsqpqossruzzuzvvtuvvsuvuwutspn_E9533556742221010/00....-.,' + #(('(&$#!   
$!%%  "s[[of@}xf`ƹ¸||y|~y~y{|vx|xwx|xw~{zr|y{v{puwxrL:>FLNNPNOMIEEDCCFOUY[NZrklownkuvmelnhghc^XVV\\]ZWTPOMKLILQSTYYU[]`Z`\a]iaealgkprppntwnqotnqqvlqr|ntuvrnqvxsvwtrw{s~xy}}yz|t{|}zx{|st|{xxw|zuwvwrqrtqnnso\VUUXWWOGA==>>?@=?EFEGIILOKCEBOecgif_cfb_fedadee_]__[\^\VY[\VZ^YTVXVU^YTQXYURWRUVVSSWUOOQRPPQPJLPPMONPLLKLFJLHGIIJHIJHGHIFEGHEDEFFDDDEBBEBDCICCEGCCCDAADGDCCDBDFGCEGJFDDGDGEEGGFEEEGFCFEBEIHEHHMXxH9=;<>?ADIR]lvv[=9855662101.-,*)(''&&'&''&&('(''(&(''+6\srsqqrppstuzwvyuvuuvvtsvttuurnn`C743444541110//1/.,,+*+,*+*% + +  ! !""#### +  #'t]_mdA}}f]~~|~|~xy|{}y~yvu}}xz{wwwspv|wmWBBHMOSRQTNLIHEADFJRYXWWbmrvjhutihjeefa\YXXX^`XTQPPOLKMIKT[[Y\[`\``g]`bmadeofhqxpkjrrlqssmqrvmptwpswxnorwvsvxwtwy{q}}w~~oz~vx|ty~zvUS\^ZYZXbfffgijiiijkjkotXTUVVWWUNH@==??>@@ADEFGHGKP@?A?Yfgefddbdfideefbcc`_a_^\[[]WYX\WWVVTWWXTRTYRRUXOTWUPSVTLPQQLPNNIONNIMNLIIIKGIKHFIGGGHHIGFGGGFGFEDDEDCEDC?BBDAGFEEECEDDDEDEDDACCEBCCEEIFEBEFGEDGHEFFEEIGBCDFFEGIGKawVB<<>>ACGMR]kw|aB<865645211.-+*)(''''')'&$'(()(''()(,6\qstsrqqrrrtzruwuwuuvwusuutsssoo\84246555523430020/,--++++-*# + +  !"%&('$  + $(+Ƴs[Znc>}|kVſºʷ~}}y}w{sz~vvyuou{twzzssvkls{qps]B@IORRQNOPNKJEDCEIKUZYV_ntmpqmjcbcb^[YYZYZYPNPMMOMJHKJRVc^UW_^V_bdU`ck[_clghorohottjruvktozpqtykuvvlqtvsqsuzuxz|z~{}|v|||y|||x{|zuG@PYVVTWYX[`dddcdffhkjmsfPUUVVWWWPG@?<<>>@CACCFFGIJK8??Jlcbeeb^bddaaed`bc^Z]`]TUZYSXWWTVTTSVWVPTTSOUVSPTSSPSSOMPLPOQLKJNLKKLJLIKHHHIJIGIIJJFDGHHEFHGEDDDDFDBCBEABCDBCDEB@BECCEEBBCC>BCCBBACBDCD?BEFCDFEBDDDAFE@BCDDDFGFLga?;?@BCFGKR[gv~mM=94567101.-,+*'$#%&&(%&&(&%'&%&&((,5^qtsstqqsusu{tvyvxttwxutusuuusrm[%!,001555431110...,,-,**+,+$ + + + +   "%%%&&'(# +   
#')*ѵuX]mdF{~m]ɾɿ̹}~|zyz||x{}qz}|oww|utxyxuvtxqwq|pvvwtwttoIAHNQOPQPQPOKHFF@CHVZYSYnrsjfcZ^`\WWXZZ\[WOLLKKLKJJLRXZb`WZ\\UaaaZebb_dblhkmppoppqkuttptozsuuvqzwrnwxryxux{zzz~~~w{{{z~xy|}vzz{yyA;AQXTWVXWW[`accbcdgijily`QTTWXWVSMFB<=?@ABAABDEFGKOC:=@V`acb_^`a_^`bb^_a^V]^\VTXZYZY[TXWVVXTUTYTWVXWVTXTUVVQQQRMOPNIKKLJKLKHIIGFHIIFGHIEEIEDEEEEDFDBCBBDDB??BB?ABD@DEA?BBCA@ABAABBBECDBBCB@BCCACBECEDBBDCDBBC@BCCDEEEGSmoN>BAADFIMR[ftwU<3563//.-)))&%! %$&&&%$%%&&%(),5apssuurqsvvw{pvyuwtstttuustssrqnZ$$.220//-.--,*****('((#  +  + #$#%%%())$ + + #)(*,˴r]]jdExo]ù~~~|z~{{yq}|u|{uuyzsyzzqzwturztorxuotpQAGLPRTSRQPONNNFD@HKT[WTizg\XWXXWVWUX[ZSNJKKLMJJKMW]^]_a[\Z]_e]_`d_bcicfhrmmptqkqssnrttltv{stq~qnq|urvzwvy|zv{|}s{}~uz{|wxzyrz|y{ug=8<=LXVVXWWWZ^bbbcdfggjio{YLTVWVWVRNF?=@BB@BBBDFEGIGH=:>BWba`^^b`^`abb`]]^\`]\\[WVXYWWYZVUUUSTUWRUVYTRUSOQRRLOOKKMMKJMIHGIFJGFFGFHIGFGEEFGHHFEEEEDEDDDBBCBA@@B@@AADADA@@DABAAAB?@@BBCB@ACBAAA?CBCBCEEBBDCBEGABBBA@DDBBGTvz{}W@@BEEGJNSXdr{z_@951021.)*)&$#&'&%&%$&((((+6`psrrsqnrvuuvrvyuuqtvwvtwutuutql\:%%.10//...-,,,*++))*"   + + + +   "#%$%&''(*($ + + + "(*++*Ŷv\]iiJzo]½Ž÷~}~}~x{|xxyxus~ypy}ynvw|spryrprvrnsvrVABLPRTURQQRPQRJGDCIQVWS]ZSQNMNQVWUWWRNKJLMLJILRU^bg_[_`[T]^`Y^`a\`ek^fkqkkptpjrsrnttwlzsxntvxursxsuxzyzz}zz~}|{|y|{x|w}y}|}|qR<9:<=ITUVVTUX\^_acccdefhipwTPSVUVVVTJD@ABBBDDDFDEEHIIG79?Jcb^_cb^_d_]_a_[Z^\UX[ZSSVVSUWWSTSSPSRSRSUWSTTRRSSSRTONOPMMLMHHJLHJIIGIHGGHIHFHIHFEECAEFCDEFBABB@A?@???@?>>A>??C@@@A@A?@?ACB>>=@?@AA=@AA@CEC?@B@>CE??BA>?BA@CHV|~gHFFFFHKNQW`m{{eG6321/.*)'&# + #$#$%%&&&&(+5_lpqqrqosusuzpvzvusuwwuvvuruutqjV<>=+*..//-,+-,)))&&'!  
+  +      !$&'&'(**-.( +  +#%%&y^]kjGrsgĽ̾Ĺ}{|}yzx|r{~qyysrs{upvxvouuyusttoqqqtrsnoilIFJNPTRRSTRSURMHDCDHQWROKADEHOTUVTOLLIJLMKJMQ[\^`f]Y^`^W_^\Y^`a[bdj^ijmllnrqrrpusvrvt~uxr{uttutvx}yx|{x|~w{x|~wwxw{|xz|uF:;9::>e^OTSRUWX[]``_^addegokKNTTUVWWRHEACCDDEDGFFEHGFHC7;@Q_Z^c`]_d]\\`^ZZ]YWYZWUUUVTWVUUUTUVWTTUUTVUTRPSTOQQOILMLHKLJGILKFFIGDIEDCDFDADEDBCDAACC@AA@@@AA>=<>=?@>=??@=>>A;=@@??@@;@A?===>>?A?=@BB?@CA>>A?=>C?>@@?A@?BBG[xuNHIHILLNRV^mv{qS>4/.-*(&&$ + +  "$$$&&&),:^moopqpoqtst{qvxsrstuwtuuutrtqpkP,.39<(%--+**+)()'&&%  + +      !$'((***+,.-  + %y\]kiNskXɾŷ~|}|}~x{~zv~{qzx|tvsxuuvswtzpvvwrnoyrmuxtlmoo_NBLQRTUUSQUUSSNJFDCGPTRD59>BFORROLJIIGHJKKQ]icddd`d`_a]_Z]`b^a_g_cdmiknpooptspxxtqwy{tux{qpv|tvy}uw|yu}zwz}~uy{syxvvxwy{n@99:;:D_MTTTVUWZ]_^]^aaabfo[MNQTUXXYPHCACEEEDFGGGGFGGE79=DT\^_aca_^^^^_``[YYYVX[XVX]ZTTVUSSUTPSTSQTTSOORPKNNNIJKJHNIFFGFGDEDBAFEEDBBCCDCCBDCADCAACBB@B?>=>>=?@@@>BAA??=A?A@?>=>>=A@???<>>@?>>@????><=?@@@=@>=<=?A=<@BC`t}uyYOMONMOQSX_it}uX:2/.)))'# +  +  %%'&&)+;\loonmmmruts|uvwswtwxxvwvvtttrppN)(*,273# %*,+*''(('%# + + +      %())*+,02-+( +  ´z`XfkKmmX¿}}x|}wztvx|tsr|rmuxsiwwxipv{lmrvojtvsjjkpddUFKNRUSRSUUTUPOKICACIRPA98:@CFGHIIIIGIJMUcbffld`dkc[`c]P^_bR_\hXaembelomhptolyvpouuwkuvyorw}rx{|w{~~{y||{y}}x~z{||xyt}z|{}`9::89?S`RUTUUUX]__^_`aaacinTKPOSUXWZPJDDEEFGIIEFFGGGGE8;=F_]\_`_[\_]Z\b]RWYVTVYWQUXTPTVSPSUTRUSQPQPQPQPQNONNLNMKJOGGJIGIIIEEDEBDEFCCCCBCCDBABC@AD@?@@<;<=><>?>?>?:=<@<>@@===<:=>?<=@=;=<;;=>=:<>=<==>>?=<=;;:;=><=>BGgv{}kRQOOPPSWY]gu|ya?3-))'#! + + + !$%''+7Xfkmlkllrtsr}vywswutwuvvwvvrurokG$&(,/63" "'% #%%#!  
+ + + + + +        (*++,..,/11+# + ̺z`^diNlt_ƼûDz}vq{vwyu|}~owy}oqsokwvolutxostxkntshhtrpjmkphlebJGORTUSTTTUSPNMKEABGJJC957:>BDGFGGHKNWen`cglabgldZ^a\Ud\aXgcf_bagelnmonqtqqzuttzt{w|svw{uxw}|{yz}zz}y}v{~~xz{~xz}yzP<;::>>?><==;;::9:<<===?9;;>;<<;:;<;:==;:<>:8<<:;=<<:<>>=>==>=;;<:<<=<<<@?CLnzz^RSSTUWY[[dr~}fB.+*(%$   + !'&+8Zeijkikkqtrq~wxyvvswwvvwxwwsutnlG$'*-394# !$ + + + + + +  + + + +  +      #)+-../11210)  Ѹxa]dhLio`÷ú}|~~pz}q|z|x{yyuzx{rsp|rwwopqwrxutqrqvqjorrimqqgihpcgcSJQSVUTRUTRSSSPHHGECDFE9337;@BFGGIPYillbedhcjhkfc``bchT``h\`bf\bgojiosqjouvqsxzntw}otv|vux~ywyzvys|}wt|~}wxzqvz~pw{}vqJ;99;=>iPWVWVVY[]^__aabcgmePNOOTSTVQNIA9;DJIHIKIHGHGE:9;@R]a]Za`]]]^\YYVTXWWVVZVRTWUQQUTPSUTNPRSPRTOE>FJJNNIEFJHDFCAACCA>CAAAD@>=ABB@B>>==?><===<<<<=;;;;=;<<>=;;=:;9;98;=;9;:;:=NbNUVWWWXZ\]\`bbbdjqUJOPPRSUSTNE?<@EGJJMJHGIFEB67;G_^Z\_]Y[\YUY\WUWWQIKURORSRNOTSMQSRNQQPORSP;,/>JMJGHGIHEGAACEABAC?@BB@@?@?>?@=>>=>??>><<8:9:;:9:::::9;;:7:;9899::::;99:989:968:87;<;9;<;99<998878;<9:<=<@PwvzmZXXZ]^`cgq}qO5''(-.)('#!  + 5W_digedfkpqryvtqvttvsvyxwwuttpi= + %(+-.,& +   +  + +  + + +    !)01232455531+ĸ{dY`iKgr\˽òĶ}{}zx}{zy|yq}~zstv~lxvzlrszpkrqvktnqmqqoijjlmfonjepepelilhjONPWYTSQSSTUURQSRJDA==93115=BMYgls`lmj^hklgfge`^ea_V]]a\`ac\a`dhighknolruwttrxuzuxqx}z~~}|~{y~z{}v{wzzwzz~vzy`;8::;AaYQUUVUVX[\[^`a`abkqTKOQSRSRSSKC@ADEEFHHIHGEDD=7:BMYYX\\[ZZVWZ[WWUUI45BQQRSRQOSUTTRQPUQNOODB0#&.FIFHHFDGGD@BEAA?@>>>@?>>><;>@=<<<;:<=;<87887777888:988999778879897:8966578:8856877:;<7999::;9:;:8::<:<=A?ANy}~|{}޺vod_\adbboyvZ6()./,**(&! 
+-V^acegddjrqtzwsqtsuwvwyyvwtpsqg<  &*+,,-&  + + + + +  +      './2458987770$¸{`V_iMhsZû~z}{yyzx}zw}x|uus|r|kxozutrstortvlsswllmrljnssilnofmhpagfzdHNQSSYUSSSUSRV[[QMGB?>:3/3:Hbdjgscqhjdpkihijd`djbb]d]]`d^bababfjgfntolsxvqttwpvx}qwvxx~yw~ywv|}u}}w{}swz{tyy|uyvM<:;;=@{WRVUSSUXYYZ[]___cnmLOPQQRRTVRICCCDEDDEDEDCBDD86<>?=<>>><<>=:;<<;:==;=<:9:9779968<989:99988989689:9:987:;;677786799:99999:;;:9:796:>AB==@=<=>>:9<=;<<;:::8899977967889?;<>=:>>>=;8757886555668657888886=G@27699;;AMgyp_cjnie\Uazv|ƴqE110/-++*) + 'QW]baabchmopywpqptvvtwywvtsutqeC! +    +/00/2( + + +    +  +  +           &.588;=?>?@ϻ{dZgmRrY̽Ĺɵ~jfmqmjkhjules|zyzyvp`dsyxxwzvYtyrprqpooorv{}}||ux{{ww{{u|xypuwzqnswpmsyrgpqqhvmsiljnkjmjkkojpepajgudknntfRNPSTUTSTWW]_YJAJJGA>@\yPTUUWWWWZWXXZ\]^ekKKOPQRTUUYQKONONLC6=FHEGHG=79>IMNTXXYWYZWUWPD.%')*0BKPMKLQPONNMPSRK8)""*?EF+7??:<=;9:;:::;989987:97767678745668456656458866778:=;<>=?Aʾһ{cXimjjvs[Ǻkcjpppporxi]sxyyxuvjPdvwvwywm^xvrpprolmnruz~~}{}~}}}xqy{~zxwzzrzs{ourzqsrvqsuqpksmpqxjpnoiklrlgjoqcjhm`hlodbnpqmeRPPRSUUY]geSSI?IKIH@;AMgpt`nmg`nhlhgcgefab_acY[Y^[a`a[]_dc`chgaellgnqtksttovwqz|vxxwv{~xvy{{ty{zszxxry{ytwyQ9;:9=@pxSTVWWUWYWWWZZ\\_e]KMMQSTVTWSPLPRRRSNHIMJJJIB96;?A;FTXZVWYXRSN@,&$%&(0fgaZSSQRSRTSUVVVPI:75456614652576;J\ICKXbsLEGOU`iq{yruvgSNHKOU\ev}¿A00/-.,-,& &KTZ\][^agjmtxuqsrsvttwxvwvttsqbA1,(! + #.35420%  +  +  + + + +          "/8=>AAȻzfXorŞezuVohkqprtrxwYjzz{zx{vaPkxwxxwsc[vtpoqrnkmoqv|}}~zy{yz~yv{x|q|vzmvs{lpoxtlnqrepsufjmohejribkloegfmZmkkdesnjc[SQOPPSWZ`gyM[cS;IMMNMGBBIfrkrihjqhikodbejeZ]faYZ\[U]cbU[^d^[`fa]aggdfhkeppnkzvzjzzyt~y}tx{{vzxz|xyw{x|wzw{xyyxs|F9:;;?ErOVUUVXYYY[[[]^^adWJMPSRTRVWTOKSTVWVTRUSQPNIB99=A>:LW\RPWUQQN7(%%$$%(-<@BDFEGHGEFGIIJI+!(4BFGFCCCABBGTN#(8<:98:;::9::9;866666886664356867>]kkh[VUUUTVVWWWYXWH934443/2531456@CEFGGEEDEHHJK?& "+:BGHEFEEEEEMQ4!-:979878996788644446665444356566<]jknkbZWTTXXYZXZZYXK7453212523236:WpvvYPRbnhKRV`ls|w{pXOLNPX\esx80..0232..-!  
(JPRXZ\^afknu~wsottuwwuwyxwuttspb<.*+*((#   + %6::;:5+ + +   + +  +             +8ü~eYnušyvXþtintuuwtv~ict{zyyyvkTgtvvvvshNtqooppnklmnmoip~~|}~y~}}ww}u{xxzqt|wyltrxqvqwnxf|qoplootkrlqhgilkhfonclmn^hftgmpqkaTSUTVXOJNRTWRRU[_DELNQPOPSNNJK^s_kloelhgaha``[ZWWXZWYX]Z]Wc[^]f[aalaafc^^abffmpwqxq}suxsvxtwz}vty}vvzywsw~xrsuwpd?CKRM/hdQXZXXXXZ[[]]^]^fiKLNOOQTUWWYWVVZYZ\ZZZYVTOI:8:@@ABDFHGGGFFFGIJ5#&1?HHGFFGGFDGOT-'4778889886776555667545656685468@[ejnrok_XWXXZZZYY[]]P?43233334225=\pswzhQR`n_NSZcnxtwwk[SPPUX\eql3.-..002100**JMQW[\]`ekpx~wtqtuwwuvuywxvuuusa<.+,++*('&!   '5<=<=8- +   +  + +  +       οü~hZotǙx|sXgkrvvwut~w]fwxxwxxtbWjtvvvtpcbqpppoommlllkde}~~}z}|y~uyz|wxrz~oyvyqum}lqrzikoytfnqnbnjqcehnheinibmki[dbrnoph[TRVVWWSMIJPQTUSNKJMOOQPQOQTQQRPMR\shjjmdbepa\]]YLW^[KXY]SY[^TX^cY\_e_\dc`a`a`aehmewlxlijrlrs|oouvrmpsmosoooqrnjjlqqlcirwsOy]Z[YYXY[[ZZZ\]\`f_IKMMOQVUZalxWY]`_^]]\YWTQI:;<@=;IRWRJTW:($! !  "&)5@BEFGGHHFFGGGJE/(5EHJGHIHHEEIRM&09:9789877755566677345544452346@\chlossoc[XYZZZYZ[Z]\U@2232/00/03=Rhnuz}wUN[i{mRR]gqyzyzp]UTRRWZdq}S0-,//003431.&*INRW\^^_dkp{|vtrtuwxuuwwvyvutrm[>.,,+++*))'%"   (8?B@=:/   + +              ʽgVlt͝yftx[}Ǿʵ~s~t{WDi~mhowutwuvo]qxwwxywo\`qvvvuto^qqqqpsqnllnlfa}{~|{x}}xwypuy~sstzzmwyudtrxgnqwiirtscrpjbvjtbghjfiljhaoff_jhopmdZSUTTXURNJIILNRTTROLOTUQPPPPPPSTQOMQfbjkk__di[WX[[LW\YKTX[P^\ZW]\\X_aa]]hec_`a_^^]ef{ouhXPJSV[][[]adcdghgiighlljhhhlrrmpv|{s~XW[ZZ[\\Z[\]]]_bkQFLMPRQUZgtwYY^^ba_]\ZYURE69=?HKJJIIJHGIMT0(9:7577755536654564123434441235>X`eimsvwuk`[YZ\]]]]_a_[I632000/008I\iouy|aGUcw~XV^fp{}}}vc[USUV\goD..,.//035841/%+GOU[][]_ejo{zurruuvvuuwvvvuuusqW;0//-,,-,,,))'! + + + + + ,=BM2 #'  %,:EHGHJHIIIIHIKH7 '/AILKIJKIJIIRK(3;6777766457434652212555434556BS[`fmqsvy{sf\\\]^_`__`__M921000114=HZfnwz}kPN]l]R]epyz~}g[VTUX]en7.,.-./1368852. +.GPU^^[^_dioz}uqrssuvuwxvvwvtusrW>2/...-/./0/.-,(" + + + ,;GFFHE3 + +  +   +   + "+1@@8,()))$$#"#%&#! 
"!!'*/9=ͽ}i\jwɒj[cjnruw{zjrqsvz|}~}hUXMTspyG8WjXOM|^Y_]Qalsxz|tcnsqstts{obnwzyxxxrc[ruvvvvunqusrrqspnnopnl~||}~~|z}|}y}y{~zyzus|qpyzzptw|lqtyhpouogmpn_glodcjmeccmaejmfUhkmlf[TNRRUVXNIIGIIJHHJMNQLEBDFFGGGD??AGNQTQPMU`bbh]^^a[RY^[SWZVOYYZSWY[UV[`ZW[dje`^__]\][YZ[aa^TTRNLOQXZ[ZZ^aadcddefgikmnllnqvxw|^]a_^^^]^\[\]\[\fZEJMMRV^wyzyukhb[XZa`_[VJ;;=?A>A,"%-@B?:9;>CKPLRQMNW_hZVY\QOUXXUXXUT\UXS[XWVYX[Y[]fhc``_^\\]\WVW[]\PRSRONOTX[[Z\^abbddeghhjklmonqtxxx~v^c``a``_^]\\]]\_aLGKMQUd}Ϥzvtkg`Z\^[[WI;=<@=7," &/>IIIKLLLLJJJHGH(%/?NOONLNKIIKV\8.<826766443032422320242121217BNTY]cinrwy~vga__``___`a_X?.1010148=FTepw|~{^VVdz`Y_hq{wg`]\Y_fhtŵ^.,+,,,.2347:;962- .KPSZ^^_`gkp{ztntuvxwvvvvuvvttskY:4234445335533466566. 0AMNOPK1  +    + +  + + dm;-('&''((%&%#%$" !!!&$&%(&)+5?I[U>@B@AA@>><=@BA@BDD;<:8:87787żgYhp^\aimptv{~s[XX]cgkqtvz{}~~owzfnfUeo]Ouvx|cWJ*=DNd|skj_OLQ^isz}~nipustusrxj^oyzzyyyvf]uxwyywvrsutrqrtsqpqsssy|}{~xz~~|zy|z{{vw}}}|~xy}~q}u~uyx~ptruxroxtjtuviqmseuskkpnioimhkhkhkgiffgjjhljkh^VRNQQTTTMIGDFGFGHKMUje^MKD<:8889:979:;AIOQROKKTh]]X\XZ\XX\ZVXW^TYX_XVX\TWZb_hhdba`_^\]\ZXXVXXZVTSSQNOSX[[^]_`bddehiihiknonnsvxxz~yldbbdcdbb````__]`_HGJOVjӲ{wsqomga]XSC<<=@:2(# '/BJLLLNNNMLLHGI?#)3JORONOMKKKQ]]48@68776433252214432422342126ENSVY`djqvx}rea_^_```abd`F5200/137;BS_nvz|~q\Xase]`hq|~i`^[\^ddoŵO,,---,.2258;@?;84+2IQTTU[`agjp|{rmrtuwxvvvxwwttsrlV;667667856889879:;==:61$ #:BIMLOK+  + +  + + +  ceE=B?96AK?9:604898;>>=>DFAFABAIVU`^8=@ACD@@>???>?>>=<=<;;998766żgUdob^ahlotvz}w[VVX]adkqsvwz}~v{yegeVPgp{yhL{{dT]_cgghnoqqppvryusy{zdW00AEIOUV\^YUYZbgcMIJWdoy}}jnvwuvwrvueexz|{zzytXgwyzzwwwqsusrsttqqrttssz{~tz~}}~}~{~|}{|zx~}|zy{yv}{}u{tvqsuvmpwsyrrntkonuncnvmflnmciik^cehebfjgelkbXTOOPQSVQLGGEBEEBEJNT]kc_\UJ?97654576678:AJMOOONMQWdZUW`ZVV\YNY[^KYZZTUVYWV[b`hgeb`_^]]][[[XVVVXZZSRVVQQW[\\\[_bceffgghfjlmmnpsuwuz~|l`efeecd``_^`_^_`TGINZmˡ{vonpnmjd`YNC>BB8.& 
'4EKNMMNOOMLKIHL5"+8MRQQQPNMMQW]F+;:8644301331011002332210/14DMOSV\_fntxz{ka`abbbabccaH5.//.1569CN_nsy|~|aU]lj^\iszod`^^bdew÷C)*+--./0458<@BC?;7+9JOSUTY^bhio{xolquuvwtuwywxutuskR=88767:;:;;;<;:<>>ABAABA;.  &7275-**& +    "/.T~gbX[Y]_UOPQRMTLJHJLPSVNGHBABCCESW?>A@CDBA>=@??>=<;:;<;::99665æh]em“_Xaflquw{yRTUW\`dhmquwy{{u~u}|haaSJLRUX`flnnsqury}}}mR_rkZ\\bdfgjkmopptuxyvwy|hR6(@CJLMQVUUY[]afjXC?Q_jtz{}ljuvruwsuyn_tz}|{z{whYuyyyyxyuosvuuvvtqqtvurt}z{~}}~~~|{~~~}{}w~}||tz}txuwvvxpowzol{txjpktgmnrhglqifmjfbmgi]ceecdgfijjbROLLQQPRRLHEFFDEEDJOVcdk[]cjUD996524532466:=CJMPNNJPWWWV_[WWZZP[X_Q\[YVYWYX[Z_ehfcaa`_]\[[\\ZYXWXZ_]WVVUTV[_`_^_adfgfefhhhikmnmosuwvz|{uy}onkgedfddcc```^\IEO^sÝ}{tsopomjljjicUXV>5(! !)5FPORPPPNMNMLLK,'/DPTTSSPOPPSYa6196444313322101/2222001//07DLNQTX\cipux}vfbcdddccddcO:/.,.237;AM\gox|kY[fxmbdkt{rd````fl{ƹ;+'*-.//1459<@CHNJD@8?KORPSYaehkp{uolsuutvtuvxwxutsqmQ<;:9:<=?>?@A??ABBCDFFHJJMLA-0/3, &    + + #>Pxylgfllkfb\NJGGGFCA@@ANJA??=<=>=9:A>@A@BCAB@?>>?>?=<::;<:;99766¨kXeqÐ^Xaimpux|yPRTX]`ehlpsvyy|~}|q\WYcnottvygWZOJLOSWZ^_ciloqquxyz|uVNizoY\_bdehjkjmooqtvxvux{}mP91$ (8LQTSSSSQONMLNG (6HWXVWSRSRSV_W,47443235323312442011//0029FLMOPTZ_ekov{~jbdeeedeffb[?*+-.178;@JWcpx|wcWelofdkrxxkc``bgn}Ǵm1+*)-011246:=AELU_YTOONOQFKYaeilrrlnssvuwtsvxuvusqplP><;<>BA@@@CECCFGIJKJKLKLKLICA,"&+1:/!  
+  1E{KKMGECEFEEFB>>?=<9;:<>;69<<<<;::=?<@ACCAAAB@>??>=:;:9:89:66644jWdoÑ`[cinrvwz~ySQSX\`ehlpsvyy{~|zq[W^djnqsvx~iWVOJJOTVX^^aglmoprvxzzaPdvv`Z]aeehjkilmoorvyxtwy{xSE$1FFHLNQTRUWZ_dlpQ7:-# #,>OTUVVWTRSPOOT:#.>UXYXWVUSUTYd<+975334511121231/000.-.029GMMMORTZ`gmsw||edfefggffdaC-..0258;?EQanu{kZ_iqddirzzmgddehozº]/+)),.00248;>BELWftuh^YYYJF[cgklqqmpututustxwsuutqnfG8;=>@BCCCCDGHGHJJJKIEGCA<;:1//+!22/'  +)=C~}GMJJIHEGGFEA???><9:<<=?<<==<:;9:;>=ABCBA@??>===;:8997643432122ſj\elŽ`\ehosvw|~wQMTVZ_dhlorswxx{zyt`V[afkoswx{kSRNJILSUZ]adfkkmoqtwwx{eLXqaZ]aecghkjmooorvyzuxz|ON0$DDGLMPRTVXZ]alp^96L\fpvxx~rrvzywyywzs`i}~~vZv~}}|qe~}|{|}}{xx{zwv|w~~|z}|zw~~~{}~}yxzy~y~zy~|yyx~wx}|v{zo}|rwx{potxmhs{smmovekkuajkpfeiklbjkj_igeXchiibRIJJIJMNPPNIFFBBFGJMVYZ]abY]al]_`hZVN8554422000224359<@AEJLJU^[QU_\ZVb^YOZYcX\\aVahfded`]^]^\\[[]`\ZXVRSWZ^[XYYWZ^``_abcdggggklkklqqqsvyyy||nt}΀RAǸ|tniif^W4aѷztrqonlhjjigb_\OD@?<7( $-@TVVWXVVWUTSTP0(5JUYXZYVUUUWa`048423330/11000//00/0//14;GLNMPSUTZahptz~reehigghghfI1-,/1578:AP^ms}t^]hogeksx~oifggjn}O/-***-00357;>AFOYgu~jbb^]`ehkmsnkotttvutuwwvuvtspf='257:====9795210+*('%"!!  !   
IKsFLJHHGEEGEDCCA@@?>>=>>?>==?<<::89==@CB>;:877889866876//--12.23k\dlďa^einrvy}zNKUVZ_dfkoqrvwyz~xxxaUYaeknrwxzoUPNHHLQU[acdfklnpquwvxznTWlugZ[aefhhkkmnoruvxzwwz|bR:9BHKLNRTUY[]ajolD5>Rcnuxyzzpv}zyx|yzzn_{mh|{er|}~}z|}{wzy{{~|~x~{|{}}}~wzz|zvx~}q|zqzyyy}}uu~sw{uyyvu~}uo~yzowtxqqrvlpxtqrpmujrkphrnneklgkhjeffj^_agig_PIKJDJKLNOLJFEDDDILNU^ZW`]gS`_gV^[dZa_VA444431122011333686=EHMSYW_cYZ^aVWZa[a`lb`bghededa_^]]\[[^dydZZVRSXZ\]\[\YY__``aabeeffffjjjknqrqtvyxy{}yu|^6q6z{~zusj[.VЪz{~~xrppomnmkjhec`ZKCA@?=1%  )1EWXXXXZYXUUUTG(#0BFQ[hwnjjdfimovjiqrttwwttwwuvvsqmb=$%$$# #$   +"%   =<;99;==ACA;8;844444677764/0/022333ɿiWajȑf^ejpsxz}QHTWY^bejnprtuxz|~wtv`PU`flnpwwy}sTNNHHMPW[aadgjlmpptwwxz}VJhkkY\]bfhilkmnpruvy|yvy}wXI&((2G[^]`]\YYY[``739420110./10..11/./..04@HJKMPQQTW[`ekpw{njkikjiikh\5%,12479:?IXgrziacgdfkqvzsolmkmq{´700.---,0248=<;<==99;<99===BBA=:<;98866667985455047975ǪiWaiǐc]ejptx|}~UKUVX\aejnprtvxy{|xludVU`glpptvz}xWKPHILNV[behhjmnqstwxz||aE^av^]_dghkmllorsvwy}}yz}bP-3=JMPRVXW[^_dkmaD7EZgpvzzzu{~rh|xjtq~~~~{sx}xz{~zzyzz}{{}||~y{~ztz~x}x|~y|{ywu~|t{|vt{xqvz|jvzynmqvnltxqgqtqjkltigiqfahvhYehjab`i^fhdWJGKGHHIIKLJGEEDGHLQRWba\\bfWbajZ]`h^^Zh`[`gQ:301221/..00.,-0236:FINZ[V[Yc[_`_]ddibegfdccdcab^^^^^\]iMVd^WY^]^^`bbbb_`befecceddcdfijjjlnoporttuvxywy}ŵT8q}tquwurz)[zsgrutpmmnokigea`[PGDFFDCD8( $*2:JX]\]]]]YYWVS4.9U`_a`^^]][\K(1:700100010//101//.-.04@JJKMNPQRVY\afmtx}nljkkmkklke:&.10467>=:75546746;;9<@ETcoypcbdfhkpuzzuqqppnwŹ^7412211./03:;?CHR^lxy{ztflrrsuvsttvutturqmb/  1oQ' "$%&(-3;BC& .EbHKJJD@><<89::8:7:899764213866998;:Fg`Z]_[T^\T_\TWRF]ES]SYLJCDIĭn^`jdahmqty|}bNUUY^aeimoqtvwxz|yajVV_fmqorv{}~cKOEILPX]afhjlmoqssvzz|~~hQMXyd_`cfjkkonotwxz}|[G"!5?MNRUW[[^dcjpoa:3O`ipx{{{xciv|{npxz|pwy~xx}{yuzvty}osy|wsy}s~z}x~}uy}z}}wzts||}xww{zsxtyttsvqpovspptunlnriqikhigmidinh\ehgege]RIFGFFHIJMKIEEEDHMKPTOZ_`_]d]^^b\b\d`^[aaa`gc_bj\?5212320103563566458=CIW]YW^c[^ehbbblheccdddbb_]\\^__qV[WY[acba`_`behhebdfgfgiihjjiklnooooqpprrrsssppv{V5/Kr~wruz:8ş~wvqicbd]Tgikieca^UIGFFHGHHFA0#  
&*6SJDQ\bcbba^][\[B#"1CQacdedca^T;&/;>51.000/.0-,/---//.4BGGJJLLNRTVX[`fmrx~smlllmnmmme>+-127:;>CGRanyoccefimptz{wtttqrzQ432322421037;>CIS`k{xcZgnorttosuwutrssrlb)  +"2& + !&%&'*/78?L\Z=! )-  !&,CLVan}źaO`jlprspouxvusssrnb&   +.Y5! + +86774==;;<:<<:<>?;??BCMPE<<;LyzbNejghgQ`[RfkPNNMOJJJG?<:nY_cf\flptx{~~iMUUW\bgjnprtuvzz|~j|t]X\flprrx{|lLEIILOU]ejkkmmqruuvz{{}z[JR[q^adgjmmoqptwy{}~}jW@/9FORUX[]abfioqt^36Pdntz~yar|zztvwxtzx|swy~{wz}{szw{tyxxro|zow{|uywsqw|zty|lvw}nvu{rsxvuptwpjsrtjolspljrmggjqgkdhchejefheiekffbWIDFGDFHIJLMKGGEEJJMMXWY\a[Ua^^T]a\VbbaR]_b\]`aYYab\X`L;20033369:<><;;94118BFKR[ecdcghldhdcbdedbaba_^\\^oY_\YWW\bec```abdfklggikjijlkklmmnoprrrttrtwvvwwrk{}:g)2?wxwx{{$ۘmxzuurndYRKPhfdb_YPKKJJIGJKJIE<(  "(1GX\fSJMKLRYbbdb`]R,% +CH\hjhgea[S@&&,5:410//1/--/.,---,/:DFGHIJLOQQTWY\_bhpx~ppqqppqqrr[/*,147;=BEKQanx~mjhhjlnpru{~yxttrzǼ~5566787676645:@EMVcp~qN`gknqrrrtvvuuurqm]&   *#' + '@Z_d`^idbbb`cd; "!==CMP@LNKLFLKKMOAJKHFI=QXVVKLHISS>>=ANNUSHAJSSOMDLJCFAAIFCCDCBA;mX^df\gmqsx{{|lJWVX\`eimortwwxz|~txtbY_flpsrxzzpR:IIMPV\bgjkmossuwwy{|~mXZL|bbdhloopssvxz}~{gYJ($:DOTVYZ]`cehnrqg>4D]jrynirzvvsvnvxwqsz{wuxzyoys{tzswqu~~vovxzr{v{ry|xyz{yzuyu{t{vzxwwqwuutlhwnpjumqqqnppnkdnkocdgpabgmg\gghdaQEFHGDBFGILMNKJFHJLLOVa[\]^YZd\ZYc^\[d[dWb]```]_^ba\^cbZU?41210387;===<;:5008?HIR`dd_deiffedbcdbaaa`^_^\\{rZh]YWVY_cfda``beglpokkmmmllnomonopppsrtutsuxywwvqyw0P~uvm98%Xygt{|G>Ք{bwocuvrmmg]_fdba]XQLLMLMNONMMLG8! 
")3NXg[PNPPMJKUZ`]]C$$ >YD\jjjhd^[R3#).870,--/./-.,+-,+,.:CFFGHJJNORTWYZ\aekt|ytspqrrrrsQ1,/148;@BGLR`mx|iijjkknpqv{~ywwt¾q',68;:99:99979?DJUbp~jP`fglproqsttutsqojX#  (C#%-4M& =R; +(?=;;7214<==>>>=;;CLVcq}aO\ejooqprstutsrsoo]" 5* .::5  8WQfhkgUO\acecV?&42,$")?]siOSjobieUhiUd}]QkS]i][dVKEHC=>C?8ɭq\\^}k^fmrswzz|}}}|qJUXW[afjmqrtvwyz|}ywyd\_fmrtsw{|wsa8JHKOW\agilooqtvy{~`^I\lbgjlprsssw{|}mbJ!/7FSVY[\`cfilpusd:1Lbnv|sqvujvu{qtrvuruzwszzzqux}utwzwqyzvotx{ntr|qoy{yhzz|hvvzpqtusousqmqntjmnqfjotjfmmefjikehehciegedhbWLEEGECBFHJKMLHJLLIKKOYdUZ]b[Y^b_Tbc_Y\aaZ_ceO_abQ\^``Ved^]`a\O?40/-169:<=;<=<51/3;AEJWdl`ehfdbcda_^``_]]^^nToqq_[VRX[^acbceeegzunpqrrqsruvuvvvvvwwxxvxzyyyxvyǟrnU7@LC630.E96;.pl'ݮjnuWg^aedighgba`_[XUTVVUUTUUUSQRPC% !)(-@Y[||k_\YVTSRNJJJGF?0)*#Eȩo]^cd`WE)'0;@31.,-/,*+-,*+./;G"  EE>3  +>sD^eehYNRURX^R4)SVD44//'! %:[j|e^ikijlTdl\ZbiQFPJKIEAIHDF?<;98542/ɮnVVZoh_gmqtxyzz{||~}|sXUZY^bhkmpsrvvyz{|yvwgX]govuvw{~}xi;IEJOV\bikmoorvx|~dPJNq_ilmpqtttx|~wfb,%4BTWY]accfjnquyvJ/B`mu|k|~hsrvhtowrqsurr|vtmvv}stuvttxsrpvuvptnwv}zuxuwxrxrvuzsquxwmprujpqqkkkqqnlopeborajjregfpd]eeaXKEEGECEFHKKJLKKKLMILPY[`PabaY[_^\[d_\[a^c\bbbXhac\ea^cfjbbbe_]^YB93/.1578;;=<:730036>DEThZehecccccb`_^^^^__~x\lnaYe]VVXYZ_dcddeiwroprtutuwyxxxzyzzzxywyz{{zzxuȊ|y{t_L,-<[]V)/2fU:ѕ~s\bUWVYNWgfdb_\WX]]]ZZYWZYYYWXSP> %%/F]jylc_YSTUSRMMNKH=3//5EҽmTQVT9',4=80.-.,+*++*(+,-;DCEHFFILMPSTVYZ]adlpxA$9[gR;3/26;ACFIOWduƴncksy~z|PB=5'):GHHHIHGFCAFRcrĿbLbhmppmmspstturrogT -B` !:K0   
@sM\fcbUKLMJR^O(9TA492/($&:OGPYQREEUTPJ>FLIBJGJKRJKJKULEB?J=C@@@AC>=C>=>;9;;:;9::9866630ŭr[UN||gh`glrttwz{z{|}}}~x|zbXY[_egjmpqruvy{y{xrsh[]hovwwxz~~nAHDJOV[dhlnoqsvy~hRP;~bhlorsuwxy|os?#1:LUZ^`ddejosv{e5:Vhsy|h}ysnowkwouvutswx{tsqyvywzvquyxpqsxqsu|opvsmw{opuvinsyojvyqeosselqrigltkfkqk^fpiQkllbeehcaf`UHDDFFDFGIIJKLJLLIHHJVW]Y]\ga^]d_[_ca[`b`\`cc^`cg]`ch[]cd_[`aa[_``PI>50--2589:;89730/04:BFMZjfdcabbbb_^^^]]^dcijifWbdZWXZ[[\^^bghu{hkptuvwwyzyyz||{||z|zz|{|{|ztqz}~W)APPO$#(IDMS(Yݴ}w|}zql]ZYcOdhfca^YK_iqlfb`^_\]^]\\XR4"2IXpuka[VRPTTQOMMIG8.636Cz•c[G&!,8=3---,+,,+++*+0?CACFEHIKLPSTW[Z\_djnt|,$<><414:=ADHNV`pǨmffqvz~NGD@8(%;LMNOOMNLLGJ\r~ƿ[RchlnonlprssssrqqmV )^^?D ;--  ,=6(BmafpTQTPMKJJOF"#'?b]]c`bd\RQ\[gW[OMUZf@WTTMMVUKEAODBCBBA?A@A@@><;;98:;;;9776510®r^UHypahahlrtvxx{yyzy{|~}xeZ\^`dhlprstwyz|{|zosi^`hrwxwx{}uI@IHOV\cimpqstvz}rTJ;wgfjpuvwy||||rU$*5FUZ]aegglptvzy;6Mfpyɮgmr|{rtsyqruxurtyysqrxrqtxsjq|vlptxlprtjmwwojtwylptyikkwlmuuhfqorhpookjiqejnokdkjhdrjgakgfge\QHDEDDDEFHKKIIKIKJIJKT_\^W[_bWZ^d\Xbd`Z]a`XacaUc`cZccc^add_`__d^da`ZeRB82,-02368898310//37@CMcfdabbba`_^]^\\]k]{f_Zh`\\\\\[X[]ahiqgeipsuvxz||zz{}}~}~~|}~~~}vg{|t=I@Ij\E<6&('$؍gkuzzttrlkg]fhfa^\WMbESfmnmjgeacbaa`_R##4M\{~rh^URSSRSOMKHHE?9558M~˺\C2D=0.,,,,,++-+,3AABDEHHIKLNQRVYX]^ejmqx"(:??:645:?DGNWaoҺ}flmwMLKIF<("6PVWUVV[YWUUh{ƿYUafloonnpquutusssqR 2IlZB[K %?6$ + 
#4+#)9_coxUHSRMMMIF;#!)?Yijgb^omJdoWfrfPU[QcENNKGGCF><>DFDEEDAA@@??>?CQFCVC;:::887531ýľp_P;phXg^fkosuuwxxyzyzz||}v~bZ]]afilnruwyyxz{}zovl\biswywxz}zSBKEOW\bhnopruxz}}VB>ahilqtwxx}};.>WZ^`ehilnrvy|Z9:anvŤ]assloqsnqruropwuportnqsvristsopquhpntowwrlpvuvosmtnqntqsyronqlpmwjloodjksohjjpdfipeceoieg\OBBHDCCGGFGKKIIJIHDCMWSZZZOZXbX\^b\[cb`\`^_afb`_g^a`ia``ed``c_\`dc\[`fXSF;2-+,.1457653211/15>H^dbaaa```_]\[Z[^x~\RlgYVa\ba]]\\[\^`ddndimoqrrquwwyzy}~~~~zjiƍHYP\g^jRObCG:>Ʈoanhsrtpolkigfcb^[M`b,/106IfsqkgfffeddM#4LdzocYUWSSTRNKHGE@?<655Tül?/.,,+*))***-3?BACDEEHJLMPRVWY\_dhlrw~v(%+>@A><9764.++,/255343////05Gddbabcb`__^]\\\cm\]khZ_[[bb_]\^^]]]`ctuckrqrrrrsttuvxz}|~udDžOSpp]Ro\D^BD)lد|n`aehdgmmeakifc_]ZOpL..,*'$(@fksqmmjhhi< $6Mjuf^WSOQRNPLIFED?==720dĿa5.-,,,,+++++-6@CCCEDGHIMNORTWY\^dhlqvzX*#-?CACB?=::>DLXerſLr|]W`paMUWWUURM?)#C]fhhjllmlmqǿXXbgmqrnpqturttqsqhJ >< 4&9.! +($9aennWYXVSOKOD: ,AAB=@A==<=<<;<<98:::;>CEDDCB@??>@=:|WA@;:98644230Ųs`N/nbTf`bfjknpqqpsvuxwyz|~s|hV\_aehmquuvxyzz|}~xypcbgrvyy{}g9LHNU]floruvxz~lOD4dmquyyz~},-7IZ^bhlnnptwx|Q.Fhu}ҳioP6@@~qnnstnlquqmtvsnoquqnourlppqfopuhosxpipsodqow_pmrgmssmkmkmflmldpkldeinlehmg`e^jcdggcUKBADEADEGHGIJIJKIFGMa_Y\`_W\\cX`aeZ^_g^X`c\Xa\aXdfd\`bd]`ad`^a_]`ef\Zc]Z\d^ZK>3/,+,-.1113321/-.Igccbaa`^]\\[[Z]d`ecjhSlXYba]]ZZ[[]^afnhmnpqrssuutvvyvxxx|~|qZ^ysM[QjWZGD'ԑxsf`Y`YX^TWjifc_]VNl8/-,)*(&$$(<|sokm_$!%7No|odYVRPPONOMJGFEA?>53;nN1-*+-+,*****,6?ABABCEGHKMNQVUX^`dhkouz<"#2CEFGGEC><=<<;;<9:;?EDDDCA@>?@A==UG{::::877421/αwaN*jbSjaafjklnpqstswww{wv|xkQ]acejortvyz{{|}~xxtgcksy|}}q;NGLV^emrvxxz|vT@8lnsw}~~M!/9\`dhlnpsvyx{i69^rẓu{{HSseYZWT3L~_VRSEvoqnpqqhissmktspjoppjllvqiqnphsnrkrsspqplqkxgzhzkonwpoooomninknouihhnhjkkkgggj^kffe^PEBDEACFGFFIHHIJIKHGO[b\P]]^V[_cX`ccW^`b\\caX[gadakcb^ebdag_^dd`\chd^YdfV\cgVZ][A8/**++-/--/13321Pdcbcba`^]Z[\[Z^sy>MthcUZX\ba[YXYZ[]^`jjjooqrtw}{~}yuruvx{zĞZXzpWQroOD7Eѽ}wqg`[YbOXekgc^YP]a42,)()&#&"-Bi~sj7 
##9Vr|qc[TQRRNNPLKKHHHE?:26I¾w;0,+++++**)')-7@@@?ACEFJKKNRUW\_bcglrvz}&!=FGHHHKKFC?AGTevľhDZdy]ZZ[^ccbb^[XF2&>cyvwz|ǽU]bhppompqrwsuutsomA + 4R*E> #/$  $BC/"?uucYYVUSSROKD8"8CEGHIFGGGFBBEDCEC:@AA@AB@=<<<<:;=@CDCDBA@??@@=AQKvA;::986542.ƲtdM"ftps[[_dffkggjjq|qU^`cfjnqsuxy{~~{yucdmt{~~wA?ENW^gotvxy|~Z==fprvz}}| *2Scdgjortuyyy}}I5Oqxְ>mRI@:9595!I.224DHHGGGFEGFDCDDEO`D?@>?@=>><<<9<=?CEBBAA@@A@?=DEVr89:9976432/³wjQ%gȿqY`behlpruwy{|~|zxfdmw|~~}M:DJV_iouxxz|c<?@<>=<:::>=@DEBCA@@A>>>?E<]r=;99866420,wjQ&luZ`ceilpsvw{|}}gdrz}~X8GJU^hov{{{~e:7C{jwz4#0??@BBCEGJKNSWX]bfjmrvz{L>PQPSUTYYX[XRKM^wMH_;[3k}mv{ұoxumnoje]G3Jh}ŻQ[bkmonrooqusstqsqc9  AjHeP]$ +0\=  #.452*IgVRIIGEGGB82*9CFHHGGGGEEFDDECDEP>AA@=>>@=;;9;A=@CDACAA@@=??>K8`j:;::875531/ſxlU#k¿{_dffilquyy|~kdpx|^0GIR^iqx|~~n2/6q|=9>Rcoqvxz~|}}WIlŮP3467689-<35679:<+ $_?7875/dnvnqlqmmpvoopqsjortlinspknpqdjosgonxkfmsqhlosgijphkrtm_gklafmr\dhi`eimc\ggjed]PIDABA?DGFHHGHJJIFDEOY[^adbac_Y[a[_ai_\\e`_`h_^_da]_bc]_da[^ac^[`f]\Zc`[]`^Z_`\V[_aU[Y\S`Q@/*'('&)-2<[baba`^[ZZ[ZZ_gQUXYXUNJV^a_^VUWWXY\cpkkt~|x}|{z}}dATcpzpT^Q4-+#.QqQXAQum}|pjmkfdb`[IeOHwZ;&"4TojB2@??@ABBCFGKLORTX]afjnsvz{1$S[VXY\\^__c`\VM\xgYbɪyVKpL9iw|jiy[mb}oovvsqk`GIxǺOZbjmnnpoqruvtsrpog5 fnN +,;6   *024&O`CPGHFFJMHB:+ #=DGIHGHGGGDDEDDDEMM:?@@?>?>=;99;?=?CECBCA@@=<=;ZBrq@<<798653/.ymT"k¿{cefgjnswz|~~nemw~\+;=J`fmu~}~~{ywtnd\WG1',̍{zƾ?A@KR^bda\[XQUKZ_M7.7bñL5535686*934479::+#bC89753drsionsqlltpknrqhmoqgfmsoghnmahpqahltfdmqo`jirYhioaerojafkj`fknchcf`fff_ahdeaUKECB@?@CEHIGEGGFFFDIV]]`f`\af^XY\[V[]`ZZ_e[[^dYX_a[W^aa\_^aX`_c^[bcZ^^bb^^`]Z_]\\`\]X_W][fXTC1,*&%'+2E`^``_^]ZXXYY[\ggKRVYXYWRKR\ba^\ZYXYY^fuwimzq̶|wzzz|U;c`z~suA0 }oZUytrwzyyphecb\YD`7S|]d[:5\qzxrvD'Rqog^gFLORGIGFB?85>>AAABBEHJJMQUW\aeintwy|%*T{rigefhijigb[\vѸNM~@Edpwdgzxup|y|}}|zwo^cwqqjhjiimv}´U]cllpoprprussrrpok2 + + C0  +' 
+!)(Rg^ZUTXYZUOF@.$#4?EGFGGGGGHDCEDCDDPJ>???>>=;<:8:;>>@CCCDD@>@?===GXBEF?98786630..˵znSg¿|egjjlqvy{~fkt{~Q'Dfl^__ac]^^\ZPMMKJHICIB?<8(C*bLVCD<...10+($,0/)'3^zwx}óH1126798,8466799:,%dB:9974lspeolpomnupimqnflkrehlnnhiklbllnelnrikmnngnjq`mjlgnojjgjilfkhken_fbheecfib`TKE@BB>@CEGGGEFGGFBEKY^Y\\e]Z\c]ZYZ[X][`]]^bYaccb`_a\^abbad]a`f_a`e_[[gb`baa]\^a\\_cW[[aZW[aWURK3+(&'+3Ud^a`^[YXWWWXYWRHOVXWYYWSOQX_bba]YZZ\`gzoinxsvprrlw}LCZ`|:x0qc_[dwiksryvrjea^RM\07Xr}}D4!:^p{?Btqe]~hIMLKJHHDA=37FtòǠ}@./+**)(''))'+-7>?=>@@A@ADFIKNPTX]bfjpuxz~"3QmuwjnrxwqqsqjbqĶxG9^3c}x{kdlouhntgx|qr{~wfDLWWZ[\[[]lâzvqX\bilooqrqstrssonlb+ + + "ZVSamq   +"(#Ygb`WSTUPJH?>, (' A@DFGGFFGFGCDBCCB@RB???>=?=;;:8:<@>@CBBCB@?A@>?<>=<:;:87796643/.˶xmX c{\gmostw{lnprrvwxzzyywph[gPKzXXWQLHGDB>>:85541/0+,43*HDbeOO=/.+-,($$##'$ %:?8.%"FF4256576-423479;<* b@9:;91fpqlsjppoosrkpllkohoiqljopojmksmnmqmkjpmgkpnbhhpgimtjejnjbfhlehin_adjccfcd\PHE@?B@AACFHEFFFGFC@GW\_\^]_\__``a_ZZ^_Z]^dY`bkb]cb_[\ha`cee\`ab]]_cXX`h\\bb`W\]_W\]_UYY`[XX_YTTZM938UǷb2/+()*)('))))+.9??@A@@CDGJPRRZ^bfkpsx{c.5797CFPfonrssqlqܚs8q|zz}cTW`|ww|{xuvxyzz{qJANSUWX[]_jǞzwS[cjlooprqtsrtqoom_& + dzrsU +   66]{d\IIJFA???=A',-+QDFGGFGGGGEEECCDDCV>@@A>>>=>;989=C?BDBBCA??@?>><<:;;9:;:8655421-yo[%^~alrtw{~~zyxvwwwtutsmd^_N2~78.'"'+),*+,**./0144AW@"G,g`X[7/,'&$"!!2$(RC4557755.4323688:,b@;=;90ginpulmpupipsrjkkrmmfpg_lsnginrfinoifiljYlliUjim`clsgfinhbcfjeeei[aciecd`WMEA?@@AABCDHGDDEFFEEK[i^__b^^`eb]`f`VX^_X\^b[\_g]W`b^Z]c`W_ab\[^cY[\cY\bdV]c`\X[]aZ]Z^\\X_[[V[VYVUYYA4,*3]^^^^\ZZVMMQVWVVICRWWUUVX\\YZ^a_ZZYZ`hr}jkostuurwpiltovxxc=]bltl_fju~~PH3 xoofaXc}uB9)>hlnkemj^\Fa;..68*%M\fP-:iu&Pol[l{NQKHLLKFC@71:hDzM-,*()*)'&'''(*/:>>?><>A?ACEILNQQZ_afjosv{N(:?#"-4;=;=:=C:@?CUap|깇:u]FYWZ]^X^`^YL_;@A?>===><;::>?;@BBBBDA?@@@=>=;<:9:9:977422.-zp\"\{iu|whzwuttpommnkfaYUK'=@40/59<=@CDHFJJLGB@UD%ÿEJK<35+'&# 
%'&N÷F0346686,62135989+e@QqL94k`morgjorlimpncgerjpdjcgnpkcjln`olnkhgilfojk`qinekjkjljjecfbhgjdecgdhgcbVIA?=@???@DFGFFDBBC@BNV]aR[`dUY\d_Z]^[XY^]W^]^W_^f\^a^\]^bc\c__]]^b`d`cZe`^^hb[\a]Z`c`Y]]]U_Y`SXX[RRWYTE9/B^\\][YWXVSOKOSUUPKJPWWUWXbb___a^Z[[]dixrijorssrxgVffcsptrq{O>agprom^^Tghq{`1 /{ZIV^[nvE:DQ_fkigjd\UK\2-,1Gcq= *Eh\zT#Fq\1dpa_v^Ebz_JEGFA;51Iqʷt=.+*(*)''&'&%(+0;==>=<>@?@CFIILRUZ]afkosw|C! .@>=(!!%15BIG@42C?>@==<><;:9=???==>==:8:9:967430--{n]%Tvzo6>|vrnlmjjkijie`VUJ#fwUWYURPMKKJGGFGE??<532D. DB@90-(#!&&FøE1455677.6233788:2 $hA^T71ihpnnlnlnhmonnmnhopthhjnpklkohnmwikjmiempocllofjgmhilmicdfj_cho[`fhfec]RC?>==>?A?@CEGEEEC@AGPX\[aS_``Tbb^_`cZ[\^]^^c^^^e`bacb\_a`^_baZ`ba[_ab[]^c]Z]e]XZ`ZV_c]R[\[QXV_WYW[WYY[YUP?P][\\[YXVRNRPJIOTTSNBPWWWYabaaa``_^^bglkilprtsrp__5\npmTr===<<>?A@ADFHLPTZ]agmquy})!&38QusI4<>AC^ñ_D}n~myWBEFCDB?A=80&3=DAJMQXbr|nT^fionnqnpssqtqpom^  + + %7   *320fmbcaYYXXTURJA hmFJJJEFGHHFEEDCACItCA?>?>=<=;:;8<=?BBCCDCBA?>=<=><::9999986331.+žƺ{lZ&P\uoQNMH;6/+)#!" 6zsojklhiihhhea[UI@UBAC?:10.0/+(%%%##!!$3/!B6=:0,&$!%&BMNS`mw="HĽH1345476/4124689:1%hDaJ83losjlmpkjismfmpmbjmofckqmclpo]glnfjfnhbhnj[nlkeeekdfhke[gfc]ceo^b`edb^PC<=>=>??CCAABEDCCADHTYbd\`^e``[j^[`b^Y[\]W[^aV\_eZX`e[Yac_Y\a`Y_]_X^``Y`a`\]^b]\]_[Zd`[X\ZYXaU]Y^XWY_XVY^YT[]\\\XVWUPHMPQFFNSVRHHOWYY_adedababbfiqkjlpqrttĚii^wuuwo8Zdksuwyy{wutfgmytnX owou}tDKKIIO\hkh`ZB^?,*DqG0YV+"=e{m,Zqe^gwFWvr>?73:eɶģH/+**(%&&&$%&&&+8><==>==?A?BCCHLOTY\ahmqux}|+5Dc޼d9>@IicyFZqT@vx{cumkxzoj`]UO:/575;;:>I\sŕtkQ\gkonnrpprrssqromX %<!/8]/ +   "-/0moa`^YZ]WPQOJ>rlCJKMHGGGFEEEDCB@@KP@B>><<==<<9;9=>AEBCBCB@A??>>==:;:988777533/.*̺{n_$ML/!5ysnkklsghfgefb\UN&4)$'&# %&$'&)*,--006C=$ }D3?;2-&$! 
'@~@"FԻĹF2344276/31457679-&d@cP:3ihpgllojkjlhdonhdjnnefmlgelmk]mol`iamdfgllaohmgdbjgiegddi_abfcgehaa_XMA<>?><>>@BCA@@BB@AERWS\bbV\`aOY`c[[`^XY\\[T[``W`__[]\e_]``\]^aaacZa\b^][f]^^`^]^b]Y]ccY[[\W\_]QY[]UTV[WTVZXX\[[[ZXVUSPPJDOOIFMSUVMBKZY]^effdbccehku{hjmppqttY6an]twx|U9`fuuuurx{~yps||}l"2{y{u:DEGIJB\md]UIZ0,,;r[4*$KjD3npc\izmM_L?<33Frǝs9.)))(&&&%%&%%'.9>=<<=<@>?>;=>>=<:;:?>===><::99876220.+ȹ{ob%L˶O"0zxippemirl_ZVLBPC::;=>=@CBDEDA@?;90-('# %$3Hc!#Hґ&"$&(%Yĸ@3345367.11147898. $hA`U85leommkmmpihjounikpkncnlgejnjihnjmimhlhpkhljnhligagelfdeki[aeg^bfhb_VE=?A@<<>>?AB@?>?>?@FJ^^X[a^U`^_U_a_Z^_^\ac]^Y``_\e\[^d^^_a`\^ca^bebX_abZ^_dY]_b\V\_\WZ`_X[YZQ[[]UYZZSVUUTUW[YZ\Z[YXWVWPGOPJGGOLJMRVSL=MV\]bdfcbccfhk|qghnppqtuM.6pllmq}DDJHhpxv`aev{~~}|}}bQsL;;BPYRQca\KNQ.**.=VrqlF6D,Yo1Mwl_dQPe9>804Qɰc4,*+)*(''(''&%(19<=<;;;<=>>?@CGKPTY^chmrtzH#15NҮ/HϠ;@CRgqKTeӯhad2dzv{{||XY1;HQSXXWXctbUahqqpoqoossrssrpoT +  T@70(! #!   &&>@>;=<==<;:9;=BCCBCCCA?@>=>>=;=<9:989:632.-,|pc%HˮO 0}b]VPSjaSMHCBBBB@?<<7882+)(+22&"wŸ~B4<930*'# %&^JV;Hѡi|H-÷>4345347.500689:7. "d@h\;7jblpqjkpsiglppjilpbmbsgbhklcjlmejgldgilgcmjjaigeYdcjfbdhaXabf_ab`\SB<<>?==>>>?>?>>===?FVZ^a`^_\]e^a`ha_Zc`\_f]V[\`Y]_dUZ_cWYca[V^b^[``^Y\\[U```Y`]`[[[_`[]Z][][YV]Z[Z\VTU[RUX[YXY\[[[WUTWVSMIIQMDHNPKIRUTMFP\_aafcccefjnlilsqrrs|}~>D`jkigjm}t6M&")2K^ioe`\lsw~~}}A~{xwuylXRSagikd_[DZ?+(%%! #?ajwK9^ue/hsgZcrRSwoB=729TŵY6100...++,,*)(+4<<>?==<===>@@CGKOSZ^bhmrw{.%5:Xؖ/Uҏ1@C[}ɢ{Y^n8s|oktsrpu~~xqE2<88>ET]_fw{`TcimnnoqpptstrqrqmQ  + +   + + &.)! :uj^NJJMKKMLFE: %aLLJIGFFFEDCCBABABA@>>@?>=<=<::;;=?BDECBACA@@@??>;:;:9:9699633/.,pd$C̰xzyomiU !!0|c\SM,?@9/+*(&'#$$####"! %19%saA 2;:41,%! 
$;05hIIѡ2$Lø@234<=:5.324;C9890$fANdE>8jflnngjmoegjkpjfim^neibejglcolianhk`fgfabmgf`pgf]j`ihdddc`cada`^WKB<<===??>>@>=<=<=>>EVd\X]c_Z^``U]`a\YYd^Z\aVT]Z\V]\aU^]`Z^e^[_a`^`b]]^_[]]e^]^aY[^aZW``^WY`[VX\\VY[^QRV]RTVYUSZ][ZXVTUVRNNIDHMRKKPPILPUWPKX`bacbdcehkpiilppprqwqXe|tttprv|`7L!):Wempmdz}~~}s*'{yxrjfhl{|sokgjic]UBS1)%"%:`ik"Lg?:wod]qtP_HB<54;^¤{H;;99:9867765434:>@@A@>?B@?AAACEHNUZ\bilqw}()59Tݜ?lt8>C`yžeQ[sj@v{{fz~yropmnmogjbV9;BDCGGR\_buoxVN_jmompqrrttutsromK  + +     *2, :uja[\YY[^VTPI:,_KJHJHGGGEDCDEBCB@A@?=?@>=><;<<< 2=961*" #(? HҾ HB&»?127FE:63645CK;891#hD\nG<8mhnkmjnnmelkkoljimgvkfglmfnmmeigvgkdmdffmmbgdqcfdobgfgcbedeab``VI=;=><;>??>=>?=;:;>A>T^gVT]_[W^\[RaZ^X]^b_b_^X]`Y^^cZ`]e\]^c`[^b_[_ebY\^^Y]acYZ\`XY[bZS]`\XW_ZSYZZS[[\PUWYUVWWXW\\ZXXVUWVQEEJLFCPQMHMOKIQXXQPZ`abcccgikww{{~qprrr|ultkywwvwww}E@C $9Manc>}tqiee]pyvkhlj`\JBH.'$!#*.[t|-[zk_YLJluAD:58Ka¸i>><;;;==<=>><>>?@@@@BBACECBDCCDDELUY\chlsx{!+5<;688<$fˬȈ9 *<969C965857730/+ !Dҙ!Y׸`RC239GF84/646DG>8=3#gAguG<5m~jqhkntnjgnmjknldliqe`fmibkmlajkqacan_djkf^jdoYd_j^cgh]^eae^]_XE;8:<::<>>=<<<<=;=@AMS`^`S\[]]\_Z`]f^a[e``_d\[\b`X\_cX\_dZ]]b\[_a\X^d\T]_]X^__X[Y_Z]]_Y[]^[\]ZWY]XXW]XYPZVWWYVUWY]ZYXUTUWVTOIHLNJJMONJMPJLUXYRT]_bbcehhkokxsrsr}efpewwxwyxy~}8O+%/G]rLh~{usqrlbeipponf_`BM>,&:a|d)luh[SnOOwvtcA@86=aª\A?<<==>???@A@A?@AAA?@CCDDDEFEEFDEHNW]cimsza*17EeGי?>AJZȲJO|^paqxoAJRY[]^\\_bypNUdknmnpoqtwtvtsrpjF    $# $$%L|hb`[WY[WQMKA9 5ySMLJKKHHGDCDDBBDCAB@>@?>>?====<::<==<<:9:87767531-+ʻrb)8˳^,>41?>6( ,wxjfkchhd\UM =e[NFJD<<=;96350.,'!" %,4$cͻЯuv7'@97L[;?;5/1A'!-  "(29% IԒ_ȾC526MF85.25:JK?:<4 dBgR=7jplcjkthggnnhlmman`maedjhdkgj_mdk_h]i`fiif^k^l\g_jbfde_egab^XPB:78<:8<<;:<;<>@DLY\eZY[e^[`abZ`bcV]adYW]_WZ\_\Y]\`T\^bX]^aX\`aa]_a^Xa]ZX`^\[_Z]]_[[\_][Z[ZSV]\TWXYUTSYRUWYRTWY\VVTSSTUTTSTNEHPOKJNMINUOOT\]TX_bceehkqqsk|qswurromtotxxywx{j2[ !*@N(ywtkkeahpyua_Z>J0'#"Ng}BDxqeYSQSR`eLTtYC>73CqʾxH@>=???>?A?@BCCAABCA>@DEFEDFIGHGFIGJQ[cjpv}K ! 
)66Dܭf/`qK;@@QR4Rzguzu;NUZ\]^]]`bdzvISfjopprptuvtvsssriD + +  ) "' ,3-IxaWTPNPRLBBD;0#aU2$ BMMLIJJGHFEFEDBCEDBB??>?@==>>=<;:;;?DDCEDCCB@@?>>><;:99:9686564/,)ȹrc*1ʵ`'8Q<&?\F17)(sƚ|`TN #93.$+#"$ """"!!"""""%+4@#d5&<84GE# %=*  2YFҶɾȾD739JE64-159JH<793cAdS89gjlelhogjmkllnjljvgofnjiilpekipclisdghkiefgn`fbkdcdja_fhd^][L>7589979??<<<<<;;=?BSS^\cXTVc]X_]^Z^__Z]_`Z[]\X^_\^]_X`\b_^Xb]\Yd`[_`^\]`d[[]`YU[_V]][UX^][WYYWQU[YTTUWTTTXRSRYRVWZYUVTTSSRPIOTRMHJQRIFJLHQSORZ`\Z_badegnwziov{~wtvtvwywxwwy{O4J(pvzsrrstqmgaaPKM-'() ,Wo7]zl`XVa[VUKKTrCA<74N²ǻf4>DEGIIJJJKIIJJJKVagpw:3PM=.&58@nƌ2/<@??TQƽ/Jgqzqh]Z}l@QW\]^_^]aaŬOUdhorqsqstvttsusok? + 74KP  ..(RzWMKGLMNLCBF>,&~|O<300,)"JLJMKKJHGFEFEDCCDABA@A@??=?==::9:<=@DFEDCCBAA@?>@>;;;:988685552.,&ùqc+2ͱ}va" #=e`8.'u}\SU!/C:3*.,-..22589;::>:9<>@F(cǯ5 %@;2KN!"$:' !-e|IC\@ď@/jötA139FD450467EH:76-eCM{a:7keljrdklnkhjonhjmrgfeokehlj`hfk_gfo`aekg^ceh[e^ja_ai`\ea]YSI=7589867:>?<<=<=;99CP`XeZc[\X`]Y_Z]]_\^__^]]_]]^b^S^Z_VZ]a]\[bZX^cYU__ZV\]ZW[^]WWX[W^\ZX`^Z[\ZXVVYWWZ[TXUUTWVYVVUYX[XTSQSTTRP<@PRVRNNPQGCLOLOURV]a_bbccehm{vgrziwyz|zyxwxxxy~=D:5xAGHayzxuokhea_@aL+(5HA7-!3Wnb3svi]Y[ylfeXRNQO`{jEA958^æK +<@ACCBBCCEDEDDEDDEA8=HJKJKKLLJLKLMNR\eou&?==>=<;:99UGӧ*J_9D.18JE661446FJ996.hDRt^;9lekio_kqoebimlgjlmgebmfbiigckbgZkbj]_gfb`edhZi[f_`af`^`[YOC;6578777;<>@?<;:964J`Y\^gY\Zb[Z^\aZ]_]U^a_[[_`[Z[`[T\XZT[[`\[[aZZ^bXX^[ZY^\ZZ^]\[]Y\]`YYZa\XZ^XTW[XUXYZTTVWOTVXQSXYXXVSQQSUSQOF:AJRUSOIMRFEUQJUVUY`abdfefjopiwq{}l|ozzzxyzo8R'-bR``Tq8,6Hdu~|ytspkgdeaX?`7(*KJD4+!!@^rQYth]SWsr|ysdYRPLNLB?:6@qĚy2/CBCEFDEEEHGFFGGFEEA6@ILKLMMPOQPLOPPVeovG;-jˁ/6;BVxӫFBAIj]yPLk}ɸk?x|}{mw}SET[\_``^]a\ƤqyKXdjnqopnrttsttrpli: + 6[dH%baE +   !04'!\zid`[Z[\YVUOI)!XGKJJJHGGEEEEDCBCCA@@?>?>=;=<;:::=??BDEEEECB@?@@A=;7542..-0/1110,#¹tf*1̬b# 2SH:%-rtknfc^__`bcgeeaXT&2\SHB@786310.++)''$ "+6'[̴џ؝; %C<5KNi>\o<) "0N( 
D֊.@nϔú=129FC74,727EI6960iCc_78ngmgk_oliciklkijjkjkhibgnggln]i_segcjlcbdi_fehXa`faab_\UK?965775469=>>?><9659JX`P]^fQYYcVV[Z^[]]\S``]Z_]aY[^_XZ_X]Ya[]Z`]a^c]^Za^X^]^[Z]_ZY\]YZZ\TX]]WTY\SQWXUQYPVXXUURVVYTVVVXURQPRTUTMDMKF@LTUSMHMOILTSSXVP]abegfglsh~wwlxomnx}[9M#o KTy^D- &=RgyzyqomigfcaNCZ)(,E6#'184K|ȹg!2CFFGFFGHIIGHHIIHGIB7ANNPSQRSRTTURQU`nya`HS -29CYs}Ħ8C@L{OKKVYF|y~qr}vvHGV\\_`^^]`]ƤrMYdhnpoqnrstuutrond6  "NisC + + +$77"c{fb`^_\[XWUOD'^{@KKKKHGEFFFECDCBCAAA@?>?>=><=;;;@A@CDDDDC@@@>@>>9641-/.//.,+*--, ɿȾuf,'̰d "  Dq=18;D<-&7a' 4/)#"  "!%&&()(++28@'Z̵̝ќÿ: "A<8NI9z=%%  %W+*KC* @ي%3~1Ƹ!=227@<56-607?B8873"fA^vV97mmoflgslehnmjklleijmedcll[emlZegk[cckb\cdbZdeeU`\g\Z^ZRF=9888666799;;<;8631:N\W]Ud\aPd^^U_]\[__[]\e]\]g_^\``[Z`[VZ]_Y[ZaUZ[`ZXZ^ZX_]\[[\YVZ\ZYYUXV^[[X\YYUXXVUUXPYX^STTVRUUWSTZQNOPPQQRNCDILHDMUVQNIKPLMTUO^ZX_bdfgjov}ikhlmgeqtq~tzuv~r|H(-9w/3JUM6Cc|qAE262J`orqmgdc^EWN(%3M-$.9DL<5Wl~^M~{rmbVQOLTbdpPID@<79\ãS!,AFJJJJJIJLJKKKKJKHA2DQRSTTWWWXYVVVXgyHWIuhO'05>=><:=;:869>==@CDDDD@>?>><8532100../-,*'(,+!ɿvd,'b!#'(SI ",>-$$=Ľd+2LE?:6766789<<=AD(V;ˬ̳¿= #B<1NS[;*=1(  Aդ"/-7%8M[efe[;\5%!7L'*D: B_s}Lg|zyyzsqeXONJRLFB>:5CḻıwA(,HMMNNMIJMNONMNOMKH@5PUVWW\[\\[[ZYZ_u0͚I[ .8;CvʾkNA?CXP廨rPS_=`~Wtyôh]gz=OZ`\]_a__[VtosJ\dlnorqrqrsrtrrppd* + 0+Sj:R>'#   +,&"lwhea\YKDJUXND%!omFLJIGFEGHDBCBA>?A@?>=;6413688406::;;?CDCDA@A>=822210-,)**+-,('()ɽsj1&}e!,=O/#18! 
"#:¹j1?h]UPJHLHGHIIEEHGEDA@@AAI%WŬÿ; %=<8LQMr>I.DaS& "?éx~"A539IG<61445CL?::8!jBHTG=7ljjhqkhhigfimnmkimdjemedkljelgh^balba`ffZccdYb^jba\YK><747843479;;;741028CR\d]OZ]aV]]`QT\a[[]b_X_]^^`Y`^_^_]a`[Y^`YVY`Y\Zc[[[`\U[_ZP\\XSXX[RWX\TYZZVVXZSUVYSRVYVSVTVORUUTUQXWWTONOOOPQPNLORFKPRHFLRSMKORNU[V[\Y]dfhkqjklmoooqqponj}juty}sh' Mo:V5kR1"89Kɾf7."/MQOPQPOPPQPQQQQQMG:?T\\]^_```a`___m)8F'9P6*8@M|jXA*1@@>F^fºfOM_~7j{ZqCPY_^]_`^_]_mN^flopploqrstsssqma)  l` Dr7 +   ()'%mvhe`[YPNQXTMF seDHCBA>=87=>BCC96??>;9425753063.29<98;@CBAAA@=62220.*((&)+*++*(&$˽ƾºsg4$zf 5F0#1*!>ͱm3@h_YRLJLIIGGCD>@DGEA@BBBE"S~?$<<;LO#?63%&!-{B !FԿº%I31=KF:8304:JL=9;9oD?@>=6hzegkqgdhmicinnfhkm]feqcZjlf`effZ_bi]]aibX`adRicg^]UG::855851259:;851/02WƾȾO62,2NPQVSTUSTSSTSUTPNF4@\a`acdeeeeeddhx{C{R/-8EbЙA;>?Gri~ƴ}o,`wvvgc~a@Q[\^_a_][ZcfO_einnomoqqqsttqol[%   tB H2   '1.'nvhda]XYZXXTM?$[@AACDC@9,36:=B7/9>:4-0336430/2-.47775:AA?@?>73340+)'(*++./-*('$!ɿsj6%rj"211-%)) =œѸκn6?h`YVNLJJJHUMV`hHADB@ACCI$Qȴuo¿A$9<4QLcZ&<)7s&&pZ3Xؚ-#"*÷#B328GD;81.27GF?;=;"qE;A;<7kefmpc_fneafikalkg\jdka[fhf^f_gYd_e_a_e`ac\c_la^[RB9:754760/1798850038DR[[`_WU__[[[eWWZaXV[`ZTZ^aV]^ZT[__ZX[_ZV[YXVZZ_W]Z\T]_[X]\ZY\\YXX\WWY[U[Z]WWY\STUYUWVWSRUVZOUXTNTUUPQTVMKLMOOPRMOQIJLNLEIOHCNUTMNUUU^WV`^ahlr|vpnmllmorrrtu}xvrrxuu=!=LXe!jxgI0$ .I^}J=1%!&DB%&?4"BarKn|{z{|{zw}}|}zyxncNElŰŢlA;50)5UWVY[YWWWWYYY[UURK7>^geghghghiihina$p܏ 0>Htĺ~5<@RW^8]emyyzzu}ywdau]VFT[]a`a^][Vovs`Q_ekpmnmqqrqstspro_   Y2      )1*-jofaa___\XVQKA (J07BGIGDE>5.04:9341.(%/554.+)*1112/3505>@@A>91242/($'-//-142-))& ľſsj4$j¿h!('$ 9ᧄʪ̼q8&K02^_$)0;Pӹ́3=AWGþUG|ux}ss|xvuom|sNIU]^`_`_]ZQvtYR_flrpoprsqqstrpnl^ 1  !  
+%,#/uleccc`_^WVME<->-38;KT-$7)%J2.z:fE¸!H027IH:84315DH=9;9!d?oF<8okkeghledgieagkoecflbdcid_dgkX^`d\aagW[bf`Lh`]XM;556457630.-2452018?PY\WWZ\YUWWZY`Z`Vd]^W_^\[a`Z[]ZZZZaXY_bYWXZYSWZ[U][\U\X[VXYVWYZUVW\XWUXUWTY[XUWWWQTVXUTSTTVXUUSVTTSTRRRTSMJKMOOQRRVTJLOQLMNLJJOLMTVWQSYVZ]V\giqumofmloquunuvwwwy{Z 0pw/4aV~~|{{uttjg[K8*1<# .?JG@FC@(;Wh^X}{yz{{zhcnid[lzxupeY\Ȱ~PA>874/$'`haadcfca`_aa`__\UI;Zklnlopqpooonp~1"ơj:%*-3?Vt{r5?I\A½Gb~~{mmq}CKX^`b``_\[OzxXT\dmqooppsqqrrrqlj[ + + +  (" +0}lddee\YTPKF@7'oN"-$->1615=>=911(++)3<=7.'*45-.6+%&/65400..19?A@923331/30,/00/5642/*&Ǿïtg6i¾d ";ֽv?7hdZ[ROOMNcOCAAAABEF(JsnK &?>3IT^ʹz>8*(K/Fm8!cկƻ$E239KF767406EH<::7!bAnD>7ooj_ijjdd^fe^hgjddfk]beh\]b]jW`]b^aahXebabPcZVG736633662/-,,.02259@Qh\XX\_ZYZXU\``XZ\_Y[Wa[X]_]W[][TW[^UZ^aUUX[URYYWTZZWN[S[XZXS[\\Z^_^VXXZXVWZUSVXUTRVSRUUSNRXXPPVWOUTSLQSWOIILNNOQRMFRPOIPQKFMNFKOMOXZYVX^Y\]_glrwxjkusrqr~wkvxwxwy{B!.(2Tv~~~{zxtqokljiaQ1G6%#H+7;:.#>]p~Ks}{y{{{wpjgjrgbxwskaUgȻqJD?<:83/!Nncekmiededced_[T?Cduqqtsspsutrru'0CUu1#-8@QנÝ[9?LmN{Krº>l{hoy²}CP\__ab`_^[HST^fmqnpqrrrpqqromnX + ,M!(05J     +!7xjYOGCGGCGG@>8%wF6.,+:+9:345ADFDCA5()++4:>80,0951/5/%(/663..0006BA>73442.196,/1/165531+%ȿ~ug9býf#$9˵wB9haZ\TONNNR\;̩ʴE;?L|bm^;Vü?pwuq@Q\]^bca^[WG~}KV`glnoooqqsppprpniU + + 1!   
+ +!HI#"3=B}kLKRPTSWUTMD9aNJ<4(/(>66@CIGHFFC>3&*19:<80/8:79842((/340,153/7A@9441.0-396--.-231/-+(%ÿǾ¼{qe<Ze'&8uE7bc^[TONPOd~@GDAA@AEJ&HưJ $?:7K\!"/X >/)H66!#\֗06?N̽F538GH444436CG;:;8b:83+**(0MhimqmhbYOAu|suvyyyyzxuw|\LN/"3:EVg{o~=>ERTUKWyvtlplBx}UgldAR\^_```[XQGMW^glmpoqqpqpqrspniM +  *wkcSD:7L  + '|rphNJo\TX]`^\[XQF6"}WQ7/)."A9EJIJJHGGFB=0.9@8;843:778757/,257425443:@@831,-.-5:4,+))--*('%##~nb<Sҽf+#!$$'+6̲ûzE5da_]UPOOJah=FCBCA@AE(GʷÿM "=;8IW)FU: ++H6]ع[+ǻ"@228FJ662.36GI9:;8dBgS=8raiikbb`id`fff^caf]ddf]YfgaRb`eW`afW^\b^XTE;411631/461.*)*,/38=P^[^YbXX[\\[\\[[a[ZX`Y[Z^\ZY^ZVZ\^RXXYXZ[`[YY_YVYXZTVZ_V[YXW[T\[_WXYYYWTVVXWUYTXVYVWVZSWVVNQUWTNPTUNQTPPSSRPMHGGILQTSPCHOPKGMMHKRLJSNKU[NW^]W[_aeimsuskxs{~opw{yz|I!'0|=MT^Z~}ywsqokjgb]:B:'C˜" '][ ?]wxIz~xyy{zthhpfmy|{xtl`PlʼpMKHHGCB?<97,"!!7MdreZD]~qstwvvy{ywx8'e^.F($08H|˷eJ2>?F\J߬VXZtN9~~lo^dv~pnWHW^_`_^_\YROMW^fjmonppppqprsqplL + +  #G  + +)qbATlea___^^\WRH075''ExAIIEA>ADFFC?>3:B<863354.02770027<>97725?@?820+,2..4/)+'&*++*(&!"Ͼƻž}oe>Sm.0$ "#"#"##&)%#%'+--/ķĻ}H1ab__VPPQJTTAFCCA??@C(DͿsM $:=:J\jξQB++Q5 )4CR3]پ~&@¼%A236CJ772.35BL9:9: b?PhY=6mdifgde^ffefeedg]gfieaZcjd^Yg^a^k`_Xg\YVPG5/.0230/231.,*+,/48@NV`_dT]\`YXZ]^VV[^W[Z\RUZ[WUTYYYXW[SYUYX^X_\^[YXZZX\ZZX[Z\UWX_RVXaWUX^XRUWUSTTUQSUXUSTVRQTRNNPUPOQRVLPQOPSRRNJGIIINSUTTNHDLOEHLLDGMIKOOMTQR[a]Z\cgkox|hyxafhptqyux34V't~q#-Jt~}wponlhe^P1S-(nY3yNT.*Ke~gH}xwwyxmahfft~}zuqh[O|Ȼ`TSONOLFFB?<5)&FVa_Npnlilqqvzwvqsl#rdz-!+4:;p׹O=?GgJON[@6et{o~sq|l~~bhLKZ^aba_`]\MMqoMX_gkmmmoqsrrpqtqlfH + +    + + +JaWqj=V|ifc_][_^^UQI+ S|FJD>>=<=CFCDC8:@<784212-28:72326<>9654>B@>742,-22.,+**((**+++'# |pf>Oûj55,'%"&0'%'$*?PT1%-,5Ź̿û|M0ce_\UQNNM_BGDC@<>AD$?ąM $:<;CY0JT'=!' 
,R5)vZ۟oIt)<125;>775/26?F<:;7#fMyL>9ngjbeghbdfhdcefdZdhj^`agg]\adZ`dfW\\g[TL@60-021..032.+*+.269BT`Z_[cU^WaSVWXYQZTZW\RYW^\][`W]\^\X_]_Y[_h\^\a[X[[YSW[ZSY\YPXT[STPXVSTXSPTVQOXQWSRQWVTPRPSOSSSTTORTPUOQRRUVRRSKGHILPTTVUTPFEKRJEKNCJNILNMOYWW`_^]djnqkfkgbbdm}vrzmuuup{g!R+?o;~}ri\7"+AalcmmjhfaB4F%*/;P4UlQ[{xxz||ucgfiu||zupfXZȰ{QJRY\YYQOLGB>4%5^Sl|PNNUUW\cnwwtmI+=Qm>".6;?QbozI:=Hp[һxcX6Neluxwmjmnmidr|kuBNY^ab`^`][HXsLW^hlklmppttrrsrpliC  + &! . %''%  +  -+.fti:\lfc_\]]][VQG) XxGE?@=;95:ECED<=?<677521389845536@?9539@BB<851)+241*)**))'%&&&%$ Ʋ~}{{y}{pkDIºm42.-?mg*)(An*.3Ģɻû|O/ae_]YOMMM]ƽ~7EC@?>@@C">O $9=9A^$("(G7.B:(&F2MYRx,w]ך_Ҳ*=50056563.247689:7`LyD=8u~hj[b`d]bbde[eeb\`gfU]bd_Ya`bX_`dV^ZVUK;3/-/10.-0340/--0259HUZ_`d[aXcW`Za]\\Z[XZYbX[Za[[Q]UV[]ZR][\WY[bR[_\RUXYWSWYXQZXXUXPWTVUVUZTTUTYTTUZOQTWOTW[QPQVPRSSMNNSSLQONMOSRHRUIEGILPTUPPUUTKAIKG>LNINMGQTPU[QYaebgimsigijjijmmhrkpwquzU&Px}~ze^9$$RqmiedZ894$|qZ +aZ @[uzA~}zzyyzxnhhlq{}xrlaSjɶıiC-+0=K[^aXTNJB-! ;ZujJOOSXWWYWV_n|j1F- -.;B]{747?BA@;74/(,11-'())('&))'($"! 
ʽŰ}|}zwz~~}|rfCHƺr12/-Ul&*.xWCJgP*1ĽļQ.`da_XQMNMO]BBAAACBF#8ȹQ !;>:D^')$!""<]T-*M6 utYغ*;61254575134575774iApE=8selYo]eag`^fdjdb_d`f_gd`^ci[^`c^`Z[VSF7.+,021.,/344521356:J]`\^adX_]cY\``[ZY]ZTYW]SWY`X[TYVWZ[YW[ZVP[\[Tc`\V\]ZWWYWX[`VTY\TWX]VRX[VPTWXPUWSMNPRNSVWNOQOJSPSNPORPLOQMKOPQNUQEDGJNRTULGLRSQLFGNGDKMMMRQSYRZYR^ccgkmsfjikkijklnpmoijx}ukwi7YN'&RrD'h|}z]qNeqjidbV2@+!5FQԴ@(MdlK~yzzyyukiigqzupi_Vx¿ȲL"%2Hgfb]WC % .ixn}ne^NLNSZ]P?+yZ4$4:Hsڷ<:?@VNsCzukkxroES\_cdd_ZYU?iťRY_hjkmopruuqrqppli= + + K_ + " AC;:atib``^b_^]VPE"dpD?B@A;:99;?B@<:<:-'&'.388846328:><69?DBA@:53-,.01,&(++('',//,(#  ÿɿɾ¿|y|{{}~~|zpfCDƸp70-,W\A#)4\#.&:[$,ϻŽR,ce_][RNNPXt?AAA@@@C'9̥T =>mS<;q]idrabdja^ghfYa`dX_\c\Z`hdQ[_bS\XVO@0/)+,.10--0435346436JZ_cZ]`iU[\dZYXYPWZ[WRWX[UXW\X\XZW][Y\]\Z[Z_YX\a[YW`ZSVXVRSWXTRUZNSVXOQYXRJTWUORRNLQPQNSRROOQOOUSRPQPQQOQNQQRMPRXMFEEHPSSSSLCGNPQLCGMKGKPKNSQVUV^ZY`einoxyfjjhihijknqoqpqvseq|{pt$It@ =|~{/un\lnkgc]D4J%Jl + P!3UpWe{yxyyyrmnjgv~}xupfYYǞy3BUX^skE[vA +4ryfcV\T3 .8jn)78O˗pk6>@^8`Gx¾crcEX^^_ab_\[R=uŸq~NX`kllmnonrprtssqmh>  + 9T(# &    +;]2*'\ohc`_`a^_]WND  sg>A@D?<>>=9;AA<<<;4(!"$$+1367448:?=8?BDBA=643.05/-(&)+,++++.0.($  ~||}|~{}}zypgF>ȼs8/1*F`ae,+6bJWa(;wp',ͽǿW(_f_^[QMLNKYpjL?DCB@@AA?(9ͦ¿S"=>8Q](&" = -(G8 Rܰ2:V /=400358635324569:7l?lS=:rZafjVaa^[]ef`SbcaWi[cY\`b`R_``VVSJ;1,,+,-0/+,/24554231=N__^g_[`eU`bbZ][ZV^aYY\_YXY^VXX\WWYZ[X\`ZRW[[UVZ\UVWWQRXVVSTXYUVVXPXTUSUUUORXVRSURPTURTTZRPQTQOQSQMPSPPPPNLMNONRSQEDEELPRSNLPPCFPTRJAHTHFMNMXURYVW_[^hlmqjfiljjjjjkoppsttwwuwsqwu[!$f2M[D-lV )1YnokgcY3=@"q|6LL A[vvD|zywyzuphjgez|zvsncTfĶd%8; ?te#%qcXtCwqg~}ă\Q%:WhG)49Ri0Y]8=@b:c¿TR~\p|~uZIV\_ba`]\YR;yxzRXahkijnnlqqtvstqld7 +    +  +Ge4&#lsgd`^___`[UK@"g=@@>769/3+"%&!'27743689B<=CCAD?966427:2*&&(')++,,),0+#  Ľɽ~{y|z}~~}|yoeB<ƾv;23,Jsqr*,3&J)(|{X+`da\ZSOMJKJ`i8;?BAA@AAA(2ɸS #?>:MY'%" !<@XP- (==HIOݓ"W_-;31113673-20156885i?mP?8sO^bmWfN>>DQac]i^c\oXe`ie^\_h_ZTQF8.*()+..0,).230232/1FqˏH>[Ŀ9U}O|tofLMU\_bb`^ZXO;zxPVeiilnmpqtqsuqrnle3 + )&.C +  "" 
+Wo`=nogfe`]^_^YSK@$~TAB=BD>:<8?DA9214/.(&,+,36931598835753:<8.((+..*)+-*(+.% ¿~~{{{|z~}~~mdF:ǿt:.2+N~++*7=DG>H))//r] %ahb\WTPMMHCGhU~AD@@??C>+2ѸZ +=;8E[*&"  M}t]aHYk{/Szh~arwe~EOWZ]`a^\ZWI=qvRUdfjkjnorrpsrpspm_- + + s"A(  +#" Fykc8lpjhd_]^]`YUM>&C>BCBFC:;?B?:1)-365+).46610168756;A@=A?8037757=>:0**-362*),)(+.& ˰~}yy||{~~}qfE :ǵt:+3)?RWn,*3ʜ),/,yĽ^#&]e`]YTPKIOˡ0?BA?>BC*/ʻrzW ;<8CY(&# ;! )';:zV۰)&51>:1126764(33467667 jHY|^=;r{G.Kd_i106665365;\`aSZ_b]TZ\ZRK=/,'%()*,-)''),-***/7JW^`^`behbcae_``d]ZZ`[[T_\UW[`UWW[YYT\VVXa]Q[\VNUWXSZVVUQUXNOWQRQTUUPSPUSVQRPTRSUTUPOQRPORVOLPSNNNQPMMOMLKOMKLMLFOMNLFACJOQSSSROSJEHKB;GNPRSUOKRVRX\W[^agjmqwveiiihjiiilmpqststutuvvz|e%,#jtRL,N~U"5H|YbyuqonkidX78;tÇn& >[xyG|zyy{|xqqqgkw}yvql`Uhɹ^$,NJ:K:St54itd|nQjl\U31r}tTAH*"576=οŎ2ABBA@@:0*'(/;<83/.-/05898417;963773.0455;?><5,+,4890*+)(**( Ǯ|xx{|~|}~|pbG!6±s?+4,Os/,*@kn#R++/*uſc#$\f_]ZTNLMQ8AAA>?BH-)ǩ{] "A=;I\#&$!AR_-(>lE,4Xcf8/26433434Y_aZab`^[^TPD7/*'%&*+,*(&&(++)((+;VXX^ea_`eaZafcZ]`aVWZ]WTTYVQUVWTVR\WWUVOUTZWPZXSQVW\VYRXTVVWSWXRSVWSUUVTUWYRRTUQPSURNMPQNMOOIMQPIMOPIMNNJLLQMLKMLLNLOJCBEINRTTSRLLOHBHLIFHORSTSTNYXVZYU`cgjnrjghijjiijknppqtutvuuvvwy~H.2^%jtta>*3owwrnlke`Q3@-^_C-(JfgV|yzy{{vhbceq}{yuqi[JtP<*tz8?%CR AorcqX4_P,1WbX*446:GƬX4kms|{siceajggep|pwq`iq5:1I__WVZ[W=NvmT]ehmmmqoprprqqrnj^&   + + + $- :Z1  0 @NhrlV=wqhebaa`__[SI6 9<<=BB<=BB=52-*'-7?@=<8658:9764579:867350,/3:>?>=>7.,-3885**)'(&& ſæ~}xtz{z}~~}~|pbJ$4ɾq?)7-Crpk-).m},+-(sf!#\f`^ZTMJKMj|@EBA>?AF/(éZ A=9E`$'$%8@31*+ )=A2:Lɺ¾1@90343432,31456984nDiF>8lB.,;[d='02233321LYcVMVZ^[YOE2,)%$'**)*%#%'((&(*/AVfZZYdaZ_d`[_``WZW^QXZZVVUVVSXSWVUS[TYTUS_VXZ\]TTW[UYW^X]WXXRTWTRTTUQTSSNNTTLORTMNQVNLOOMKNOPKPOOKOONJONNKOPQKKOOLNPNND@AEKOQRSRRMGJMJDGQL?IPRSVYQQZTTYV\dhjnueiijjjjjilooprtsssuvuuvx~}/$&F % 3v~}wvwusqnkkb]F,=:zS|>1RnSo~zxzyz{rcr}q^etxtmdXYǚ½vB? 
uE0+!,S"KpneiKk{nVD)>01Z7-;8/9830@J@/:;C`>ɸJELiNL~~ȿwb322!AcCGZWR7UzhU]diklmqnqtqsrrrml^"  + + \T-vZ9 +  ) +"o{[44xohgdb`__]YTI4 <4>A?BA>?<6-+,+,4<>=:><;;:885458:9:864667214<@@?<61/.06974.+*)&$% Ŀ~{{}}{yx|{|~~~|odL"1ǼsC+9+FqkW/,0fZ-X2$-,+vſl'"Ze`^ZTOMMP~DEBBBABE2%_<<;B_);/8! (!(<@"Oܢ8*)*ļ298-264453+20256998iGfH@9q=++-A^C%,3-+-/1.BRZS@DQWTQB1*$$%%(*)&&%%%&'&&*2I_]_U_`aa``\bdd]^\aZaWgW[YZWTUXXSVZZVWW[UTX[QSWYVRVWXVVW^UWXWUPTTRQRSRTVSTQRPROUPRNSUSQQQOOORQOPSNNNQNLNOMLNMLKNQMMJLNKK@?BHPPRSSSPNIGINMFGLJCLSUWXXOU[V[_\ehjnz~iklkjjjkklopqstsuwwuvwxy~f,T}I~G*0HkcyzusqokhcZ845jΓ$H8 >]vwI}yxyzz{xmedo{{{wpj`ObbC@$/ry`9,1&aaO. *WlolwK>ExoQ;WI2P6289=8311015885.,+)$$# +ǿ˾}}wrqz|vxz{}}~|rbL&1ȹoD(:*')&***.MY`L43',,,l¼vn% Ye_]VPOPOG@BFDBB@BE6#ǔqx~~b @@;BaV³,7N_K*!'9@@EGݧ]kd(3273254652)/1225995aJtD@>o}? &''R[(+2,'(*/.9DOJ=/)%!%(('&$$#&$$%'*4O_jZY[ja]afbY`gj[\bcX`\fRXX\ROSVTMSTVPUSZRSWZQQVXXTXVUTWUYTXTTTSTQPOSPTOVSWQWQPQYSOPSROQSQMMPPJKMNLLNMILLLKMKKIKMOJNKLLKJ?AEIORRRSROJOIGHMNGGMFFRUUY[ZOY]Z^bfhlqkevwhiiijkmqpstutsvusvvxzP**P-&EG [~^wtU3!Yxsqnmie`O/A*$mq+y8*"Hc|cT{yyzz{wv|nhj~}yuof\Po͸ȵGD5 cQJ+1N.&*JkwH 4^nm|}A-8CkaU8$M" /8?,48<9:;;=I|x]¾Al}}~R,(Z\+B:SZWL4fWP]cjmmmmmpqpqrpomiZ  OSzV.B + +  * +!cr' 5CFxohkgdda^[WOH6 MEAAGA;6;>;5019>>>8.49::876775788898<@@<96449>>=810--37871--,&#$# ɾ|qefjt{vxz|~~~seQ%-ǾrG&:$&$$"#)%!W9*-*gPXRSPTOJKkn' Ue_[TOMMJJ8DGEB?@BD6$a 9<oB#$"4X,#//'# %/7KD=944:4)&# "'))&$##"$#$%,:FY`hXW]g\\bf_XbdhY^abZaV_OTYYPNXUUMYWXVWU[JZWVT]YUW[\UTW\SWUYTTTWSQUXSOVSWORRTNPNRMOPOLNRSPPPONMQOKKNMMMMKLMQMLLSNMKONNOOF@CHKOQQRRQNDBJKIIMMDJLJLSUY\^WK`[Ubeknv`}|hiikjkposutsuuttuvx{1'HA-o~kEhrsqnlicbE,E +ґ5-/RhRr~zyz{yzsgeihv|xtneVWƽ}dphmxZX5"%9S<%50DÏD+65;889QSstF}pkxo}~jND55///6>=>;5/,.4896.-,+'$%# ̲cWWYaq{wz{|~~~teN$*ƺuG)?CJPW`pZRv9&,-)eP]Z\W^cdbus)Rg][UNNLLO@CFCA>?AC4#Ȣ_ 9;;G_"'!3.6& (9MBsP* IڽΙĿ35=4554666&22455797#lGoH?8q?  !:;..'%.DF<4/-*(&" $'&&$" !! 
$(/CXW__g^^[fafda`ciae`c^_^cY`Y]]XU[\SVYZTQUWVVQ[UQT\RRXYQQTWXMUVXSUUTQOSSRPURTMRQQLSQUNSSOPTRONSQMLNRNLNOKKMLJLMNIJLMKILNIKMLE@DHMNORRRPLLGDJIDHPQKJMKNVZ\_^SS[YafjnvMjhhijlmpssrsuuutvwy|r$'.?FeD{}h[phewproklfc[7/?9x{Ϗ;ZnxD~{zz{ytkffijrzzwpiaVaʿŦ^3H 3?,!JhqhVN,<_IE]e+4583\Кl*7667Z>ĿYA}hjsrx³ng?DD8DT^VZXVG:rukuMV`dhkkknpsnqrrrqnmQ   + + +   $#  + +0~qiMW|qnkkfeb][VQG-/mwW8)$##$atHKC;:DBBBCBABA@?<5)/366368/034954<858951.-25;;9640-./.03*),+('%! ϯmYZZT[r{z|{}ueP&+˼xJ#EeH?10--+e¦s-Tfa\UONMKLEDEDBAAAF5#^¿^ >==B`", ":GZI+!&1MwiSG1J޻`S58>3456554)30156799'nEmI>:nD% @#(-&$:EA8/*'&##&&#"!!!""#(4HRb\e[_ah[Zcha[ada]^bg[]`bTYY_WRU[VRQRTMRUXRRUWMSWWRQUUOQTVXTXWWSXVTRUSQSQVPQS\STSUQQOQOOQTNKNOLJKMNJLMMKLKKJOOJGKLMJHMLLNPL@CFHKMOPQROFGKGFKKFKPMGLMLSZ[^^\UT_egkn{qGmkihijlnqtsrsrrtuxxz~a'(yNJ"b}~8@.1HBuvsrnnmhaO,=0"}.@#&FaudT~}zzz{{tckh`mzzytnf]RqJ25 + + + 22)Pjof|\H!9zjH&25CyyFT^djnkknoqnqsqrromK +   + + + +  "" +"gsox]BCX|rnljfec`YTQF&5[B3+(&&vuCHC9568ADDECBA@?<2,/267111-26871465696351,03331220/..+*)%'-,)&# ƿ­wYV[_Z_ux{|{|vgR%+ʿtI#GkS;50"=KMC)-+cſv0Qgb]VQMMKK:FCCBAA@D8"νc ?=;Ab/SJFDAQU@) (7L*8_}#Jݟ(#U59:1124663+5235679: hFn|F?=rO3!'%#.( 1@DE4'$#" "$$$!  
"&-8W\S[ShWY_f\[ae]Zbca\^[g]]^bP[W]UTUUSRUTTPUT\RXUUQZYUWXXRQWXQVX[UUVWQOQTRLQRSLNPRMRQQKOOQOOQPLMMJKLMLKJNLLKLMLLOMIIQOKLKNKJNQGADJJMNNNORQJCGMGEKLGIOKCIMKY]]_`\P]gijrYM†jfihhijmortpqssstvwwzA;Jt0.p|hmD+asrqrmlf`F-E"'ҫc,2Wjz|Qo}{zz{{|ynnphozywskcWV{Ȼs78( + ' 2Xlnt_S= K= 438mC85X<8EoH9Obiovz}~uhl|~ekd/0^``bA<\WP5GFU]fhjjlnnoosspsqooF  +    + ##]\HC[yqojfcea_YSMD#,<4/$xk8HE::8//8?BCA??>=5-/.31++-48994.3668:8672.../1/00/-..,+*(*-+)&  ̿ĿeUX]bcixz{~}~vgW%*ȼtJ @_K /SJowJa;,.(^xowy4Lid]XQMMOPvFCFCBAA?B:#Øc <<:nW?+*+ +@CIG("""#!!$"!!&,>ScdX[[e_^ac\dcdcdaabcd]hbb]a^d^YW^XUWWXTTTZQXTWRTRXTSSWUOOTTOUXWSRSSOOQQPORQRMQNPLQOQNQPQPRONNPQLMQPKLLOLLKMKKLNJIJKHJLKJHILND@DHLNNPNIPQQKFIJHEJKIMRIHTRS]a`_`^cijluP;rcihhhhimqrqprttuvuwy|u&2'2Ymd0Mx}zcRsvurokjdY80A :4aտ=bwpO||{z{{zz|yytxzxwqhaTdɴêa$/ "NJ*=^nXb\Y3DR#49=><;93,.4872/+08:71,29;::87862-,00/---,++.110/-)'$  +ɽý\UX\`cjwz{}~vgW')ʿtsJ#>Y.&(%iy:+.*YYKLNKJKs}:Ij`]XNLLLNDzO@DBBA@@B;"ʭj½i ;>:C^*n+8:E:+ '4WymQD2Eݿӽ45<5244566)43446777cDTmI=:l\G8# %)$  $?FHTB'!""#""  "&.?]cY^bcX]^a]_ac]]dc^Zbef\ccaWZ]^WTW[UQRVMLTOTHVVTNTSSKPRRPNNQUSYXTSWRSOPRPQSSOQRROQOSPQPTONORMLMNKJLLKHJLMKKMKIJKLKLKLJJKKMJKMKACFJLLMMNBFJQPMFAJJIMNCKNLQWTZ_`acdeijnyY$egejhhjlppprrsttvtxz}dC RAzl_|S51=quoxssnmlhbQ+71,O%EavhZ|{{{{{wqsy}}{zxtng_SmʵG)B 73!]ȯ`=$GgjWF^esUR'9Z(5:PuB3~p0PZq¿vJ~ll}RC>$.39OXRK4OOW_fjmkmnoppttrqponD 9uejpu$  ; DbXO0bzqmjgfec`ZRK@ 'cE>>>611,4;?===:;:415:=:8436994/,2;>==<;=;3--0210.-+)*//..1,)'"  +¿ȼƼh[XVXZ`r|~uh['#̿sK">$Rt0 +*"`-,0-RQcipTh;Lgc]VNLNLPvECDCCAA@A<!h =>8DgU$?T^C*"'9X(>Nk#Cޮfzb4193475554'4136889: kAeN?=qWK?. +'$ :HHOR9" !##!!$)1A[f`U]d_L]\bZ`cd^\`c]^b_c^dbaY]]^VXVXVSSUSSWSTQ[UTRYTTRVTQSSURWYYSSUZSQPQOKNQPJQPOMOORMMPTKMMOMKLMKMMJJIKLMKMLJJNKLLNKKLNKIJJKKGCGFHKMOONKFALPPKFJNIFNJIMPPVXQ[aabdejmqs]'[nkjggjnqspqsuutuvwz~J-'t"-vnDjvI9myvsspmke]A*<#2Sl}`s~|zzz{ys]]birzwuqleZZʶu7IO + %P2(,mվ"+OriTPUUWPLQ`tm[H*57FO;8B=3\PþhQxlzĸE@X[C:L`\XSF3WrJV^fkmjmoprpstrrppn? -nc[UF9*#   ;! Bg564jwojhfffb^ZSJ? 
*]HGA=642247?@A?==825:<<:8799652-,6CBBAABA;33310///0-(.2-+),+((#" ̿ɖja\WZ_nsvi[%sL#ARsw{*+\M-.-Q×>Ihd]SLLLJJNWMS@CDFDBBB@B; }ajk <>9?c0\)80<#&"%3W ,7E6+9^54>3565656$5236778;k@bHA>kYNC7! + !# +2FEJRP0""#"! $&+3EY_c\X`][O`Wg^cc`^_ba_ab\dfk\][eY^V_UXWXXTTXWRUVYRRUXRPRTQOSVTMQTSNRTUNPOPMLONPNRLNNPPPJOLOLQPOONLLJPNIKMMJLLNIHJLLKLMJIKMIIJHHIGDEFFJMNPPMJDBHOOMHFLHGJHKTRR[TQ]`affilsbdS"{{nmqrsssuvsstux{}10/^2KhYehgvtspllgcU7.<B8 ?[t~L|}yzzzvpnyui_hstpkaRb¹d-Z6 + + +V%,1> 4s2bvhOPVZeXXWTWQZY>1345PQ3MH7;94;gDVY}dprzödqA/MQ3-K]YRA4`~LW`hkmklopporrsrpnm: + !)#,  +  ( MhDIQZ:dunieffda]XPJ? 1TIHGD@:247:?><<84.588997125512520;>ADBCA?611311.,,//*+-,**.+'$!! ̘jeWX\ktxwi]'"˽sM!@g~}r-(@hXeCM5,.,Jâ~ADib]UNLJKLIB<>;GDDCBAB@A;!x˹o B<=Cc&:!8565+%%3\GޝZAPſ5/94345554&3127689?#bChB>>pXNF9+  +  + *FFGNVG*!!"#"#',5LWd_[_chZ^]f\dbf]Zbj`\_a`[dglQ\_fT\Z^SS[VUPRRQPUTVMRUVPOSSPSSSTSQSRSWTSNWONMOPMQRPMOQRMMLRLNPPLMMLIJKLMKKMJIMNMHIJLJHILIKJJHJKKJIDBCFGKNNORLJNIBGNLKEGLIGKKSWSUYQX`dffjnycdeN&j{uquvtttvy~h5#\xiuSkkvtrommgbS,65~e5 Hdzoa{{zy{yteg[fw}vurnh_VuĿH/{# + + + 9P1%Dkee\O=jocWWUYcYVZZZ\^V:#)% 133:>>8Jk6:88FuǒM¿HiWks{óz7)RCDbL6UVQA>fmLWaijklmnooqsrrtqlh6  + "TP + +  * [sf:krmlghfe`[UOJ< 7SJJJHF=46530577/(/:837:5/14/05876@<:CCB=91,2441/+.67/*+./00+'&! 
ƾʾŕiXX]aqx}vg](ǻîwM =Xvq}2*,HPrgoD'-+L¨xr~F@fb]UNLJKNRkpwz}AABCAA@@= sʼ½p==:AeNN,=KD=,!&3]&*D޴M76.63333454%402679:?"dDwGA:nYKC90   "AGEJQPB(!!"#&+!q;p<=:?`$(8'*!'1cQ!>ȿ56>3343454"(703789& !$*;YTZV_[]d`_Zf^e]abe^dcdbei_aim`aai[a]iZVZ_XVUWVNQUUPRQRNSSUPOQRONUPQNOQQPQROLMLONLOLMLPNNMQMNKMNNMOKLJPNLLLMIINOIHJKIIJLHHIJJJJJFJIFABDFHLMOPMKFCHLC@CLNLJLKESVSZYSY[XehjmtjfhffjbH.6e|xx~9<|dLww}ytsslmldY7-9;G 3oJ9ZoR~{yzyzymjrmkp{wslgbZ]iVtTX#V+.D$q<+OnrOQacngPO\]`[XM'/Q)$,i;&549=BZ7;6567QKKGB:7;C=3011126:62696,+.059878;>;655663212.&'-/7:82.0465/'$&# ¿Žvigs||zwm`+zÿyO>7du1(8bY]hzL"+*E}I>gb]XQMJJMfHACBB@?@@iˎrfq9=:Aa+.3)%--!'1gx}[6|vdVI71d4/?4444545")7336798:!aR}D@=lvJK>3-' 1GHIINPO7#"'.[AžaHomq{xslstqrckû|T=NWRFQRZWUL/@sn~hMTegjikmnnnprrsqppa) + + + ON&[ + +  % +qtA8|vnljhda^ZTPH7 EPHHA<65:@=62013303/28:4*)+277657:<>989;:6001.&'/06:6103664-'&#! þǿƷ´zy|xk^-!vȶ{TB_6Aq+,IM%+(BƿwlK=ca\XPLKKMh:BfSßb+9?JJ-!)2hvnn ;#??rt4+>2479744!&:545768;!gH\vF@;s}@GC2+'  +JJIHJLQF3'*07BUeWV^eZTcafQ]_dWc_a`_a_\Z\baY_]_Te`\YXTVSYSPQPTRQPWRQQVUSSTRPMQTKPRTPRSTOONQNLMOMLMNMJMNOJLMOMNLNLKJKIKJKJHJJIIJIJJKIHGJJGHKIIIKH@?CDHKLMOPLDDHEFFLFAIPPGHOQSTR[\_`beginwffhhfefileje^L=ETwȔV-/&2>K^h&r{n<'xѢz!6-SpPx~|yyyzxssqhepxxtmic^Z»{Kt`kT&E)El\8Tƾ;[knYhjfj\HRWZXbX<Bv0?> 47NeUa+97747Ak@¼Sb~¸wsH84CA+":ZWTN,@xiMYefjjkkmnnrqqrqqra'   + + =A: + +    +#ybUr}A',GvS()*+CM=da[VNLLMIXU14:EH61 "75;D?88<#kBY{RA>o59J7,%  + $FLPOKLPNK6-/28G[X[a_Zag_eY_Y`^g]`ed`[^cacd`bX^_eZ[\[WUTZTRUVUPSQTMNQVONSRPNPROFPQQMRORKNMNMMMMNOPLPKPLNLQKIMRLLMMKIJMLIIKIGHIHGHHIGGIIDEHHEFGH>>ACEHKMKNNLJABKJDGKEBNOOJNTORVU[`acehlrmdgeeefffeUcwswrl\K>DZϻ8-$;|S_ztqqnjgd\6&A44 5y+7ZqX}zrwvyxe]^ehtwvpkf`Y]ÿjLƬDMUͷy^!G^jcQtsjeSHTX\VXT0ij)$(37CN27748ExǎMLq~~thv÷r?.LP?aY5VWSF(G}z`PZegjjmkmmmrsqqoroc% +  :Y' +  +  "! Bb[=Cyplihea^ZWUND1 \yA9548<9?C>75745;5)+19854,,2.,*15:DE>;9841/.-,/112.-,*(,11..-)! 
ɾǿzl`1 sȼñyV<]wD&1|B**->Q8db\VNJLMPtķk>FDCB??@@a˝īv9=8@g/HE!6--+(!'4m#+8"WT8+>34J?878 l>fRB;m9.H=.%  +  CSTUSPOPQI60/5;IVee\`haXb^g[_ag\\df^[agd_addW_c_RX^^URTVPKSSTOQRSMRRSNPQQOPQRONRQSNSPSNPMNMOPKMNOHPMOLMLPJJNOJIKLIIJLIGHJIGJJGFGGKGHJJGHGHIKGF>?@CDGJLLNLIIJEFJIFJLHIJPOOURQWTV_bdfhmueffgfeeeecVoprssutmYKIMիx&N]% +O{ev~K\xurqnjeaS)/5j,FnIbunf}vkjrywlaa`buwrnid^XlQ[RlD`ҽ:,Ofi^TfohdTPZ[\\[P+&#AH'D#-6<:SҐp37678Ooi˿M~}gtĵo[PC*6J7`W8UWWA*Q}[O[fhkkmmoqqqsppoqj]!  + + #>BCA=6::<@?62481060/,*(*259DGD?<:51/-.23233/-,(&).,-/*&! ɿȼȿŹ~ykb2 o°z\!7B:DxYSC&6Q$@N()+;P5cd]UNJLLN`S=GDBA?@@@ `x9>:Ac0ºy;27,(!&7l"3)->+¿<)<35>B=55&&33@@CEJMSMMH@GIFFJHFLNIKQRQPWVQWU[`dhjpwdheedddfeaIe1auprrrtvwz`9[3#Ya$h{D/;Zglyvtrqmjd\G&8&֭ss+Ok~Uw~{sjkkrrebdeiyvqohaZW|tKq>,tWw[[ 8RiiTUmqnfNR\YWZVE$7qdU?,,7@RSWi*83>WS|ǽpKhz~t}òpBK-#-6?UTN:*RwwXR[eglknorqppsqopqjW  + + Fq|U&   +  %+ + 1|-?IVvmjifd_Z[WVOB, + ce2879EECEC@>;@?ACA:2331-+.-)*,1313BGEFD?60,,273233/-+'%*-,/0)%! +ÿŽyjd3mży^:ZV+T|?&=|b-Be&)+;¹|R4jk\TMJKMP[]>EGDB??@>[}7=;@`RQU.2  +$(:o "+2=;3⾙[V:'?44=F=65"$53;I:58:"fGnL@;n9%AF9-"   0Z\XXWVUSSTE4/16?>?BDGMbZNKGBGHCELFEMKJRTSOPZRWZX_finr{{dhfcdefffbOkW0=qtruuw}D>)Q'I;eL4UlrO~yqgcvy{ti^`cpvqleaVbĿcK&BnVXu#EXikRPW^dZGSZXXbY=!8r@/5BpML?J5/CaBļYQ]z^GW[TQLRVVTM61XwVT\dhmlmmqqqsstqpnoY  + + OuI +  + /- + 4@=[JTvkigda]\ZVVNB( fc97>;DFDED@;?AABB?7+,144672-+,0.,0>EAD@:21.-4;62331*'+-+)-0)%$  þǿxkd3fyW%77YS(+/q4e}>'*,7Y1hg[TNJKNO_r>EGDCA@?<X~8<;?dU 461I^L/")9qf!3Ҿÿ=+B23:B;59#"54:F:78:!fAhR@8k:!4D;1$    +)W\[XXVUUPVQD3.15=MdbVccbY\_d[]`f\\ce`Z\cfT^b`V[Z^SWVTNNTSQHRRQKOOPNQMRKRQKNNPQOMPLQPQNNMRMLKMLJNMPIKKOJJMNJKLLHIKLIIJKHHGIGHIIIHGHIFHGHHIJIE>>;S8=8@c?5:x'3CI8,$'7tGmu8l 5˜3Bþ@(>0379977' 437:<979#jAo}F?8l|8'8;5) +  
"N^][YYVUTWTOE1/26=W_Pba^U^`dZ`acU[bc^X\af[`_[Y^VZUZXROSTRSPUQQNQPPNSNUMWTNPTRMORRORPSMMMPLJLLLILKKJLKLIJKKIMMKHMLKIKKJJIIGHJJHHIJIIHJIHIIHJD>@=@CEJRiKNHDGHBEHGIMRUYYTOXWU`dflpvcefffcbccdemkmk_B@{tvz~|_$]JCL&NF&f@G_wzUvytpnlifeG#8*⻀>mm&Lct{I{|{xtqxtiptxyyrolhc[Y~{AmyjL^isTCYQdH*LckaOsoijMMg^XTYP%"_7"n(56;.P|FGEB<<@BCB?>7+(+159::9521-+/31-15532116874/.+'%&,1-(&#"""%! ÿƿļƿâ|ib6[ϵ}_1@NFG)9R52R,+-3¹zZ.cg[TOKKMMX]8EDEA@>=<A:>9Ae%3!'#&0y'kx0~ 68LIYA%=23;AA:;( 126>>99;#jClHC8i|= #*56/  +   +Ha\\[XWVUSSQP<./38?Q\d_^Yd]_]d`b]fe`^`]\dab\\]`RWU[TPPUUORUSPPPRPQNQLPPTQNOQNLOPNLONOJLLLKKMMLKLKLKNLLKLJJKQLHLOKIJMLIIKKFGHFFFKJFHIIGGHGFHB<>=BCFL\iMJGCGHEJMFMTVZ[YRW^_cfhmnxafgeddbcc_\}iofduXl{u{~M2kS5xgZ7.*Aswromkge`2&<%ad^2Vl}iL}xqvwywm`gjdbuqolg`V^ÿdAdX~+'7tȹv'2SfhXSbkbVPT]a[ZWJ b6/:gh-9:9Vm758RomȼyH{y~xuqlvzivĵBKWYZ]\YYVRF+8f¦lJT^diinnnportsononlL + ' +   ?"!# + )Nryom=]tliida_\XSOKA! qG2?=2:DD><:>AABCA>8,)*48:;:9863-+.46/27754303896.+''&%(.,)$"!$%' ˿ǣ{la:Xξc"1Ne2OQ'.hOtm.,-1_-ed\VOKJLLaHEFDA@>>;?ʴ\ ;;:>].fdga$4#>3597&75:A@9:;$hDjI@:j8""$*31$   =_\[]YZYSTTTSP<--35>_i]\ahW[_h\Zce][^a^Y]`_Y\\\SWX]RQSWSMSRPOQLPNRNPKRQSOQTQOMQPPNQMPNRNLMNNNLNMNNLLIJKMJJIKHHILHGJJIHIJJHHHHGHIJHJIIHIIIIIA>>>@DFJnzvHIGCGIFPIKTVZ[ZXUZ`dginr{qddedcdddcWe¹y|}?2,YYPBSvhsywsonkhaR*1?V&7= ?`wXc|vi^qxwn[def^Zqnkg^Wm½RUkjXb^Ok Aĸ|=VgjPenmi^S\`XPVVB)15:ASczO-5;]SĻmR}mwdzvde?OWWX]ZXWWQB*?o¹JU`ejimnopoqrrqppnmL + +  -I   ? 
+D~I3ZAbwlljec^ZXUQMF + + $>AA528A9-.;AA?CCB>9268:9::98751,+/650588767457670)''('(*-+%#"%'# ¿ǿǼƿƻǽ{kb6S` -<7AмqЉ#7>9;eK44LeL,$&3;>:C37!wCe="?33=F<75'!74869%gDiI>=i:#"$#&-( +  0YZX]ZYXVTSSSQJ6,047E]Z\dbT[`bZaeaZ]_b]X][^U^\]U[VXQXVVTTUPQPQMSPTNQRTQPPSPOQOOPQOOIOONIKMLIKLMMJNKLKKILHKKLJJHJHJLKIJJJJKKIIJJIJKKGIIJIIHJ<<>>@BFQ{JMG@IDGLLPZ\[[\XV_cgjpvfcdddccddcQ|th_`qux|j* ^K'0"e~~}zwvsolibZD&>4~.G  &Jfz~Eyyuloklqghejp\colgb[R{ÿ|:ldp Qɼ}V"HZfiHYhjf]MZXQNUS5IdP#47>jz&9 8=hAú`W{rp|IJw{u=PR0!#$1RVQ=,Cr~LV_ejilllnprqqqqpngF + `L@?H( +   ;  +Gq156j|nlhb`][YWRJB '=8;6.08404?A=>A@?<79::;9983.-/.*)38415755678:8871)&&(''',/*#$('  ľÿÿ¼»{mh;P¹h#-?hz\ (*()*,+*-,2h,_aZTMJJLNQLAFDA@>=9Dɬ#6=:7h5&"2-! '%%. 6[D=!A25=J;76%83;I?76;&dB_nI?;h<"$!"%(! + + "L]QZZWWXWTUUTOF4/.37G]ca_Zc`b`ef`_fa^^`aZ]]bX[Z^UVT]SSOUUORRQMPSUOPRTOMNOMOOQMNPNNKOMKJLMJILNKIPQKLMLIMKMKLLLIKLNKJIKKHIIJIHIIHJIIGHIIHGGC:;<=>DH\`wlIMBDLGLQJWZ[]]]\`eimrwaeeccdeeeaHscQTVX\ekmqtwz|W2Nkh +6s|zwvrmjgdW2&:-U''41UnmL~zsohrrn]`T_vqnlje`Y`ýh@jE:6::;<:50(##"%&+34112411689:::741,('%&(.1/'&)'" ɿǿɿ|oe<O̿i$,QZ$*' *8-/|h*_bXRNJIKNQ;EFCB@>>:F˲͟n~#5<7=c 3/136&&#&4 1>$C34=E956'639J>77:&cD`VA:h< !!!"#"  @]OS\[WWUOMVUSPE2-146MbW]]d[]^d_[_d[Y]`aUX]^SYXZNUTTPQQTQLPRSOOPQMRQROOPNNQPONQRLMQULLLOJJMPOKLOMILMLILLMJLKLIJJKIHGHHIJHIKJIIIKHHGJGIIIGA;:27DC?=904734897525:;;97-(#! #*3413464479:9876942*$##&+00+'('! ÿĿǽʾɿȿƿпþĽ|mf>Lż{}m$+4!!2kZ..al#1w²j'_fYSOKKMPZË=DDDCA?=>"F$8>8=bG9/&'$'31Zaep(1迲lcz>!@459?<74'638@<687%hFjP@;d;"! !!  +  6]RO\\XVULBT[WSP>,,04;MU\X`Y^_a^]]`Z[\_aVZ\\U\UYRXUQQUTSRPSRQRROQOVQOPTPMOSPMPSRMMPSHKKOJJMMIJMNLJMMLLMKMIKMKJKJKJJIJJJKIJLJHKLLIIJJFGJJF@;:46JJF=2-7:26:65406989:2#!  %/54034534979740.1,-*$$'''-2.((' +  ƿ˿ǿÿƹ˩{nfBMg%)Cpa'GN´h"0xm%YbZSNJIKNKl7DDCCA>=?"Eɷ(3>:;f'V 3'&"%3Wj[22%/5Dq="A44:BC66';56==88;%gIeIA<c<#! !  
+ + ,WWHV[ZWTL==\ZWQJ8+,/4;ntlhda`_ZXUPF5 + K,'*>GD6*+57/:=71..50,/4,$%'/44.////-1//.+&&'&)+&$&'&),)(+$ + ʪzof@Fk&'NxR*83XKC@'FL6_&*rǷsu$#^dZUPKKKNVǿȌ=HECA>>=B"?̩aj}¿(3=;;`&t56%%!&1 +cY',縙m(zƾA!B46:D@65&938FB97;'dDRgLD>l< $"!  + #PdMOW[XWO?8X[ZTNF3,./6?XU]^i][_cZX^`]XY\ZTZZYTWWWQSTVMOPSPOQQRLQMOLOPQPQRQPSRPQPPOMLOLLLNKLLOMKKNMLKJKIKKLJMMLJKKLIJLJIJIIKIKILGJIKKJHH@:9;>>=$:Ϋєԑ(799@#C35@Fu_kKLGRQNTTV^`cgjnt{xhfcddcfdbV&rnKTV^ei>t}vz{~q-=F]b  "c[r|@Hvqljb]C$.05l~uj\ 'Li~wArysi[nxumuxwwwrolid][~ÿzCktc{muz[Hv ++p*6Pcm_CT[aS6Ca^WJVM#!OE$=Zv0;7:t?oP@.2EwğDýFkmtmkyĮ~|r@R[6%(&4STOB&:`yYeHVZdhmjkjklonnnmlib/ + + IeLKD+  + # +"s{o^>tqlhda_\XXTND1 + Uz==<761,*/1-04/*233/.,*("$,,.18BE<5662-*''&)*+.)*0*$"#(*$"&'# + ĿȿÿĿľοŽǾ¿ŽqgE@ͽf(&Hũʤ2$%#ˮE%,jýt+$Zj]YSJKKNNzCDCB@>?B'9ýϨƈˍ+4;78g*[YLC#0%&%%2UD5\**=>aLC%E48zHQKJTmY{} 3lD<ATky[HPUUB &IRRR[C&b"$585Gk'4r0,37MyY½IxwîuglaCQXOJNUYSRO>&Ag}h|^LX^egllmljmomnnnrm^) + + :RH2( + + +  +#}r_L>vqkgdc`\WWTOI0 jtEC7530(189233.+1/++13/+%#"))#$2=HI=8775/+,,-4224-+/*"!#)-)$()% + Ŀȿ|oeB;Ͻi.!4-\Y5:1Csrs6!*'bt)"Xi]UOIJLNRWBFDCA?>?$8ȼͯȦ̯¾*3?;9h%2+))$%/ %(+JջB$B26:G@98' 639FE;99'dLoMEBf9#%%#$'% #Qc^OJOTUJ=2E_[XTOH<,-.27GW^\bXV\aXWZ[YQ[\]QYWTOTVURPQOPPQQMORNNLPNPORQOOOOQQOSNRPPNMMPMMMOMMKMMKLOQKLLOILNLILMNLIHJKJLLKJKLJILKJIIIJIC988:=?CZ~Tw{LSROSSXUZbdgknubccabd`adeij]10knorqsvz}wF& +8e Lts|wqQrnhe_S*%7+}~;ش5 Hd|NY|rbfkyyoW_]O@Wpjhd^YlĿOOofF9^U (` #H[lrPJROR6#/KSRTU:aa(+58A/M86u+59R]swPrɾqUBQP6*73K\RP9,Bj]OT^egkklllmnponnkjZ" + + Fj! + + + # +hxF0!!H{smgdb][XXUPE. lkFE53/5946520,-/.+&&,32,).-./)&*6;KH?86345257154361+.*$$#&+((+)" + ̾ɽľĽƾȾǼ}rkG@÷n4&M}f_4),)`™}w,Uf\SPKKLLR?@(4+7=;@KKG6WTK2,FnPOV]eiklllknnpnprnk^! +  OS6'!   
H|smgda[ZZVUOB.+sf=$hhGH=65@>21//*%)0+'#$(.2-*/144,*/87AE>7426=:7965623.*0.'(+))+,+'  +þ¾ſþþƾļɻŽǿƿƽư~rhH>=,.ͽyvyuT.2:9s//$'$%2-ųhEbH @46;D@79+629B>8:<(gIjRE?m6#%(&'/5.$ +  /]c\VNNQUK8'L_\ZXSLB0,,.3:W\WZ_][]^ZVZ^]WVY\TSVZRQSURPPRRNOQOLNRQNPORONLNMOQOMMQNMLRLLNOOMLNQLMMOKMMOLONRMJMQJMLNLKIMLKMNLKLNMKLLLHLH=99:<=@F{z,FZVRMW\]_ahjlrytdecdbccdeQbyfj[,8{~svyz|d5+ "Qq% 8jt}zwsfdljeb\9!1#+9> <]t`>|siepxqe_]QEkwoje_YZd@wkbMEB} (̾Ka17RbwdIRdZ7%%VVTUTN%.P40%?kC565Zr1he.6;i<ºVX~Ucpx}ȵgCJS63BJIBTRK-2JqNPX^bljkjkloponopoiY + + ,Wm~b  +! +Ivqlhfd^]ZUTME%1`F/'&#"j^JJH=@FA1))*$").+,)"$+51-*-55.,386;A=532:@>89834431,-,&)11//,+( +ý¿Ŀʻϲ~oeD>úm2 @[0 KRUQW+**'Z´{z1TiZSPMKMPO@EECCA?@=+2Ǿȗ¿-299:`TV(L*305*$&1.O.(@[LD43;E@76*869HC;;<%dLpIEBi;%$%&'*65*  + + (Pg_XPEIMME/8d]\YUPJ@0(*.2=OWZ`[\\\WUZ[YUWWWRTSWNRRQOPROPLJKLJKLMMOLPORRONQPNNORNOPRNNNOLMMMMKOPPNPMLLLNOMMMOMMMONNNOOLNNMNOOONONNMNE:56:<=ASh|YXPQ\^_chjmtgfcbeddcdfMemokj?!|tvxz}Q9   WpazmOVknjfbS+%A@Y3{ !Jj{NS~xpdmorjfhffMNlnjc\XgÿOOmjcJPr jIQn9@VfrfYcd?#">MQRTE!#EVJ!635@\˷i,$)14GxϖE@Tz}nemozɴ~9KUR*32OTOG,0MwLPY_cmkjjklprpppppfU + F}cBJE  +   $$  +9MJ=$P}qjjhhha\WUOB%BmV=1&%"#|bJJ@?GED@50/)%*,*/,##-76/))10,17988;7220:@?;5764531,)'$)00.,))& +¿ɿſǿýŽƽ°qjJ;кj1CqJ@LtE4]uQ++*Vs0Pm[VQMMMNT:DDCCD?>:*.ɹ”¾21::=]Te>2UhR1#%3/'gLNE367>;67);69====?*dIjREDeG*&$$%'086)! 
+  8ea\[RIIOM:*W_]]XSPJ<,)+/3@W[ZWYXXZXYXVVZWURVSWVXOQQSRMLJGIFBBDEKLJLKONMMPQMNNPNOPPMMNPNMPMMORRQLRNOOPNPMPNPQPONKPPMNOOMLMQOPPMMMNP?8557;#\IF=CDCB?<=;1)%'+//*)5982(&'(.75:::82/.,05750332521-%%&+0-*()*% + ƾþſƾ¿žshJ;мl2!>=,,Ǽ¿12=:>d'Tg2?#-/("'%&/+)][YĿLA349?:65);38?@=;=.eL_vOFDjNF%%&$%(151) +  -_d^]\QKLJF2Fd_^YVRQI:)+-/6HTZ]^WW\[XTVZ\TTXXTVVXQSRVRJJKIJD@A=>EGHJIJHKLONNPRSOSSSQSRPPSROPRRQTQRPQSRNPQRNPQROOOPNMOONMOOPONOOMNNM<7757;SP8,(-7TRN>*;U~IRZ^ejihlooqrqoqonlR + U6%  + +   +;[]C![}ohdbb^]XVPG= +TD@=IHDA;?B>/.,(7;:739;95+$$*/59:;<9361/-.0220113/0-)+,.0/./,*$ ÿȾɿrgO 4οn59&.Q[UCD2@A J=,)+U|z6Ih[ROIJKKeǰBEFCA@>==-)¹Š¿23>:>h&'% " 3)%&#,  *`}Z=J<56:A=65'<36DA:<<-iI\sMFBhR_&'%$%&*.0.$ +  (Oec`[WNGIG;1``_\WVQLD2*+.07GV[]WWZ\VQVXWSRWXTTSWQSRVMGGIHKF?>;AGcRgkZ\\_cejmsjfdbadddddffgihG_~kmrvxy{|S+  e^ EtI[w[B4Toif_S(&/D}& 'Ul{~MY|nenoswm]ajgnxplgb^ZlĿPR:9Q`y +%Vȴ.7S_mx}{f`f]G/),<%GaCk285222=2Lv*6?cAļqU~XCOH5`aZXUSR9):\ERZ_dhgijkmppmppomjQ + SR39Hhg}?    +Db-]~nifb_^\XSPH< 0NABEGFD?8>A>02;2<<<65::86-&&1048:<>:6663/24130.,110,)*-1431/*&" ƾʿʾ¾ǽ½ýsiO!5ɾp67/(&!$8i9E|:*,,Ix:IsbQRLKKMKbueZRCGECA@>?>1)̾P|¿51=>>e.,'! 
2,(%$0  )ĿP>35;A<56)=38DD:=<,kKgTFCg@h.&#%%%&',/) + + ?fb`_\WNKH?3Je_]ZUTPKC2*-.08N^\Y[Y[YUYXVUYXXVXVYXXUSICDHHLJDCC@ADGHJLKIHIIKLKLNNNNOONRPPORORRSRPQPQTQMRRPPSPPNQOQPQPOQPPNPOPPNOPPC635789<=AIYRnj|Z__bgkmr`cdccbdfda_pieaPA)uqsvxy|~z9& sl a^SXycqz|{vpmie`J!*4 6est;m{oljjuvknoaQZkmkf`\[v½|?ig^Z^ *DWbnu|vfnlaYMKB@=,Nsa7633442M¿n.6DuȵA\a_̽gjxPFRM+ARNTTRK7+9_ITZ`fggilhmpqpooonlM #tmSG;     +AF1b~rkgd`^]XUSKA 9K?EGGFD=6AA=3042IqQRMJIIFHFHCDFHFCBBA?<1&Ƿ¾6 2<9:h?653-,<5-/+!#3 *ĝ}¿PC24:A>67+=37AE;9<,gJdVEC_2p2(&$&$$&'*+$  4hc^^][XOFA39g_^\VUPNH>0+,-0 + 9~ADHIFEB55?:81*.08>;6873993-01127<86)<26>>:8;/fNjSECa*hD)*&&$$%&()% + /^cZ][^[WNE3-Rba_ZXTRLE7**,-3@QZ[WTYZXUWWZSWYXTVWVRJ?;<@EKKLJIIJHFHGGIIIKKLKKKJIIKJIHIJLMMNNPRRPPSRQRRSPRQQQQRTPPQQNPOOPPQRQPRQPK51346:;98453212111-(+08UxgǺO}yjzdn{{ʶIJIGE>1:>862+04:?;4783785./1128;9;69854996.+,*)+..,'%())($!#$" ȾľÿþǾƿŽýſ¼ÿ¾¿uiT"3ʴo9?2)*(((),=qYEDg?+**DƿmDCviNPLKJJIHHIGGECECA@A>@6&¢R¿8 1=950.('$ #1  '3&$êȑQRE25;A?94(:58=A=:=-bIf}KHFc(Ue+,*)&$%$')&! + %Lk^Z__]\WM:-5cac^\YUPJC2)),-2AT\YZ\XXWWUYVZXVUXYVTG;879@IJLJIF@:;?BDEDDFHIJJKKJIJIIIKJJKIIKLNMLOQRRQRRRTQRRSRSPQSRPNORNRQQPQPPOC20155:<88;58EJY~xbhilskdbccbcaa^J~qc\PMQT`tsuvzz{}}?' + )hu _wXS722[|xtrmfc]D"'79ƚZ/ 4dr|s5g~zxvwxthdaccmppkid\Xx¾{;djq #4"&:T]hlgz`GTQPMUN)-aQif5-7431001011/018`[ºuE6('(((**:Q<@"TL)*,>FAsqLPLJJJIHHHDDCBCBBA>?@6&˸gΜ; .<840+'# %1fVǽ8ÿSF139C@86-869AA99:*dPq}RIEc(By/)(&''# '(%# 9faZ^`_^^WG."Ufd_^[VRNIB3(),.4DYZ]ZTWXXSUY\YUV[YVQA7479>ILNNH3&&6EFGFEDCBCEFHHIGIKJKKKKKIIKKKJIKMONPQPQSQRTTRSRRQRRPPRPSRPNRRPP80/1579<97><9;FI_vkegjnwgcadcccaa]@p\QWX`ijkoz~twyzz||t+!  
8X/ )vlwyox{xtplgf^5 //2SGӰrBjw_:|zwwxyxs[b_`ospnhfaY]c6vF9GDeJ /gL3$ CV_mg\vTQWTLOUD$#gCw375542//././14>j=ƽg>o~{syv|vưsi]kEQYC<828OSNF'2LnpMV[ciefgjmoqpppsqog=  + +  +6YY1 $rvoida^\XVSNI1 + bXKIKF=1,045873767>:6::42692,41/146:46<@C=6.0/*()..,-'#)*)#!!$$# +¾½ʾǼ̾Ŀǿvh[%3rB@5&$%''*,De?L&*'=ſFBIQKIIGIHHHDEDCDBA@>=>8"lɐ: +0@73-+'# $/n, T,)d¿R>249D@99-947BB;9:.jStSGDb--v2Enw|{{k1!&%" + 3Xe][__^]]]78iba][XVPLH@0**,-3HY[VRWWUSWYYVVW\WRJ<4158:EQMKE'!&?OLKJKJIF@@9>HJIJKJIIJJJJHEGHKJIJIHKLLMNNONPPQQRPRRRRRRROPQQRRM3,-025::979;=8;BIgdehiox{ieacbbaca`Odegghhijinxxqvyz||{d# + Fo Auo|rTY{yvqmif`O&%6 K25ix  !Sl}~LN~yrefgooc]aehpnjgc^UeþPHvyq+ &ɾ`*)KZci[PwrRQWRKPT<!7C%> 445420/--//.14EwƵ@ļR_tlzxrstpmdrgl}®|YDQRA:JHHGIHIKIIJKKLKMKJLNONOQQQQPQSTRRTI++//26985785;=6;AKkmbdfip|rheccdcb`abbefffhghhjnttsvxwz|}I$ .^ua \mrv;GQjyxvqlhe[G"(;o}Z6 3eqt5g~xtlpqmbd^agqqnifa\Vw½y4\{ ((CZ1R\giVGMVS_ORUSOQN1dY"$83340./,---./5PϓPüS{{ʿag~PFTG4CB8HTSM8'7QujNV^bjiiihhnnponpmkc5 +   +  + +  ?d^T,/xtnhea^ZWWSOC+ + +{eCCGF=4/,--4;:22249548311.0564/-/188567:;:::75/++,-,)&%(("  ! + ¿ƽʹǿĿ¿ý¿½ĺ¾²yk]$1zŹvD;3)('(((*C]%**6K;knKNIJGIJHB?PFEECCCA>?>8#vʠ{j¿? ,962,*'"%;lt*"UA248D@56+:36=@99:/nKbUGEa,_W$! "9fd^[^_```XEMfc_]ZYVSPJG;+)+,.8N[ZXRUWZVWVZVUWN:50-18;>ITNKI>/*,:FKLMOMMMJEDIMMLIE>9CGIJ@3GIEAFTwkqbefjsjfbbccpgbgdeddfffhijlnqrsvwxy}y.* RV-h &uu;;JwloyvrplgcZ8!/1oN;˯~ Fku]5.08BGMLLLLKJLMLLD:42/>HKHAEFD>/ADEJJIJKKKJJJJKIHJKJKMJIJJKJKGFDA>6.143267789<62870-,+(" !!  !  +!Ŀǿɾ¿Ǽſżǿƾ»Ŀ¾üvm[*0upC.1&((((()2WI&+(2ɽ~{O7lvRRMLIHF^CBDDDB@???9wDzʾ3 ):63.+&$""5ot:\G038;;76/946>?:::2lHjWJFb4 =Uk.%#  $#Iea\Z[_`__[G)`gea]^ZYVSONF5)+*-4KWUTWVVW\SXY[U>5200/16:?GSOPNLJKH?<:8>ACEIJKIIJE62>E719GJKHG@=7>CDHIJIJJLKLJIIHIIJJJKJKKLLJJHGGFD=.-,0333:95;=:AKfbdfgmyudeb`addbmi]pylzrjimpqtvxyy{J# 'kgs aok}rWkxxytqjhbZD!%6"zΌ  /ao}p7^|wrkkprc]d`fojkkfa\Yv¾|5X! 
;j|`'IZciaFA64?>?CIURTA$m552/.--,+-../3Gw@¹lk~c}is}Ǵuw9RS9EQE>AEJIIIJJKLLKHHHIIJKJJKKLJIJGGJIF1'+).216797;:DOyzwtqjhaX5 -2) :k @isa2zzskiizwm\ba_snkie`[`½a-hű "rOF\@1NYelXE<((*,-+4JRQ8211.-.--,.,-/4MЧHɾTn}_~l~}yűftgAQN-Da^\SRNC)/@\UOY_`iijgflnnnonnml['  + + + +  + + + +    &88(=rqomheb\ZUTQD + u[CE@12>@:;53102.,-,/677888214.(#(4EC>;>79CC@;3394+(%(*%$$%'%"!"! '¿ƿǾÿ¿ƿþ¼ƾĽĽ»¿»Žxl`'(mŴtL-5()''%&(-BH[SaO()(/}ƿS6lSPLKJJN_x;EDCCBA@?>; lŮxȢիq2+653/-)(h_d"$5hM\=656<=750@37<<2nNjYJM[: a{q99*+#);bida^][^a_]A3iiea^]ZVRPOMJB2**-5JYXYTVYZUYWR@4+-012/29>AJPNQRRQNLMPLMNMKJJJHC82;GHGDB90,7HNJHKIC:07DKHGDFGIIJIHIKKKIJIJKJJIJHHJGE=$*-,/122878@=BM\fadglrhbaa^`nqrebmgake^krtzpttvxzyn"") Hmq. ;h;4Z{ltyvrmig^O+",$OoN  UmxKF}wqekqwvtsolkpoifb^Wf˾ſK5yȮ^ #RYĸI=S^hjUG9)03+.&-IQN1#70/,,--.,--/07V׀aƻCe||ri|[EUU1%-.@TQM?#0DcRQX^`jiihflmmoonmljW#  + + 2:/76" + + + +  + 7XA!Dvrmlic]ZXUUPB  %vH=B<24>?:@>91044*-,-0687795123+%):HD=:@88>@=8401-)'(.0+)(*+(%" 'ÿƼƼ¾ƽ¾ļ¸»yjb+)hĺqK,6)*'&&)*)/Pc%)(,|ƾS1`yRNKJILM` !`y EU'2(!'(?\jdde^Z[^_bW/Xkhc`^[YWSRPKH>1+,2CWXXWZXUXZTI7.(*/0/01:=@EQRQSRSRRPQQQQOPOLLMKIGDFHKOMKD:.2BJFDEDC/'-AIFCCA=>DGIHHHHIHGJIHGGGFGIGE@$$2.-1403739=538DA66/<37D@9::2hNjVJI"b@ YP)4/'/ 9aoiedc]_^\a];Amgeb`]ZYVSPMLH@1*/AGHGA/*,--242565;@EO[x`cegkvldcc`_egkedalvx_mw~prtvxxuq;8 + %`SN #xRHe{zyzxuqigbZ6 &*ff5R  9br~~Z5tzvlhqtuwvvttsonie]\]Ϳc3k 4zB *KZ_efJD>502105GVUD"#}naI#.1/.+*,++-/.05KxFǾrP}w~omptzws}ǸiHLVM4#*,@ONG3!2IlNTTaejkicajnoonmnpgR  + + + '?! 
+ + +    )C\9Ptqokcb^YVWTO:  + 5776,(0541=@><20--((+/313565221(#,;FE;7=7443.,-/+(-36640032+))+$ +1Ŀ¾¿Ǽ½ƽȻǾƾ¹ú½~xk`++\ŴsQ,8)'((&&(/kY)+IW(*()rþ[,jJKKGHJJa;CEBDDA@AB>fȷŵʶ¼¾E )=731-*&\weG,E'8/$uaA437AA672837AE;:92eQ[pSKN"^F T(J110+$-$+brnhefc]][\_Q3hjhd`^\YVSROMLH=/.7GUXYXVXYYM8/)&(,2569:ADIMQRRTUSSRRSSRRRQQRQQRQPOMNMKJGD?6//8DKH>0-6EJIDFFEC/$/?HGFA8.15;<:4/.7BGC8"%*-./133679?DITmf`bejphddb^_ipmcgt}tmny^pqrsvxxuj&, + 4zP ;iY@96N}srtmjf_Q)#)!xy YU  DgvwCC~xthhqwxvwvstrolgb]XaȸF1m Ke3R]dkeD@9,,-*/3KVT=1AVwF21.-**++-./.4=_Ǽ8Ƽb]lvǴBOU?=<>7ANMF0%5PqKUW`fijgc`ilqonnmmfU  + + + + 98+     1\nY7Wtnnlhd^XTVUM: + >~E73/'/688<>:::51/*+05642265121&$,=GD=67731010121.266651//,((()&  /ſ½ļǾºö~xk^,*\ǬtS*<''''%&'+FWq%((,k½Y*fRNJIIJJZ@CFCCDA@>B?e˼˪{¾E );743.+'1'.KShtiD348?>551736@C<;<7eSLNMLM![P +0D +D610.+#',!*AEHMPRPSVTVTRTTSRQMKOQSSRQQQRQNLJHD?;688.',;KLJIA% %9A92.+*2BB=&%++.0135379CFDCDA@?B@`˹ȭmK (<742,+'/<*"2J ^Buf?246<=961935>B<;:5eRKLLLL'[X t mC000/,  7%.6Ojtohhhc_[Y]D>@HJMMQRSUTQRPRSSTRI6:PRROLPQQRNHF@CEC@<=@>:/%,)+222748<;AEHTc^befjvxbba`bmpkdagxxd_waaYtnorssutvB. +  Vpf "nWWt|nbnjgbY7 '2'*q ,Vq~T-q|wkhjwwvuttvsqmhc_[^Ĥf%`-%"$F! 
+wjUXnk$FYbkjPC809616.5LQJ/; #3/0,+,)+,./7EjxaʿQ|yxqnyı~|iHRVJ@>)0HQID)(8Tw}GP\ahiji__ilnnnmpmiJ  + + Emz3 + + +   ."^tpnje`YSRQQJ6 + El0../+)+/6:>42029<;435155363051)')AJE?6389745526799:9761-+(+*('$ M¾¿ɾƼ~~~zm`0+[ɸyP*F)&&$##&)MoXIHB')(&dĽ}^ 'aRMKJIJM^ADDDDC@??BB!YȶǚmصJ *:632.+(Ft 5H!Mnu3Pdqh;458AE;50946?F=;;5gQJJJLS)Xc JT>2121-&:-"'G_sqkfhhd_\YM6bniedb_]YVRQOOLJC87SYVY\[WYU<651033?>=>>GJKOSUTUNAAHHKNUTO:4DNOI9DINOPNO54AF@?GGE=964:?CIJA3(4DKKKJE,#39989/29><7' (+.0/488;yqlfmxwxwuuvtpkf_\YeȵI3|hoqrr{vw %f#+MZdosLC7*11.0->PSD$-iT02sI)6.-+**),,01:5/0:==80+,22160+4.'&,GIGD?9:99;<=9;<<999661+'*-+)% ]ÿʿżľ¾úĶ~zoa/+YǺyQ*E)&%#%$'-mƏ-)(&b¼_"&d[NLJJJHDRGF=BDDCBA@>=BX˳ʺ¾I ,;652.*)BrU:2F"\MN{?p¾k?168BB772226AI<:<6aPHJJLO$\k  D:5522/+%8@)*Kdtqmiihf`_Y7Hojifca_ZXTRPNKJIF>JYY[[[WPQB<;:689?A>>>EIKNSWVTO91?GB:CD??;=99;@FMMJ;01:EKLG< '7961 )>>=;0*+.//597=   Fewj*U|xoqsuwvwwuttomie_\UsƳ}->īm&L[c#1_  Mu_3P_eqmGC@1(%''(BPNB!*I-3-+**+++,17EpB·jAun{ʼ{OHXQ&H[RRTQK<$,=[vMQ[`gjki]YimmolmmjeC  ?'C( + +    ?OB&&jusoje`TRSROE0 XW&+($$%*(-6<><848>=91'(/1130,2/&%*CFDDC@@@???=;<<<98763-(*/,+(! j¿ſÿƿſľ˾Ŀƿ»{oa2%[ɶwU)L'&$%$#')Yq]QQ[%)'"Z_$%b^MMKJKJJxZ@CCA@@?>?>WµšőO +):754.)'% 2E$T?Ykj G237?>87/014?D;;>5[OHJLLM+^l +n+#6G;9541/.,#'/& )Hmurnjjhcc]?8onlhfc_[YWSQNKJIIDGVZZXXVH>;<><77:>B?=>DJLMRVVURI6;8?>:=IO?4;MDB@DD78?7/6>;AAEEFGGHJKHD:04?EFG* 5@@<7+9?=:5& ((.0056;?@IFMVdX]acgrba```bf_[`ZZth|y|mnppqrtwF( + a<  !n~ilnV`rmgbW8)7F`  'Peu}T.p}xvuuxxvtvvurokgd_XT˿¾k5WɀO~{UmkS5   I>3OkjOMn>]EBA@@@??> TO +'9852-))GZce! 
1B%'}Ox0kk H158==750213;A;9:2\RHKKLL'Yb 4=%!"JE=965321.++MuurkkigfcT6Yqnjgd`][ZUTPNJKJIHR[YUY\N?69>>:8:=BB=ADHLNQSWWUTNOPJHIL@9DC69;6>LP@9=RPKC17IFPWknV^_bivvbaab_hy{tbc]nrsh]iphpnppqrrqn2# h8 4Qg|u{bJlie^M(!+.1r +&Kg  2Ylzr79{spuvtuttuvurqic`\WeɷQGwžn'$!#TXr yâocw"BYajmaEA622,.29JOK5 PO!"81-+**+-,28Jv؋NTWsowzswusmi}qk~ȴ{hzHQVRKPGHUUPG1$2CciLS\aeihgVZjmnnnmmmb=  + + + SQC + + + + +    1rrT-/mwsoid^XTSSOA, + ^Q8>=8785+,978;5,4;=80,0*'-5/-11--.86540230-031./-,,))'((&'&''% ÿ¾¿Ǿ½ɿþ¼{pf8%Wɵƶ|V!A('&&%%&+LT()%%Pža%YbFJIIIMMc>DCC@@A>>A!NͶʰQ +);743-),MvRY "/C&%|elfm C348AA862735993cXKLKKO']a + +\%9MC>;953420/) )X{xpjjiifbDErpmhfc_\\XUQRNKIHIKMPIJHFA76;@@;:;?B?ACGNPQSYZWXUXWWWWWTQRVK@?==HK87@CHKN94GM:5AIE?49:=GOPPOPONLMKKLKGCA;559867640+78&#&+31668@7BEHT_`[\_clh`ab_`gglmevgdlnwpmppsrqrb! TqQ+ N}TB49Bj~vmdlkf^A!#, %Bkb07//8516449;;82/43,+52+(,.42>841/43-'+0/-,)''''(*.)#"#!! ¿ÿ½¾ƿ¿ŽŽļ~~|pe8%R˺~T"<)'&'&&')FsO&)**%%Lžd& XaFKJJILPj>CCC@??>?> HʷŴ½S '9842,)&$$"0Ez(${gc9fp E448AB75.445=B<<<5cWILLLO![d(6by.QKD@@?;77300-' " 4f{skjkjhgW89:;?:6;@A<98FDJU`l\[[]dnaa``__^\Zgfqd`^}c^{qqrlnprrquL(  F~U %n~gMh|UbpjhcY1#.Ot8 +Nj{yP(l{uhcfuuxvtsrpmkea^YXòj1Qɛ+1H\wrlh@J  +Mvib: 3U]gonDDHHXVKMPOONB)-v:V + + + + +    + 6sC#9qwtlf_[UTTSOB+$ jM;(++,.0/33013-0<:72,+.22,/,$%/267?80-063.+*//01.((+*)-1,$!! "" +¿ʽľ~~}pg;$Sɴ}Z:+'&&'&&(N1(%%Hýf(T~\JLJKLNPn;BBC@A@>?B"IʯµW $<972-,(?TS_$ /Ay)lUeeqD457@B840435AE=;;7^TGJLLQ#\` ,26CH:PHBABA<896420.'" HxyqklkhgfBWupkedc`]ZWTQPKGFGFD@:757<;48?@=979@DBACINQSV\\Z[[\\\[YXYZXVXYXWXUPMLJ568759?=8;GEB:159?=<9& )-46<>:B@ABBMVYXX[_gu^b`^ahnp]`drfd]kdZ^Zpllmnpqrqq2. !;ni 8mKAYyxdjphe`Q&#&;g~ 9^rs>2|~zsjiouuvvtrroljd_\Sdȼ½NHnčo^>)'9,91fx% X}M7#"i$ >X^hpnB?5.@:6>DKTN8"6}r{&$013/+,,,-09DkǾ?¹qWwwjʾiexYBSQ"PgP6B8FE#)8PpUKR]dghifRYlpnoonmkb-  + + +  VTM + + + + +   +  0dM"BB@BDJNRUZ^\[\]_^]\[\\[YXZYYWXVUUC8=D:::4:47A@9;88<=@MURRRPPQQQPONPLIGFFFIKGFDCA@?9$+488A;>BABCHRWYY[^gwncb`^`kvvo^ZkoidddWawjloqqrri 5 + + !mwJ Vq7K\L\{{ytsngb^D !' 
B)a: Ifwc,L|xphaowvuwtsrojid]XSnŴ}/QƷegyK!L  bí|$HY`jwlE<0*2+0/4JWP5/w,52/,+)+.3=TҦFýecy[{ȼvOHTT>"857=9H> *9RpSMW]dhjiePUjomnmmlic*  !?C8 + + +  + +  3VL$>rxqfZUVVVTSM@""gF0+&%"h;2)*+*(-0,%%%  + (ſžȿƼƿ}pj<&S¿^!@.&%#$$&)N7&%#B¼h,Rv^MJIIJKM`:CB@A??>??%Fϸ˞a &9;62-)$%+0Bx*rgLl`t +D/49CE85-533>I<9;9XQHIJKM)Y_#6957:M{THB@@?AA?=<:53/.+*%$XvlfeigbAivo\M]a`]ZVSSPMJHGHHD>868:94;??:8;AFF@@CJRRUY]\Z\]]^^^^]^\\ZZZYYWVVSF6=GB?I@<8675168:7;:/(*+)&(+)$"#  + 'jƿǻ¾ÿ~qg>'QÿaC1%&%%#&*T{io7%%";uj.RbLKJJJHLI?A&DͱǙ¾b %:962-))5[[Y-Aq- Iay G35:BE65/124=F<799\RGJLMM(Xc 7B88:JULB>?@@CB@?<:8630/,(!)oti_bghMKwobAJW^^ZWTRQNKHEFHEB>;:<=46?@?:D79;:9794725;;AJGJNOOOQQRPLE5/5;>INSSHHGCA?8'17:5=@BJABMOVXY[ana`__^`glcf^|~kgoe]]uijmoqqrp5. !]}k 5nr~jv|xsmie^M(!))[15r; 8]pq76zyqhgnttsttsrpnic\ZYcʮVLz˜`5)!Mk,78  ?kH:&8b_ 7TZaiiPA3)034:1>KSB-tz.&9/,,-,.5>ZaoŽ?_xuxykqDzu@RWN55'#TQOI2$.?ZvLNW]dgildPXhklllllj[#  + ?(Qÿf":.%%&%$&'B^JGV;$&"6wj/Nx^JLJHIIGGCFDC?@>>?>(;;ɚ] (>:73.*+I#->p.".4naĿt H257<=8425455;<>D::DDDIRRRQKB$(0DXWOLHEDB=0$4:==:??BE?HSWY[]eutaaa_^cpzcb[i|lpsf}rilopppof#G + + 4`nY N{~`Ix}vqmhb\B"!)!'?9q Hfu}c)K}xnagqvustsspqlha[VUnijC^Žw^ql|mq] ?­8 AW\gk`L@-)*(,31DOP<,.@U!)4.,*+,-7KvI»{HcmrxsdpxwŰ|Xy?RTCA^V7;TLH,&3A\zKMV\chiicQUjkknpmkhX!  
+ + +  + +  +  &,0OwwqmhYWWUURH:  2j5;=7<967::0'+)%*2+'%+//+0782.,(*+3;54364322326970(''&%&&&'&%# +)þ¿»Ŀƽſƿ½ƾƿƿżƽƾ}phC(L~j%6.&$$$%&)95/e4%%%7m0Lm_JLJHHIHwKADDB?@@?@>+9̻ʼnc^%;841/*(:_N9+;i0Ul^.FkYz A246;@<645369?>897XSIKIJN%Y`#;C9JQJD@C@?@CCBCA?=8842/,)$Vue`bZ?]uoaA>AGRYWVRQMKGEEC?=<4/36/;AC@CIJIFBEJKPUY]b_\^`_^^_^^^`_\XUSQTRSURPPWYRPOAE>8>>78>5;EDE=3;@97HPRMHG5!+P[TMKHGEB<%"3>:>?@ABFHHTWZ^_hymc``^`c`a]bx|syj[]_hyhjnooonV$ + R_a  ng7\vupzxupjiaY4"&x )VlyuP&b{yreaertttqpomhe`ZVY¹lDqư,+ulv  ^sZh'IX^gk^F>/0<<;9<3455310-**('%%&'(&$# &¿Ŀ¿¾ÿýɿſȿºsjF%I¾~f&81&&$#%&(&Em5+'#"1m/HoaMLJIHIGJFRDDBCB@@????*<̴`#<951,)&$& #)?k0\Y{ +A545>D=51136;;9510.(#!&y}uh`]HG|tnI=9;@LUVURNLHEEE@<91-+427?ACEHJJHEFKINU[^bb_^`a`^^_`__`]\ZYXYWXXSQOPSTSYUTRJKNB5;75JTROJJLF9?I?.*-,()E[TPMIGHFB5-;=?AADFFNORX[\bmhc_\\^jkgd_gcYsrhhmfYtoilnpqmj<- + %h$i.]oa~Sj}{xtpke_Q%&+4W6Q{ 5_rn53tzvwytlqtsqoonmfc`ZWfë^N{ɥaqtuxK īh[je.Q[cnkWD<44>D@7>LNG4""Q2/-+**.7N}ԟJļgcyuyaʾ|YGTZQ%*.4HNKF&(4CcIMZ^dghhbOXmknonnkhS  + "[zya    :f@QwurokcYWWUOH4 + + + EwB6429<@BBB?69;4,02.'&(*-664321//-9;8646:=;6531//.-+*)&%'*)&#  "&¾¿ÿ¸~tfE'M~h):6%&%#%&*F]AV;($$/{qr2EnfIKHGDGFFDECCDBBA?>>>@)4ΰʿh 8:620,(Ul)*>h1T}NF|qkYx >227@G;7/345;B?884ZSIJIKP'Wg";RLEBAAB@AAACCCBB@>@;942/+&!!1x}te^Q:pvpY;788>LVVSPKHGED?9962)-43=AEFGKLJHEGHKV\aefc_``_^`a`^^____^__]]\XVVLORSSQTSSUTWNE@:DMNPRTVVTKMA$':RSQKLJHGE@0,8??@@ANJLRUY\^dp{c`^]]ennbabiaaaahy|fiklopolf': + + /vRJJayvhxxwrkkc[C +#)a$53,6 Dcuz`$E}vqkhvtstsrqqmkfb\XVsº>^ˆ'Dnm(  XżJ,UVZgsuSE<0)*-/)f~[ɿ^v|wVǺTHSRASHJMWOIC$)3Ff~LQZ_fgigaNYllnnmlkdO  + $3     +,5@1Uyupnh_VVWVPE5 GfLA>:?CDDBA?96><79:84.)&&+-.0.--/-9:75553676631/--.,(&&(*+(%"! 
+!(ÿþľĿƾĻtiG%H¾~k)8;'%%%&&()CgznK+%$$/s5C}~IKHFFGGHDDBDDBCBA@???*5˻}i9864.)';64.*@j4gXx +E527EL971855?H>895ZTJJILL(Vl +0THCDEAAA@A@CBCBB?@@=;852.,& 1z}rdS>U~sfB9:78@QVROMIGDCA9762-'308@CDGKMMKFFGKV\afjjdcbaaaabba`aa`_``^__]\]ZVUTTOQONRSVVRKLIHHJRUWX[WUM"&*049CDHJGGGC:*7=>6,/A1(60,*-.;PapUy{`{@a˵JNWT%BSVUYPI9$)5MhxFQZ_eghi_KYlmmlmmneO + + + + (     IL?1Yzwtrh^WWVTPE/ ]mHEEBBBB?:>@=7<>>=:9760)'((''#%(**97465545555541+*--(#"#$&&'&$ + ",¿ƿǾ¿tgH%H¾j(7=%&&%$%(1z};%# ,r9Cw|CJGGGFGFFDCEEDB@@???@./đj!;:74.*$$#"#*Ag5ks!!%Vy +C516EH870436?J=896VVIJJKJ)We + 6GCADCA>??@@CCDDC@B@>><84/.*# 4x~s\F<~vqS;:869GNKLKIEBBA;653/(/13>CEHJKLKGEHRX\bfjnhbaccc``ccc__``aa`a__^^^][\ZXUSSPNPSPONNNMKRWYYXUTQ+!(,-*'$$/AGIHF?)4;?>AEFIRSUY]aj~e____]gog\bbbcyg^wlachmkklpoon< + \tn '~{ydHHs|wsole^N&$'(f| + 5^m|p10sytghrnssssrrpjeb^YWd`GlƩX"/Or8T,d~i ;=   *k^_mvEB@44285;EOMA6-@8&#-7.+,,5BmFdb}~wZs|VosdowDVsDQSW."$"3POI0%,:LmoDOY_eghgdI]klllkllcI + + KQ'    #LQ@)&czyrpja[XUPMB- ccAFECBBA:0=@>64;<=:98:70-)),-)(-0.779;:646533275-**)($!$(%! $8¿ľþʿɽ¾»ƻtjK&Il*8<&'%$$$(5E;G#$%*||{s7ClfKKIGGHHCBNEACDCAA@=?B/.Äl 8784.*&F~+(Bi5^8bY+ Wx@347GK<70/44@K?898XWGIJJJ*QhICBAD@@AA@ABCEDDDAA@>?=:73/-*$ 2z}nS2`{taC;885@JAEJFCAAA=4452,*/0SokFPY`fkjgdKZlmnmmkkbI  + <<  +   &cUA)&i}uphb[YWPLB. jZ:BC>;;:806A@:345;;:99720/+'*1.153<:;65/220-,*)))'#  "$! +'>þý¾þľǾȽ~rjP&Dm,4:&(&$#"&?k9T$&%'wx:=giMKHHHGGT^:BCBB??=><:622-)# 7w{l6FumM<8768FA9?BA@@A=63541+/.7AEFIJLOMHIW]^ciouvmgffeeffedcddecbbba````^__^^^^_^][XWXUTRNMPW[[YZXUN/;Q:"%-100+"0>-#*;CC0)@CEBCMNNPUZ_fssda___`jmd_bk\\a`a[]loiiikmolkS! + + Q9aAdl=2-?EhxupjfbV3#.0K+ )QguxJ!Y{uof`qrssqqplif`^\U_}¾y9hnj%Jbw>0Kcvq}p   9UY_t[C8.0B>:-??>=53:>><97548<:63///-(+.366>;8CGFD@9422/'(+)))'#!"" !! &8¾þ¼½Ŀƽʼvu¾¸}thM&Cp,07'&%#""#*pZ2U/#%$&s~v=f:-p0O~B355;B<40242;C>:98VUFHKJH(Sg FAAAA@@@A@?@BBBBBCBAA@@@><9850+)%$>{{tR2wzrcB;986?C817=@A@>93463/+/3BBEIILOONNW]_dkqx{tjeffeggfgfffdccecdc`a_]a`^_`^`a_^_]\\YWUUSUY[[\ZVTI(D1%(.542,(#1EF@* -`ABZRSQJF#&0DXxzaNRY_digfbL]joonomib?  
+ + +   +)[[))m}{wric]XURLA' vR???:=BE@98:;?=><935662.+*/10*-264A96=CA>@=620-&&(&(*($#"###"  %1ĿƿſÿĿƽƿ¹ʽf^^^^\_ZZiożû~~ngN'D~¿m/)3$#$$"##%!###%%&&#!srx?9dkPKJGFGHGTABBAA@>>>>/(ǣЙ˧½n5992-+&#%:a7s @X2L~A:47>C@665738DC97:PSHIJJJ.Sl 'OB@@>=AA@ABBABECCCDC@@@A?;;:94,+*&J{tdDT~ukI?;969D9+,2:?A@<51334,,,:ADGJLNOQSW]aels{~|leeeghhhfgfhghhgdccab_^a```_____^_]]]]]]][[[[\[ZVUN&% &.37640+&,@GD/,?9FFFDGACLPUZ_iga``]\\]^^\[Zk{c``aceggimmmkb3 , + @m>G|vUvxvsnjaW=(($d+$H}\c Fbr|~\#B}uk_hmsuqmnnmkie`[ZWrĿZfȳgv]lyM 0! +18<@B: %KX\bkfIBG6/))0BYu2'/,..5GzOxc\W?8K{Xt|w.A~~HK9B_?A[VRMIF$)2@Wz[LT]ciihgaU]kppnmkh^@  + + +   + +    + 6aB"+i~yuqkc]WTRJ?$ uSEDDAAA?=99979;<884312/)%(/02.-355@526;96341,..%#&'*+*'#!$&%#" ",»ýƿĿĿΤtWWZYWXYY\\\QC8ZyüshR#Azÿl3(5$#$#"##$%&&$$$&$##lyA8bpPKKIHIIC=79BDDBA><=@>0&ƹ}˭l6883,)$ $9`;h+t`&J¿~8948BMB731;3>+)+/7===6/021--+5ACFKMPRTUW]dhnu~qgghghiighgfffefgdb``````a``a__b_a_`_^_a`]^^][[TTYX)#(/36740,*$$%##0=BDE_XDAADFKOSW\j~ca``__]^^__}vo`abccdgkklmmm_"+ + +nt*gsIm~}rywtpmg`T.!'){E$!'Qgv|J!Xyokcbqtrqoopmkgc^YZZÿ{Ouƣ\^R_fil|(5  ,3,$#+92 3QW]dh_F@A-,$'._Ծ:oxXv¿e--FJK)/F*6IMQMIB$)6E[|TJT]dhmkfbOZnmmmlli\:  + + Yilp[ +     +/HG /lyupjbXWWSL?# #lF>>>=?>:74351/..0-&%*00,+,044/+144:5459:;84-(+0*&*-./-'$""$#"  !&¿¾¾°TVZWXXXVXVWQGCLY_ce_`adgl{{ú~ukV'@rp5(:&$#"$""$$$%%$$$%$jB5fPHIHIIIB?ENECA>??==>>2 Ÿo5:94/)$""6c@.7^S;G¿8525BV>63-84><>>@@CB@?@@C?ACBBA@ABC@=9:98430*# 'PbYRkveH?;76;?1'')-6<>70.032**->CEJKPQQUZ^elpv{njjigigggfedecdeb`d``aa``^_a`]baa_^`a^``]UX\Z[IAB@?) ;TX`gl[D@>00!%'9JLF:(ArYb61-19Oӣ?ǿuZwkfzmJ>=Pe_\aacee]^\S>VA-),$%8MMJ7$+7H_PIS\cgiigcQ]mmlllkf\0  + X^[9; +    'TD-r}zvpha[WVQL?  + )z?5//56531/-/0./,*$ #*-/0/3540+/85;355646852,*,,,2554/(&$"" !  
#¿¿ÿIJeSTXXWXWVUTRKACQ\]a_a`_acb_bb`a`aikwȸwkZ*?qÿq5&8%%###""$$$$$###"# f|}B6kOIHGHHJ[GBABAA=>?=0(q78641*&#!3_D,>L*oqV"KĿ}7444?O664,64>N>8;9MUIHHIJ,Ot +G;<;<>=?@B@?@AFZlW=BAABBBA@?=<;62/,)#)O^VXtXA<968B:(((*0<>;3.022-)*9BEIKNOQTZ`glr{}zytrrnjghihfeeb`aaaabcab`^`abbbc`abb`^_`G/3>CG>2' $NW402674860+"!>KKJINqnHGILOQUZbxk`___]\[]^_yrXvfaaacdcghjlmmjd66 Cz\tk@mokg\?$!!-PE E\nyy^%?}vpfagrqsqopmkhe`\ZZrWe̼oR`>@?2'{¿t=<742*'$-[E I~6535?P:74-54?Q?9;;KRIKHHJ+On =<:<>=>@?ACCAEeU?AAAAACB>=1 (IW\dlmRC8*,(%'/DNKA=S}nZ#:006M\mǿMkyt~~c}`r~sQeyydKDADD?=>882ESWYZXSTQOIC/&-;KhMHOZbgjgf_T[illjkjfZ)  + + eJQ1 + + +     ;u~wphb\WTQK< + +  )`).-(.2000.,,0035662/1/*+.+/44/(.2:=832/12341,++*+1773-,++-.-'"  Ŀÿ¾ȿϭ{`WXVXWWUSTTNFBHSYX][[\\\]\]\]][^^]^^\]^\]_a]__]]`b`clpxvjZ(;gs9#0&#$#"#"#"#$$$#%%#a~G3`rSKJHHKK_|9BABA@?>??4#z¿u7<851*'$ *\E'i(A;435@I988*35?PB:<:MYGKIGF*Hr ,?;>;<@BBBB@BhI=BACCDB??;<;863/,("%EONS^tjF<879C:)('(0;970+/22-(+SOKMT~RIQRSW\jyb`_]\^\[_^\_^XPdcbcdbbeilnvrkQ +  &urzKWyzwrplh]L!!'",ŝ 3Yiu}xe1%kytk_dnrrrpmlkhe`\YWdgT|êxrh~`v  5?<959:- /PU^fjbODC>?>@@=INH?:%(\1-:14EjIԐtwJ_st{|wz{ej~ifb^][\[UQ>JTX\[XTSSPKE,(/=>@>@6"xǼo7>740)&"+WP1y@ +@803=C8:>-64:EA;98IXFIHGF*No 5;<;=>?@@CAd~G@@EEDC?>==<88630-*&DHHJHnbD<:9>B+'%(,6974--112*(4CHKPSTY_dgpz~wqhgc`bccbabaabbcbba`_[YU< -G]]N1+.436Bej;'!9POMNOiROQQX^ol`]ZZ[]][]\``_^^`a_bcabfiklpog7" + =idyyurolfX=$#*sO5.[`  ?]kyx]"9~wrggdgpppnllifb]YXSqÿX`14(%1hw   46;BBC>*9WZ`hk^IEKHGFJHJLMH?7*Dj1'#"341;[ױ=XfyUwnovz~}|opvxylkfghde`[U>LWUXXVURSOKB()0ASnzCGO\_cghf^R[imlkmifX$  + &W44 +  +  Dywph`]VUQK8 N\08632368;63AC;483,,--*+.-,-,*.3/)%" + ļ¾ÿĿþƿ¿ĽſþŽſżkUWWWXVVVTSOIAAISW[]Z\][][[\[]Z[[Z[\ZYYYZ[Z\\]Z\^[Y\\[\ZZ\[\^__^]a`][\cekt|vl^)5ft>#8($"""##!!!"#!"#""So{I-^sVKIHHIE^>CA@A?@?>>9vǻſ¿s6;74/)& !,ZU .AĿ +A424>E:67-55@Ccu<@BCCC?=??<<<:54/-)*HGFA=d]A<9:/*%'--011334202/+2./109AFD?631--00,+.-*++-02.($  +¾¾¾ʼtYYXZXVWUVWTLA?EOVZ\\]Y]]]][\\[^\[YZ\[[[[YYXYZY[Z[\[Z[YZ[ZZZZZ[\ZY[ZZZY]YX\]^ZZ_`flpmu{wm^+4ao> 6)%#""#"!!"##"""#"LL)YmUIHGGHJay;CBBA?==>>9qŴĻ¿z79650)% 
*WW ($>Ŀ ?547CO;64'64A@Bee>AECCB?@BC>;<9920,& (EDE:5WMD=DWa_^a\/*+0546Xkih<(#"CVRPOQVjuRW[e}}a]\\[Y[ZZ\_^__abab``bdefillkkR"  '|g2;QSkyvspmh`D!)&}.- 4^hu}zk/)fztj`dmqpponkied_ZWXgÿlO}ư^%G*C^(  &*(KVZ`hmRCEGHHJLKKNOAD$"GGN`|9 41=\tSԹKmhom9C[hquyưDT\][YUSSPLI:$+5B[s~|BFMV^ddgd]R[hlkjihbP  + *]`J + + +     Lyxoh_ZWRMF6  [_B8/7=ABCA<;<92+%!&.10-04000.//.4149;=BDD6-./1/00++/0,+,.01.(" ¾Ƴ}^XYXXWTVUSTOE?==>@<mĵǹy8:971+'!+T^ >pg=ÿ?:57BQ=76'63:KD997HSGHIHH.Is0@=>>BF]Y;DCBB??AA?A@;;64/-&-BEM/6VNAFLSY\`abhu}|tk_cb`_`aU8=[```_`bcB()5>:4\njmZ,'"!7SYVUVV_vqW_ks`][ZZYZZ[]]]aaa`a`__`bfhjjjhi= ! >tuWSn|xvsqmj`6$&!2hxX-sQ  Banx~}vY$=z~yrhcklqppmnlheb[YXUq¿ZaħmVs " 2SV[bjkNBDFGGJKKMNH@E&55J{Ulͪtmowh=@WjxvuhsĭwGW]YWYUTPPLH6#,6EZuuy@INWaffje]PYgkjiiibP  + .): + +   +  +!Vx}umf]YTSNE5 ][J@02?DCB@=;<9/)%%,00'(11*&+/.,1.6?B>:=@4(/57310,'&++*/120+'  +¿¿ý¾¿Ŀ¿Ŀľ¾ƿƻbQUXXVUTTTQMI==IRUVY[ZZ[\]Z[XZZXXYZXYZYY[Z[[Z\\Y[[Z[YZZZ[[[Y[ZWXXXYYYYYYZYYXXWWXVXZZZXYYXWXWWXYXXZ\dbfkpsryx}~}}~|}}yw}z|~wla-4[sB1'#!!""#"$##$###$#HQ")\gYNJHFHGI}M@BCA@>=?@;jʴļu9;;61*'",S` .&;¾B746>L;45)74;IA588KXGGHGI,Fo 2C@ADYQ@DA@>A@@@>?=<953/+$,@FE-6SKA?JB+)&(,511/*-3872.;BGNV^a^W\x}z~zpgaca_]]^b_aabd`T6+1IMP_hihnF/'"#5RXYXY\g~v_p~h^][ZY[YZ]]]_a`aba^_`bcfgijigc)$  b~aBɼ{smlgtxPpivd˾lHW[ZZWTSQNKF0#,7E_zr?KQXaghoh\JWfjjihibJ + + *\[G     +Wy{tmh]ZUSND1 nYKE;6>CCB@;::60(! 
$*-.%#(0-)-.+(**:CC>744/-1342.-/*%#%),..*'$  +¾¿½¿ɿſlRYXUTUTTTSQI=8CQTWXXYZYZ[ZZZYYYYZYXYWZZYXZZY[[Z[YZ[ZYZ[XXYYZXYYXZXY[YYXXZYYXXYWWXWYZZWXXWVVXYYZZYYYWZZXXWYXZ`deltx~}}~z{|}|~zk`,1Xr@0'$#"#""""""##""%#DP"%ZhYNLIHHGVQ?DB@>=??@9dDzźv6<<71,&" 'Pe 8/9þ9645;<744*759;8698LWGGGFJ,Kn :B@UMBABAABA@>>>=:761.*!!,?F<%7XG@JO.*'()04/1*)059508AEIR\`^SWw|yz|smjecdba```aa^`O-.I_`ccfefWB3(#!8X[[ZZ`q~pza]\ZXX[Z[^]]]]^^_^^``adggikklZ$ )yjI+-i{wtqon`<&%dZ+ɨ2Yiv|~|l/&e{ujflnpppnkkjeb^]VVflNyķ[M2t7  4@D?><3 "DSW\hi\FDFGGGFGKLOEE<09KܩAǽ|]nzZ{vrxg̼_FWYYYWSRQNID($-8Ib|oDNW]dhjmi[PUfkigghcE  :LF?# + + +      \z|ulgaZWSPD- xUFA???CCC@;:;72)##(+/,#"%,010,()+*:=DA;52.,/1/.--.,)(('()*($#  ½ÿĿ¿ĿǿſǼûμ˽v]TUXWUTSSRPLD<==>9cŴöv4;:41+%!$Nf ,#8ÿ780567784*92358878!K[GGEFG,Gp*ET}D?AAB@BAAA>?>9832.(!-@K9%>VDDV=*(((,5.-,'+27724?FIQYbcae|}~vjdgegcdec^WA-$%4gd`_adca]OPFEGY_]\_aexxr`\[XWYYZY\\\\[]^^__`aaeffgjif@ +  <|u_xwusrojY.##Ke D_myvV>xyskagmonnmkkgaa^YVUp¿Z[ímdb  &771-'%# *LSX^hjUECIGJIJHILK=I*5Cf߆JĺpClwon˷ULXZYWWTRPMED)%-9MbiFOY^cikmjVMWhljgfgcD + + FfW7( + +    !Y{~|vnic\VUNC+ +  sL??EB=ACC?64974.)).01.'#%,11,'%(.*8?>?<641./-++*,,+)*,*)**+'"! + ýĽ¿ÿľžǫ_ZUTVTUTSRSLC<>?;cȷŻ~89852+&! )Pg 5¾ +9:1276576)72434758 JZGGHGH,BpCqB@BA@@AAA>>?;:761-*0@N7'BPFTP0+)()34*,)(.4842=EJRZ_War{re[Yadfec]SI7-Qfhieed\]VOMS`ccabbcfpri^YWVVXYYY[\[^^^^^__abbdfghiib, +  YRcvoa|vuqqngQ+#! 
%Rdp{sDO{vtqompponnkhfb_ZUU\ÿQkälej  0Sb\i\ruqw}~vɵNTZZZYVTRPKEA%%/=NfdHQ[_elmlfXR\iiiiiiaB  + + 8HEO*  + +  %4ART@  #^~|tojd^XVNE+ #l=7BHF@BDCC=87121-,1123.)%)--+(&),*99:=<:6344-(()()++-.-**,+(# + ¾¿¿¿^WUVTUTSTRQMF=:BKQVXXXWWYYYVXWWYWXWVWWVVWWVWWWWXXXUXVWWXYYYXWWWXXYWWXXVWXWUVUWWXXWUUVWWXWXWWXYXWVVVTUWXXXXXYXWWWVXYWXWVTTTUVWTUVWVVVXUXXZ_`achqlps~uuwz~}ynf33LtH.)$&$%&&%%###$$$$"8wV&#Yp^LIIHIHPL?EA?>=>>?<^ɼǿx38852,& 'Kj 4L85ÿ 782344472&60234697"IYEGHHH,Cq Wj@AB??A?==>@=<;82..&0BI-*JOP[2 +  "B]}|dQ%a~{soj^ZXUND* + + +yJB?GGB=@AC@>=3//11/04763/00/--)+)-B7479853550,0.,-//1...,,,+($ + Ŀÿ½ǿuVRXSUTUTSSNI>:?KRVWWYXVXXYYZYXYWVWWVVVWVVXXXVVWWXXXWXWWVXWXWWWVXVUVXVXWWWVUVVTVWUVUWUUWXVWVWXWWXWWVVVUXXXWYYZZWWVVWXUXWWTUVVVVTUUUUVWWUUUWXXVWWX[[^aagkm~}}znb20N¿sJ/8?>>?=Wȭ«z3;:41,&"(Ho 3-v73 573343343(9125779:HXCEHGF,Jr2ta@A?@?>=??@@=<:63/,'5IB'+TSXI/+)),50*.'&-489;DKO\fjtznbTIMKVfnrkjmlnkheVFHOSYZbecdgkn_lVUTUXWWVXYXY\]_^_^]^_`adehhfeE 8rpzxvrpnmZ@%$#   C`ny~~zY 6uxutsqpppnmlhgd_[VVYqX`Ŷn-;XX{dtvwusgziƯ|HVXXXXVPMLHA6 (1>Rl{TMS^bgjkngZU^lfgiigd8  6N]h + +   2\`' )a}{rlic[YTMB* + + ,rLF@EFD>:?A?>?92--/0377767664..,*'0F?>9:9889:5110/2574/0110./,' ¾ÿĿ¿ÿþÿÿÿ¾ſġxSNWXVUWSSRPOB7;DLSVWVTWXWVVXXVWX[VVWWUWVWUWXWVVVXWVWWWVXWWWUWXVVWYWVYXUTVVSTUVVUTVSUUUTTVWWVWXWUUUVVUUVVVVVXXXWXWWWWVXUWVWTVVVVVWVVVUUWVWWWYVWUWVWWWXZZZ[V~}yof3/L®xO=~SR]??A>=>;8710+&7M?">g\W8/+))/6+-+%*08<;BLR_hrxoaTNHEGKT^afnpsrh]QKIMWbdefhf_bTQSUVWWWWVYY[][]]]a_``bedggf`2 + + [}|{xtomlfS>!   
#Tgv}sC T}vussqqqnlkjheb\WTS\ÿEpǭnO,9r '8:=??4  (LUX^fjYDCDEEFHHJKNBPE -/"!&N|ۧIY2McpbaefROQziqzÿ}qhofiwdëlEXXYVUVQPOII3'2AVnMLT]chklleYO]kgbikh`:  + + .-+1 + + +  0w^M* )d~pig`YXUNC% + *lPHACFED<::???;3/345787666773/-+'$0MIFA@BAA?>8545557642310/0/+' ½Ͳ`TWVWWVUTRQLC:5@OTUWWWWVWWVWWWVUVVXVVUUUWVUVVTWVVVWWVWVUUVVWUUWXWWWWWWXXTTTWWTUUVVTVSVUTUVTTTUTVUUVUTUVUVVUVWVUWVUVWUVUVUVVXWVWVUUWVYWUWXWWVVXWWVWYXWVWYXXXZ|qd6/Iͽ{NK:MGqeiuUGxP#".zwZ*Ti[NLHGIKJ=<>?>< Qµy1987/)'#)It ":ÿ 782243657'91244688"EWEHGEE-Ar#>zxI;?@=>?;;72-,!"#@R22ig`B4-)(+4/*.('/6<=BMWclw|si`VQLIECDFL[gluqje`Y]^]beif[K48@JOSSXXX[ZZ[Z\]^^abbdeifeV  $|~|{uspng^N4A   4^n}~h/(g{wsrttqppmjigd`ZUTXi¾mAwƧndY .80(#)3*2RTZ`ijVCBCFEFGIKLLB`B$2>8()6gۅPOHA\]67:=<955887888667750))(%5MLIHEEFC?=::9876464221/.00,$ ¿ÿÿƿȿơgTRTVTUTRSQNI=7=HRUVWWWXWWWXVXXWVVVXWTVUVTUUTVTSUUVTTVWWWUVVVUUVWWVVUWWVWWUUUVWTUVUUTVVTSQVUUVTTTUUVUTSTTSTSTSUTSUUTVUTTTUUWVWUUWVTSUUWWUWUUVVVWVWXWXWVVWXUTVS~}pa8.TwPBPY^cauO?FC%!+Y(Mc\NKHHJLM=BB??===>>=Rø²z 3:950+'$ 'Hx :Ŀ581255563';035666:#@ZDGEFC-Ev!Izv~G>??@A@?>>?B?>;81-) $MP2]pkP40,*+02)-)&,18>ALZgozyqh`YSMLIEFGDKQ\fikigeegjhT+!)9NRKSXY]]]\]_abbeidcD 8~{zvrmkeZL+!$j/'  BgvT!:zwtrrrqppnkihfc^ZTSZqY]ȿ6 *82/3:823788998677762..+(;KMKKGEFB?9998774243122111/*#  +¿½¿¾·xUSUUVUSTSQQK?9>>>??Oij¯{"5:951+'# 'Gy :4;2145563&6/368788#@[CDEFE+As!Su~w@?C@A@@>bt];2.+*-3.*,'',4=@KZgt|xqjbZTOMLKKJOPONPNUiqrpSA1(U^0:GUY^aacddeff]6& W~|xtpmhbYI%"-XU=:5/+! #Rhwt?!O~xtrrrrpomkkigb\XVQ]¿PpǶ  -:AB5)  #BXW\dlbJCDDEDGIGJLIDg"+iZwȾzjd\ZXP~|q|i{s||{knʶIQZ\YWURSOKFD! )5G\vIOY\afjjg^WV]ijmied_*  + + 1F3  + +  +,! 
8vypkfa]\TM;  +  TcLIGC>BDD7(+4772059:;888776764344,=PLKHGFDC?:<;97974543543430*$ »{YUUUTTTRRRQOD75@MSWXXUXXWVVUWVWWUVVXWUWVVWWUUVVVUTSUTUUUUUVVVVTTTTUVUTUUUTVUSTTTTSSTTRSTSSVVUUTTTTSSSTSTUTSVSSUTUTSRRSSUUTTUUUTRTSRTSTUSVURTTUTTVTTUTVVUUTTTTSRRM~|pe;-gǹzP+/$$$$##""#!"##$!!!)z|y`(Gb\MJGHHJM}>BABA>?@??@JŽ{9:861+&"(B{ 9¿182103342&90336776#>WFFEDD+Aq#d|uzn@B?>@A@=>A?@@>9630)$DI<6fnR91.+,23*-)#*1;?K[hyztmg_YXUQPTTSSP58P\fI pS!8]s`MB6 -;EW`fiffV!d# %w}zwrnkf]VA "=097>VOH'< /[kve,)_zwtrppqpomkihfc[UTRfhO}˯u 5%2/358=) *LVX\gm^EAFEFDFGGKMCN`8ܴOǽ~Odqsz~u}~tkwDzCRTVTTVTOMHD<)5G]wvFOW]cfgid\STU_bhigeX*  + ;GD + +  %$"  3_{wme^XPLD;-   OaCEB?.kyT,-&$"!#!!"! " !"!"!%v_-Hc\JKIGGIGGCEBACAA@@??>@ Eƹ{ 58973-(#(Fր 6293323342%61335677$@WEFFEB+@v2{vvz|d??<=?@??=9662-%)@N86bpH61+,17/+)$'/7>G\l}{wlea]YYYW[[R0,:EE*,nC?\L50%=Yck{29re^I*f. 9c6Mn{~}xvsnje\X694 !-8@P?;AfoxzT!9rxussqononljgfd^XUUZvKVͤM ? 8;94//(0TWY^gl[GCHGFFGGHLM>\R#NޞMƼydogu{wxqwuleqnkf|bhƱBQG:?16YNJHE8#+9J_yCNX_dghjc[RRTdvkhifU%  + + + + + + C_, + +  +-55 " +:KTFJA;>9==990 Yc?D?=@BBA?9347899898854721675436;/>LKIJGEC@>>===;98887677531.,'¾üþ¼ſ¿ľľžsWTVUSSSSRSOK?67DPUUVWWUVVUUWVVWVUUTUUVWTUSUUSTTTTWTTWUVUTTTUUUUUUUUUWVUTUUUSVUUUTSTUVUTRRSTTRSTSSSUTTRQTTRRQRSSSSTSQSVSSTRSSSUSRSUUTTUSTRSSRSSSSSTSRSSRPPRPQPPOPOOQS{nk<0twS)-'##"#""!!! ""!&qb/Ee]NKHGGHGFCDA@BAA@?=>?BAȿ¿}19863,' (CԂ +5.92433344"62334677$@[FFDDC+@umusx{}D??>??>==?@@?<:930,#)GP/EqkD70*-41*+'&+3:E]p|xqmgea^ab`O*+>:;eZ[ejy7I&.C(,>E ciX7*2'&%&#!S}wdJ;A:KaisvtpmhaZU+!']<(3A'&/" !Rkvn=N|wsrrponnllkeda[VSU]y8a) "T%.69<4! =VYY^efQBBBDDDGGIKH@l?'^ւYo{o{~}zuk\lzsrëi9>+1I44QJIE3#-:Kb{}FNY_ccjl^VNKNZllhhdT"  + + 8e= + +  + @ap`aRQIKHG>7. eeHHFEFDBB@?:::88986995573365874?F2BLMJIIDCA@?===:9888877651/**)($ !$ÿý¾¿¿ÿ¼}TRVUSSTRSRPMC:8BLRXXXWXYVVWVVWWWWVVUUVVVVTUQSTSUTTTUUTTURUTSTUTTTVVUTTUUTUUUUUUSTRTSSVUUTTUUTSSUVTSTTSSPQRROPRRSRRRSSSSRRSSRRRSSSSSTTTRSRSRRSSRRSRSTTSSSTSPQOQPOOQPNRRz~}pk@0q¹yV)-%#$$""!!! ! ! !o}|d/CdbOIHHFEECCDB@AB@@=;=??$?Ÿ¿y 19752.' 
(@҅ +5/:2344357 93223587!@[FECDC-?q,}{wxz}}U?>?>>?==>A@?=<;621,"!/NMDkzeE4/-04,)+&(.6D]p{vqnkihid;%83' C^^#hJlNJucn~~On}|`cU%+-*;=0.*''#$x__e|[C<532Gfke`[O# 2Q6GPNAD8  /\iwc*/b{vrrrpmnnlkjgc`ZURXjW0gȴ #633/-*01#EXXZ`faMBCDDEDGHIJIDr31y[nʯmrzſzuy}b^̿`)%1OSY2>IGB,$.>=:98996620./0.1436679:898787ÿƿͻeVTVUSQQRQPNH;7>IRUUXXXYWWVWWUUSUUVVVUUUUTVUUTTUSUUTUSVQTVRSSRRSSTRSSSUTSUUTRTUUTTTSTSTUTTUUVVUUTRTSSSRSRRSQSPPSSSRPPRRSTRQSRPPPSSRTURRSRRQRTSSSRRSRSTSSSSQPPSPOPPPQPOQMxyqi>,i¯{S'.##$""# ! ! ! n|}b0CcfMJHGEDDBCA??=><:86641"9­|48763.' '?ω /+;243534441124796#B]DEDDB*=rOyuyyyyJ>>?>>><=<=?@@=;9530) 3XaXy|e?50-40)+'%+3?Ob|xvrommR$4-EQMJc+/g1[qa`mFJ`$JU=lbG"/+1o{pWK=2*&#$#!!!<;::::9778766687==;:;97578=>=;AB@>;:872.) =qsQz~\:4025)+*%*08EHa|vusk70.G9'M[a]Mw;Op(> 22a~UmV8*3(@y=8QhotrmUE=/(*+UyDMIOerZ@ggb\b3"$'!07H=QK: "Ohrz{k5$L~wtsrqqollljgea[VSP\t*DoŤ7 ,'-&$'.71 5TXY_cfXIACFEDFFIKJBZmJA}<~now|i`kvmpwN{|ɶ@CJ+###%";=!,;QjkBOX\fc`\SF8459G`iieJ + + + +  XwO  + +   + !bwke_^YTNNOJXeR3("#|ZDIHGFECBB@A?<<<;:867668;>ABDKLPFA?CF=??;=>==::<@51-*!E}s[}\=2050',&%,2<=,T}zxsQ-!4>:;T7T_w!)Krr=WeR%)-*Nn''1P1FEFXfw`++-wzjNFB?Whd^_]%#)AQ>-&0! 3\o}Z$0^|wusqqsqnkjhcb]XTMSg¿Y'Vw )/99<::;) =XXZ_chVEEDGDGHEGKKEma%X܋LjC:KxumgSCDIax]_jwd~{Ų+:O+5.)&$@= .?SkdDOZZddZZN?,&"#8]hghG + +  + + -09 +     #R{rnha]VMNMC!''($"#wzYMLFHGCCCA<==><>?@?EEHJMORRPSUYYYZ[VXYXZ\Y]]X[^_`[_[\\^_````_^_`^^_^^[[ZXS¾Ŀȳ[QUVTRSRSQONB94=JSTVWWVTVVWWWWVVWXWUUTWWVVUUVUUUVVUUUUVWUTUTTSVVSSTUTTTTTSRSUUUTTTSTSTUTTRRSSSSRUVTRSQQRRRRQQSQQQRPQSQSSTSRRPQQSTRSRRQRTSQQSSRRQQPPRROQQPQQRQPRQOMMNPPPPNOOOKj}slG-oſ~W#(1(&#!###"! !! \e6,2#'Z_.!'#4į)7;974,'#'<̐ +*':1234447 50244675#6WCDDDA.:u /iuoqwy{ymFCB@====?>======>;37:,*)$).7F2&6dyi. 
'Uir[)e=gaG-+.k[$(?h:jS-$%WZ#(RTM3#Ceu|I ;lzvusru{xlggd`\XSPWw¿92bʶp )(()-35" %FVY[afgPAEHGEGGFIIGBxL!#!-kk^uxbsyoACBkuTUzxrw{}}~z^pzî&CRIH=4388?;%/?TmYCPX[de[^H-'*)%-XghhC  + + (f[C + +    + $Ru~{tvr`VNJHE0"#(ctcRUYVVLLLQVOLMO\RXXZ_]afgcilheeecceba`_``]`a^^\[\ZYXUUVWYXX[Z[\]][XXXWXUQR¿ļþ¾ÿž¾bPORTTQQPPOLF;3:FQVVVVWXXUUWVUTUVTTTUTSVVWVUUUUSTUWWVTTUWXVTTUTVVTSTTUUSUUUTTSTTRTTRSTRRTUQQRQQSTSSUTSRPQRRQQQPPQPPONPQPPQRQQQPOOSRQRRPORUTRQSRQSRPPPPQPPOQQQPPNONMMLNOPMMOPNH9U~~slH+rz]#$1)'$#*-*(&" ! !! Ri5'/!#<5% ")#7{%3:973.("'<˒ -'<124546:$10135565$8YCECCB/8r ?sosvxwJCB?>>@?=@>><<=?>=>@CB=830-( QkQ[?:>4'+%&*3GY9(+Cij'0$(;IR'"7swNj[:'5'/zV0/F[;pJ(&.fA#(T}oe^_g5'6X MLQR>0%!Qjyi3#O~yvrqqy}~mgfc_YUPP[ÿq'?jʬI &&,'$" -LWY\_edNCDDEDFEGJJBM3!" "!7|Tyɰ~tlk_HKDikZnszxnllmqqyzz`s8,Vhij> + + + + $  + +    +Ihr{tgUJD@=<6*!$'$#*8UWSKKXXTRTVX_\XXY_Y[^_dcdeghedfdcdcccba`__^_`_`^[ZWURONNOOQONQRSTXVUUTSOQPMM½½¿ĴhWSTRSTRPQONI>46ENQUWVUVVVVVWWVTUVVTSTUSSVWUTTUUUTSSTUTSSUVWTSTTTUUSSSUUTSTVUTTTTTPPSSTSSRSSQRSSUTTSRSSQQRRTRRSSRPOPPPPOOQPPQQPQONOSQPRQQQQTRPPRRRRROQRPOOPPQQQONMNMMLKLMNONNKB1H~slJ-p}`*$0*($?cH-20*$ !!Rm5)4%,E/&"!$)#3ǽ+3::72/("(7ʔ -$;2356434!1.045656%:\CCDCA-9r "Xywvu|NCCC==@?@?@@?=>>?==@BDC@;42/-&%XeY_@BA++*().;ZqVC2-:\d+%%53DZZJLQV)2JYcV).6'0f}zlcVD]D-+:c,%+u|{wqke_cc%!*?Bve)_DHIGG7!1Zm{tW*0^|zvqpoz~{ohfc^XUQRgĵP*Nsǡ- '<:<<@=( 4QX[]aldGCDDCDFEILK@[EBŹzhnejvwþl|w")t˿dJT"#G9"0BEC@#&1?YtTGPW`d_[V4%4>61\lkh>       #2DNVZI14:<>CBEDF?<:5:DHUX_gfeiffdbabbc```a`baabcdb`bdedcddcdccaba^a`^`_\\[XVRPMKKKKJKKLMMMMOPMNNMJIJJIÿÿĿŽþȿ¿ʦzWTSSSRRTRQPLA96;KUUWXVWUTTWWVVUWUWVVTTUVTUUUTRTTTTURTTUSSSRSVRTURTUTSRRTTSTSTSSTUTTRQRTSSSSSSRTSRTTTUSSQRQRSRRRSRQSRQQQOOQSSSRQQRQOOQOPPQPOQRRPPQRPORPQROPPPOOOONLLMKMKMLMNOMG;'D~tkJ-sy],$0''.g}c=C:2)!  
" Tk4 )9))]r0("#%*%((/::75-)#*6ǔ +*"<1455527%2-024556%3\DDEDC/;p 2fzvwU?@AA>;?>>=>@>>?>==>>ABD@=750-*%(bcU~bGK8(-(&+5C\zzbRD82IkM=BADEBMS8'a`G/.()*+8I_nwwkWMYY$&<}|zvtojd`kS #.X0$S4S-%=@;^lw~}kD ?n{xwttu|yjfd`\WTOTt~45]xK .51//1/" >SY\`ai]BBDDEEEGJJJ@ly$R?¹tiazuzu~\'R}ϹULVN4@7:CEDCA#(3C\vMGPT]\__S5!-.Cionh>  +  $-:GNSOTUQQY\ZWZ^\]`a``^[ceibffffggefcbbacdbbc`_`_`^^^]^_^^`a_`_ba]^^___^\\YXTJNKLMNOMMPPNHJIJKLMLHJJJKKKP¼»ýľĿĽÿº¿ÿ¾žþŷ^QQUTTQRQPONG919EMQVUYYWWVVUVWVTWVVVTTSUVUUUTTUTTTUVWTTSTTVUTUTSRTSUTUSSTTSSTUTTSSSTTRQSSTSSSSTRRRSTTUUTSSSSQNPQSSRRSSRRQQQQTSRQNQPQQPPORPPOPSRRRQRPOPPQQQPOONNNNMLLLLLNMMNONMC2!BulM,sʽ}`.%0*&3HUeyV-(# !!Lm6 .;)%bi/&! !&&*#,*-8994.*"!+7Ĝ +!<1345323%1/034676&9\DGFFE/8q  %M|w}]?DAA@>>=>?>=??=>==A?@BBCC?<620,)$ 0j\[}eLI,/1+-2:?@Ji}scZH@?Zw7./'HYW'!1ka UzqeX 585-)-2$FWW\aebTBCEEFEEHGJGDzb+_Vʿlgiu}t|{MEdxPPRVTPOSKGEA@")6D`{ùJIOR]]aaSE+'>]pnli; + +  + 1<MSSSVUUUVUUUVWXWVVTSTTUVUUTTTTTVUTTTVUSURSUVTSTTSTSVVSURRSRSSQSSSQRRRQRRSRPQRRQQRQSUTRSUUSTSPQPPRSRRSPPSUSRQQSRQQOPPRRQPPQQOOOOMOPRRPOOLMOQOOMPONMLJKNMNNMLLNL?*CukM-rƽ|`/$/*(,01Fm(*% "#!Jq5 /;-" ,m`9*(" #$$"!#&',!&//7673-(" '4¡ +'¼"<1112136#1/356555#0ZDEDDB/9r    -\{qA@BA??>=>=?@>?><=<=??AD@BB?=9640,'"!9p[XiZ<6;4027?>36QamyukXIBGc{)@LEP7:3R^1$RhQ)$*'7`[QID=====@BA@?B?==852.("!#!Ci^XuZ;A=446?HA@JPSYcp{}q`RHMdt ,2VHdccXkgdq>&J;a`K*+%BK*H?CMUPOF<5&#);}e|ih~nxwsohc_m^!+McSUT][VJ;+(9Egs}~rG=m{xtt}re_ZVSOTw{|DB_y!4`"%(0*(?26 +  %,.(*2, 5TX\`bf_KDEDDDEEGIJBW.;O̡i~}{c`n|kt|mgruz{_qlkyhcb]\UWYTWK6PPQOJLKLICC7##"&,Ec~JKPY[`^Y`TGBB>,"#&/9FNVY`a`aaabc``bcddbcdddeedceddbdc`cdb`cbccb``^_^][YXVVTSRTSPRMNNNNQPPPPONKMNLOKNLMNNOOLSVYZWXYXWXYXYZ[XYYZYZYXZZ[YXUTU»¿¾ÿþÿſ`VQSTSSSRRQKF:58GMRUTSUUVVWTTUUTUUTRSSSTSTTSTUTTUTUTRQPRQRSSRRSSTTSTTSUVSPRQSTSUTSSSRRQRRSRPPRRQPQRRRRRSRPPPPQOPOPQTRPQRPSQRSQRSRRQQQSPPROPROPQQQRPPOONOOOPONNOMNMKMLLKJLLMLLLJB2!;~umQ'h~a0-'%(0159Cnw,!! !Gl61>3)##,E;982+'((%&&'++,&'018984/(#%1 . 
=2532441"/..03368(,ZADEEC/8t   4SyL@B@?@?BB>?>>B@?A?>?;9>?@?=AB??:623*!"( !Fh[]vR@PG=XN:g_nMagDRm`j[2mY=!,')N89jYD;6318DY9"'Qx`zCYrfruqkd]dwB$.`81G4`w|)@9 &-$#  (6<>=;1 =TX[^bfZBCHECEEGGIJ@f!E޻CJzt|~xjfqf^_{kfx{]dzrq`tx$(`pjhfdbfbagM>UE::7;;HEBC7))))-7YKNRV[_`_^WJCFThmlmia:%" $(6JSUUVRORXZW[X]d_bbcacdfddeedddfeecbdcb_\^\c^^\\]^^^^\ZZZWZYWWUVTRTSUVVWVUVTSUXYWUXVXVSSSTPONSTQUVWYY[XYZTXYVTUTTTUUTUUWVXWUXTTVVSSUĽ½”kVTTTTTSQPNRM>34BNSSUWUUUVVWTUVTTTVVRQUTUTSTTTTSPPRQRSSRRQQQSRRSQQRSTTSSTUTSSRUTRUSTRQSRSQQSSRRRQSQPRRRPPRRPRQQQQQRSTROPPQQQPORPPQPOPRSSQPROOPRQPOPRPPRONPQPQPPMOOOOLMLKLKLLMMLI=*8wnY(hɽa1,(%$'/3<8SvJ  >r95A7+'':^iP;0)**+)*)),+/+#30<;641*$&3)½"A2323431"31134369'0^EDFED07t  )A]W>C@>>>=@???@?A?@B@=<=<@@A?>@A@A<742/("%$JhW]wHP[PHJPPNNMKLLPW_gjrytbUNVhG+6( ?2S2ij)Ts_qI?iY+&*$1T+2BUaqj^B!%K6%*u}nsAopbosmg_\iw'&6e'EoZ2+)2@IKOLS:7^gt}v`(0^xurv{kc^XUPRjZJrT+]!)$!% .2+,/43"HXWZ^dhX>CHDDCEGFIHCz$VN̼pOSc}peYCA@Rfeufjnoli¾rj)N|ZJXD71187HFDD6./1138Fw{EOUW\`a`_^RGH[jlonkaE:2**,.,+***+3-"³0-7;750)$%0 ( !D2422544%40345566(6\DBCED/5s    6Pjb@CB=>=>>@?>A??@>=CA>::>AAB@?ACCA>:5331)#+Mk\^yQ\a[WVRPQQONPNNSV^fjot{yeRHO`r69>:411'#!A@_srZy4Y_P*+'=93* *RWVY`gi_?ABCCFFGGHEJh1ljf|{XϷipmx~iC>Agja|~pyl)Eg|HQSOKI1!3xkU,eŹ}b3-,$$#" $"!!!! !! 0r<7E;1,*5<0-C:.-,02/-/.08/ Ʋ4'6976/)$&0+¾C2320245'5/126577)-[DCEDG23t  %)BMGFBA?>=>?@A@@@@>?DBCB>=<>A@A@@@BBBD;8631''""1Fu[c}N]olb]QIPOORSRVSRU\dkorv}weRHSkR38:99CABDEGHIHAXK5}lYgeSiV`putYqʸ}vj_K>8kli}wwgm8,Ps|xxvrqnloASTTTTQKIIEFE;=BCE>315B´{PUY^bbccbbVJH`ikmliY9CLQWVYYSRPRQNPPSUXY\^]\Z\]]__^_db`a`__]__^^`_`a_^[]Z[[ZWXXXTQTUVTXTVZXY[YVXZZZZY]`a`aaaa_d`ac``bacccc`_^^b_]]]]]]\\Z[[XWZX[YYYX\WXPSPQPPLMLKJNOKLKIGEDBCDCA@A>>@><¾ÿaORQQRSQNOPOJ>34CLQTSSSTTSRUTSUSRRTUSRRRQRSSSVTTTRSSQQRSPRSRRRSTTSRQQSRRSRTQQPQRRRRQQPQQSSRQQOPRSSRRSRRQPROPRRSRQPRRRPQORORPPQQPQQPOQQRRQOOQPPPOMNONNNOPOOOONPOMNPONNKKLLMLMLMKJD8*-xjX*T~|b3+(%%%" "! !0q=5E?95.1504C?61/110/13371xĶ4)6653/*$#+)½">2321136'3.111476),[DEEDE27w  #"!5R%,E@@><=?@>=@>>>@ABA??<=?>??>?>@BC@<97420! 
!#!;F{~\ezAAnspf[TPPPRUTTXZ[\_`cjntx{yfTCQj84998;<>AAAFHMPRXWJG:Mfb=?cV.&*&.V1*./-/9M[[5F7%&p}meuxosrkd[O3.#"2`(%((&&(ASCB.#)E1Uaoz|g.5_xvqoox|vifb_YSNTi7RxMc$UcOR` +  +:=@@=:+ =TVZ]glbH?HCEDCFGKNE[2">~`edZcVVQM\l@öyi{vo¿fhtb;0=Y|q]]^[YVVW\L>[XURPMJJHIJNOUXZUQ=7772tʱ}d\Yb_efefj]OHaeknoiYFMSXZYYYUVTSPTXZ___^_^^_a_]]]]]\\Z[ZZY[ZZXXZ\Z]XZZXXVSRTVVWXXVWY[]_^ab`aa__``a`abafc`aab`ac```_bcd``_^^`^\\ZZZXZXWXZTURTTQTPQTQ<:=9139KYb\PLGFHGFEFEB@:+$:=<>>;>?<<½þȿſü½ľ̞mLRRRQSTRQNLI@32=IRTTUUTTUVURTUSTTSRQRRSSRRPPQRRRRSQRSRQPSSQQRQRRSSTSRRUTTRTUQRSTSQTSSSRSRQQQQPQQRRSRNPQQQPQOQPSRQSQRROPQPQPRPPPOPQQPPQPOPPOOOOOPNOPPPOMNNNPONOOMMOPMKLLLMMLKNMLHA5%.viT*F]ƾ|b5++%#"!  ! "!+w<;LE@FE9516?@:5,'%$$(134-!xĿ7+7784/)$#-#U6)½" ?2542218)2/134765((WDECCB07s  '(GD <@>>:9741+ !!' $@Dac@*Or{umcXPPQQQUXY]_\\^aeiqux}zp^NVoV58:8:;U$)-+'&+XnVUG%.7<^grwypJ!Fr~wurqptz{nfda]VQNRyk aLLx@obQL7 +%#2;69:21#  EVX[_nwc?==<;¾½ÿ¾½¯}WPQQRSRRSPMLF627ELTVWUUVTUUTTTUTTUVTRTSRPQTTQRQQPQOPRRPQRQRQOPPORSQQRRQQRRRPRURPQTSPQPQSRQRSSSRQPSQPRROPPOOPRSQQRQPSQRRPQPOOQQOPOOPPOOOPNQQOPQPNOOPNOOMNNOQPONOPNLMNMLKLLLLLKKMKE>/$'tlW.Ko}`9*,%"""  !")uA :NG?@85%!0441u9 -96960(##,!iy<*½!B3323124'4.114337))[AEDBB08o" :T+$>=>=<>AA>>>>BBA???>=?>>?>@@@BCCB?<:9632! $%,FI]eI'Fj|xqjaWOLQQSY\]_`a_[_clpuwz~raTXjw89::99<>>>@CFGHIILNQSUYY[[]UD -0+0MMOGA92215.5F&'QzV_[ffaqsnh^P9$dN*469876DT`bI%9( $Odpx|we6#S{|wtsrppprlgfc_YSNI[N9##Gs#5afM#S+# ,3+ 'MSX]cs}_:BACCDEEHTcWp-`:ŸȴymYeKļV#CU_kq}<64EYdaW<5=97544/6:AJVWVTVUUSTTUVWXVUSK6002.(.5IzsghfffghhgaORgihlkfTINUWZUTTQW\aabefba_`^[[[YZXVWVWPRVXWVRUWYXYZ\^\^]``b_abccbbdeccccccddbbcdddabdbb```a`a^^]][[ZXVUTSTQPPNPPNLKNNJKMD*! :ZW) 3SjmcNDEECDDCA7 4>=;<>;¿¿¾¿ÿľ¿ÿŽgTTSQQQPSRPOI=12@KRTTUUUTTVXUUTTUSRSTSUTQSTUVTQQRSQQQQPQQQRQQQPQQOQRQPQQPPRRRSQSRRQRRRQRRRSQQTRQSSTRQQRRPPPQRQQQRQQPRQOOQPOPPQPPPONMOPPONNMOOPPPOOPONNMLKNNONNMNNMNLLLLKKLMLLLLLHD9-"&ul_0Yû¾{d<)+'#!!! " &{r? 
?PF<25JE6;::3(!/8882r9,87862*$ #)U~Nts*(ý @3244334'6./1003:((ZBCCCD23n$*KU.>>>==>A?@??@BB?>>=?A>=@@@A?AFEB?<;9863+% #:GG}ciU7^v}wrh`XRNRTWZ^_`bbaa`gkotw{}~p]MShS2;<99;>@ABBDFHHIKMPQSWYYZYZS/$-(#$$%),5ALPKA=L:#'oo_jcmlrgaebkokeXK,KqUB2+*4?A;52,&>3^ivf,5]zvtsronnkigea]XPEFk}1 P$>=ĿĿ¾¾¾ĿĿ¾ĴkQTQPQQQOOPMI>52:JQRTVUUWWVTUVUVTUUQSRSSUTRSSSSSSPRRRRSRPRQPQQOQRRRRRQQQRQPRQQOQPPQOPPRRRPPQQPPQPPQRRQQQPQNPQSQRRPRRPNORPOPOPPOQNNPOOONMNNOOOOOPOPQPOONNNLMLPPONNOMLLJLLKJKLLMMKLIC5'"~uk]2Yƻ~g;)+&#" !!   &ysA#2-(.1;40'$1=??>6 n;+7:751+$!#*0[)½"=0002332$7//23468+$YDCAAC04r  7\>????>@A???@?>=????BDBA@<;;8542&  "" GBQalZ*! 'Cl}yrmd]SLLORW[]_eeeaa^fjotv{tfRL_yv:DJFBABDCEEFFGHGLNMNPUWZ[XYXJ &-%#"#$#""!%'%-7:'&7zrcXZbeaclh`]R?3NW[M5*',3/'.7B[hw`?mxttromnkigec_[UL=1a`!C`"%V)/ILXT34cC%! 5YUXZij:AFDDEHNWe`2""~~vk^0Qº~g;(*%$"!   !suC"BYPD;4-%&+$#"#,;CFA=6h>+:;740+$$)Tv-("A11/1012'20/13566)&]DDCB@05q +Oa(@>@>=>>A@@>AA?>><>?>@>>>=?BDCCA@=<8653, $'!)H=Kelg(0Vpyzrjd\SNMNQVZ]`bhhc^_fgotxyz{|vhXQ_uN5PV]SSKXLMRQJLLKLOOPTVZ]YZXYB)(#!!## !!#! "!!"'K~qh_fTOIULRN7#:NX\P?2,7(#T`kvy|l?E||vtsqollkigda_ZQH1,tL%,>By"&*MJC2DlB'+(LUX_qt@ABBEHKTe~O!ij+),3- %>EM_koww}mS7"?ķ}o@.69?@BAA@A@A?=@>=>@@>><>>ABBBBB?<98540')!)"1J?Xfhs4# <`vzwmha[WMJNMSZ_`deed`belpnompw~zj[Wfrn2;GIWSTN`\X]c]W\VR[VSVVYYYZXW3 *'"!""!  " #)kwuid`TQTM- /CLMK*L]oz}tb%1|xtsonllkiifc`[WN@#,Yv9(#$$$%.F{"% !'&,.1046@@K=644(%! & $1MZdi8AAACGP[yB'-05@C3.550,--1624;@KVY_baFPeQE<<9SvǫzjPX_dimoonnmlgecegdacddedegfcdeedcddecbba^a`a`]][\ZZZ[ZYXVY\[ZXZ[ZY\[XZ[\]\ZZ[a`_^^^Y[[D47622UujghjkiikjeQUijhkh`ONV^agafhfe__\XVWVUWX[__`cdccaacdcde__`^^\Z^\\\\[[ZVXXYWWVXXRVTSRPQSSOPPPORSPQTPMOUPQQQQNOPQONKJNNMKLJEJKIHJLLKKIII>% $Nyc* @awxg1 + 0¿Ƽ¿ļÿ¿ÿ¼mVTRRTRORPNPI@34>HOQSTVTSSTSTUSRQPQRSURSSSQQSTSRTVTSRRSRTTSQQTTSRRRPPQRQRPPQPQRPQPQQQRTRQRPPPQPPPOPPRQONOOONNNNOPNPPONNOOMNPOOOMNNNNNPQNLNNMOONMMNNONONMNMMMNMMNNNLMNMMMMLKKMLJLLKH@2&xl_2R¾|iA(,&%"#"   ntG$F`[RHFJKB523/#%)1:EEA?<6b@,8986/)#"$'*Uľ%C3211107'7/.24557*#VDCBBB15t"%'Jk"9A<=?@AA@@=>>A??@?A==>=??@@@@AA>=:8453. 
+ * +>QA\jo|?'%Gj~spkbXTNMOPWYZ]`dfecdcdfgkorvyvsc[fp7189?EJOMOVTWihRk\Yjnai_d]^WVP$#+%""#!  !" !"2}|wurf\SD" 0EXjuxmMA_}wqpomjjjgghb^YSI97Tuh844321036ARUD711/2A-#%3A.+-3567979@BCDFHFGKRN8358Ofo;@CEEEWtu-"'-378=BC??EIIIOVW[\]behnqqqlgigaY\[_ef{ttnleaeijhgfhhjhihhhihfgeeccecedbcdeeebcdcadc_a`_^`__a^^[]^\]`]ZY\\\]\_a`^`a`a`]\\]\\\\^[\Z\G8855[uhhklljijjgXYglkjhbTS[`afabd\\YWTRUVXX[]``]\\XZZYX[]\XZXXXXWWUUVUZWVSWUVWWUUYVYVURRSQPRSONONOQRPNRTOMQQRPSTNORRNNNJKONHKIEJMLJKMLJJLKFC) +  >l: 0Zs|wB$  +»ĽþĿ¿qVSUPQRSRPPMKC707FLPSSTTTTRRUSSTRSSTSQSRQQQRPQQQRQQRRQPPRRRSQQRRQQQSQRQRPPQQPPQROOPPQRQQSPPNOQRQPPPPPOQOOOOPPNMMNONNONONNNOMNNNMONNNNNOPOMKLMLMONLNNLNONOOONNMNNNNNNKMMMLLLMJKMLJKJF<0%!~~}vi[2M}mB)*$$$#" !! jsF"E]XOHFGKPRG@?625=?BDB@>6[½¾>):996.)$"&(-a|);2310002'2./24557,"ZDCB@C31q$0iR'F:=>?@?>?=>===<=??>=@AA@=<865451. $#) O\=fnlJ/ 5Sm{wlf`YTONOQRVZ`bgkkdccchjnmmnvxthe^p~Z!286;  %:H^xq[>@\Xbjlnlhhhddd_[XOD0!%#"!!'DUxT?CDCB@??=?>:;????EMILN\MBAGFDHHGGKJKPPPRSTVPNNPMEBB?:O{lIECEHGMxw@#$*8?AHPQTT]ebchjgllorqpqsuwyuvsqmkighd^b_Z^cfhieffefggggdeegeeebeccefdcda``_[_dc`bdb_bab`bcba^\``\Z]^Z_`__a```a`aa^a`^\^^_^^]^^^_a_`_VA=>3\silmmljijlgWXhmljhcSNY`Z[XSVTVQQQSSUYYVX[UUY[VUYZTXZZVVVTVWXXWWVVWWZVTUVVVUUWTUTSPQSQPRTQNQSSQRRPOTROOTRKPQLPSOLLMJILMIJJIJJMLJKLJJKIEE0 +-^_$ &Hdz`. 
+ +¿ľ¾ǿ]RSSQOQPQPOOG:13AIPRRSSUTUUTSTRQQSTTSSSSRRRQPPQQPQRPOPQRRQPPNOQRPOQRRQQQPQQPPQQQRQQPOOOPPPQPMNPQQRQPOPQPNNPPOOPOMOQNMNONNNPNLMNMMLLMNONMOPOLMNNMOOMMMNNNNMMLNONMOONMNNMLLMMLMLMNMKHC8){}}xj^5O~jB&)%%$$# 96# !brH'A\WMKGEGJOK??98=A@ABA><7[ıxþC*98:72*$$("/V }¿*;22210/3*//034377,#YCDCA?22r "!'Ch.0A@?A@>?>==<=?@@BA@=<<<===?@BB@=:9:6330# % %( *e]Clnm[3  ;[}}xqkd`YSNKNRVV\_fkkkhfeffghhhlrxvrjbgv)#467===>>AACDDDGMPS[acppknlVQ7HqCBADDCH:*682--2Eaaeiinmnopppqopooppnlmnppnmmmmmjjihijhgghefdeeccdgdghfgfgeeccdc``cdccba^]_bccbccbab`a`bcbaaabecc^a^_aaaabaa^_ccbeefcb`aab`^`bcfebdaUKE7iqknmmlhghieV\jmlggcTOV[VTTVUVYWUWX[WXYWXZYVY[WWXYZXWXYVVWXVX[VVZWUVZWTSVYVVWUVVTUURQRRPTUPNPRNOSPNQQOQSSLQTNPSPMLKIKJKKKILJIJKLKKJJKKHD5 + +  #P}> + 8\s|r4 +¾¿þĹľſûþ¼Ľ½^STQQPPONNOMJ?53=GNQRSRRSQRSUSTURPRSSRTURRSTTRPQRSPQQRQQQPOQPOPRQONOOPOPPOOOOPQQOOPOQONOOPOPQQPPPOQQPQPOOPOPQPOOONOPRQPQQPPNNONNNMNMMMNOQONONNNOMLMQPONMNLMNNKMMNMNOMMOMKKLMKJKKMLJG>3&u~~~xia4K}iD$)%$$$!=|%$_qE'A_[UPLIBFRVSVR[JC@@@??>7\Ή&*#9tDW¾B)9;<90)$"#%#%vaYsv]rB w*B1244312(1//34576,$[EDCB?42s "! "%*=<#9B@A>>>B>>=<<>AEDA=<<;<=>>?ABCA=<;6250-3-& 5oQBjuld( *Df}ypiea[SMNNORV\dhkkkheecgggimlnuvvlihvK%5657:;=>>@BCDEGILLNMQX_ajhZJ&'?$&(+'"! !+Mh{xusqolcXJ,&  $6;*!#$%&%&()'(+<@:4366H[XH:58;;?DHILPQTXZ_cgikf`\ZZYWXXWVVVUVUVWWXWXWWUUWVWZX[YYYZXYYVWXWVSUQPTTVTPRPMOORRW\imrtkaFBD?><;Egifjkjllmmmmmkklmlklkmlkjkjihggeefehfdhefdeeeccdeggggdeecabbacbbcgeeefcbaaaddacedddccceddbabbdcc^^`cdcbbddfeefgfgffdccedbaadbeedbbb^_T9jjimomljihjgW]hmmhgaWXZ^ZYZZXYZZY\X[ZYZYXYYXXYYYWXZXUXXTSXZVUYUTWVXYXWVTWXUVXVWWRRTPNRRNQRQNNQPQQRQPORRPQROQQOMNOKKJJKKJIHIIKJKNNKLJHJKD7" #8hd* +*RlurM% ¼¼¾¿¿¾sYMQQPPQQOOMIA847ELPSRQRRQSRQSTQSSRSRRRRSSRRSSRQQQTSPRRRQPQPRQPQRQPOOPOPQPQNNOQOMOPPOOPONNOQOOOQRQQOQOOQQPPQPQPOPQPOQQQQOQROOPNNONLNONOOMKPONNMNLNNLLNNMLMNLLNONMLNLKMMMMNLKLLLJILLHE<+swmd4HjG!%*%&$(EsAQxcK:(Fh^UTQPj^@B??@>>8Z҂! 
.,[ÿG*99981+##%"bu*!t¾->3325441*41135677-"WBCDB?3+t   !!!!!#"##"#%%((("",??@@?>A?>??>>ABB>===<<==>@ABA@?<;85532* ' 0 OsHDqxqi, 0Nh}{vqkgbZPLMLRW[_cghjolgfehjjjkklptyulhyv*,-/347;;=>?BDEHIKKMPSQVXWUE,g/'&%*.'" !  +Cc|ukrsmklloh`I'"%(3:)" ""$)07:51/../.--.2213145666:MPDEGLNSWWZ\bfgknrqstrqlhdaa_^_^]\\\`]\\\[[YYWWUY[[][\^]]^^[Z^\^aa_]bcigijojlkkkilottywwupkU;=>:2/8J``^deffgfgfggegfccbffhhhgffffhgfdecdgebe`^c__^edehjhghhfgffeghgdcdddeddeddefedffeeedcddcdbbceedb`aggfffgfhhfdgihggfcacdcdddbdcccbc`_[NrijknmmjhhkgS`gnojh^X[[]Z\\[[\ZY[[X[\SV\ZXYYVVX[WWYWXZWVTWZXVVUUTUXYVWVUVUUWUTVWTTTQQQTSRPQQNNPOOQSOMQOOPPMOPMKLMJKLJLLLKJHLKIKPMJLJIJKF@( "'S= #Edv|c, ǾľYNROPPONOOOMD7.4?IOOPSQRPRQQQQRTRRQRRSRSQSQQQRQPRQPRQPRTQPOOPRQQQQQQPOONLOPPOQPPONPOPOQPPNNOQPPOONOPOOPOOPOMOOOPPPQONOPPPPOOOPPNOONLNONNOMMOOONNPMLNNNNKLNNNMMMMMMKLMLLLLLLLJLMLJIJJA8(m|}vmc=F¾~lE"$+%&%1dT <_og]ZWZXVPQKGCI?65'@a_VTSPαk>B??=>=7M҄DQQC-UþE(88762,&!#$!?c{a$x³¾->5535333,9./145890[DLQD@3.u !!   !""###$#$$$%%'))))*'%&4??@@?=@??ABA@>???;<>A@BAAA?>:855530 (&  "bbDIu{pr5  8XuqmgbZPMPQRUZ`cimqsqpjfijiijjnrw~zxzy>"$+-159;>@BEHJMOSSSUQO7(:QD43-1578$#  $%)-,0=>@A?@AEFHKNRUUUVXY[`dfikorsuyvxxurojhedc`abcaaba__a`aa``]^^c_bddeehgimronssvpvvxsuppnmljkkklmnommloplbT2274(&2ERV\\`aaadcabcbbbaccdddcedefccggfdgeffffgijfefeheehhigkgbccgfhfgdghhffjhefefecffeeggffffefedefefd`ageeffffgigeghfigcc`bfdca``a_\\`^[Z^_zlllmkkjihkg\djllji`Z\][\\ZX[]Z\\YX[\WWZZYYZZWUYXXWWXXYXUUWYWVWWQTVWWWUVUUVVTUTUWVQORPQSQMLPMMPMNOQOOOOMMOOLLOLKLNMKKLIJLIGJJIJMNKKLKIGHG-  + !"BrI "9]qwv: þýþþľľ¼ŬbPNONLPPPPNLH;226'@]]YVTP|ȿl;D@>>=;6L҆3xOĿJ&78872,'$ ""! 
_{R`-%r¾+?4433432)9,.3557:/!ZKq~H@22u #" !#""#"!#$$%'&(&''&())+,-/1.++/=@>?>?E><<>@@A@A??><;<>@>?ABCB@>956661.7 +   +/jWEHyv}B  8b~zvqlhd]WOOOSY[]ahouvsrnlljimljhmx}}wsri& $(.37>@CGKPQQUWP+"'015367CW3+(&#$" !#(,1336798737794>@GOYYQFECBCA@@??>60)#,7>:8IяcѬ]P¿P'69972.''%!!!"-pJ&oÿ+E2322133(9/145578.!ZLlzN?2,r!#"!"#$$%$$$%&''*))*++,,,,/02454425@@D>>=<=>?>?==>>><=?=<>@@B@>956673/&  +% +  AlUEJzrN" $?[v{tpmjd^XSSRQVZ^bimuyssmklmlkjjkpv|{smksv<* !"(,.6>FJLNPC'!'-/01436::=G?;75229526>ABCEFGHHHKKNOUUV][\[VQQNKKLJGHHGEDC?;7/,*&%;EIOQUWXYXVUUUTTRSUVXZ\\[^^^acaeeddbchlnpsuwuw{yyxywvtqpppqprrpqqsututsstsrtsqqqnonnonmlklmnlmonllllkommlmkllkkihggeghhgdcT=9>705:NW\`cgggdedffghiggghffcdedbed`bd^abagfdgfegikikihhjkkiigggfgihihlkkljihhhhjjjjkhfijjijgdhjjigkhdede``_]\ZUPQTRNNMKMHJHKJNUZ[YWYVXZXWiu|omknnlikllhZenmmjha\]`^]\\][]]ZY[]YW[\WZ[XWYZXYXWUVWVWWWWVUWUTVWTVYWUSSUVURRUUUTSQQRRSRRQLMPOLQPJLQPMOPPRSONNOMMKLJHJIJIKKIJKGGKKGKKJGHG9  + #Yp1 'Onuyg/ýĽuZSQOMMONMPOMD803>KNQQQRQRQTQQRUTSQQTSRQOPONPQQPQOOQPOPPQOOSPQQOONOOQOMOQRQOONMLOOOOPNMNNNNPPPNNQPOOONLNOOONLOOOOQNLLLLNOMNOPNMMONNONLMOOLLOONNMNNLONMMPOOPOPNMNLMNNMMNNKKKIJMMLLJLLKH>4&Zzn`BDmJ#"-**1++68:0.,51% 5[pd_Z[\\ZZ\b[_\XP=(;^^WTSTsŖrYw9D@@@><8Iӑ)3s?PþO'5<;:762/)''&!! $1aOm붫,D4121324);/024577*ZCdzN<2*t!$$$$%&'&&''()))+,-,-////03579;;;<;?JD>?AB==?<<=>@@>>=??><=>>?@ACB@;986652.   [sRCRu~\) &C_yxvpnlib\UTUTXZ\_flrrtqqomnnmmopswvriT76.!"#&'(*+*-/6>?IJD579;<==?@CECCEEHIILQ[PJILPORTTTUWXYY\Z^\_`[^^ZYXWWWWUTTTRPPQQPNMNQMGJPTTXZ[\]__]\\[]]\]^^`abbdcfgfggkkkkjmoqrsvwvuwwvwxxvuututttstsqqpompolnmmnonnnmmnoonooonoonooooonmnmooknmmnmkiihiijikkifccdS?;769BPZ__bbddcfefffddeec`bcccdefiihhfhhiiijjljjiiifhgihjjijihjiiikjkjmnljjjjiijlhhiikihlohfb`^cdf_`YVRNJJG>@@@><=>A@AACCBEEHHLU^^[ZY\\]]blyzpoomokjjkmh^emmmjg_[Z^`[Z[]]]]YVV[\XWZYZ[YYXYXYXXWVWXZWWVUUXUSUUWWXUSSUWVUSRTVSQQQPOQQOPQMLOMKOOKLRONNPLPPQOMONMIKKGHIIHIJGHJJJJLKJFHHEG?& +  Am{. 
++_sutt>üľľļĿ¾eQRPONMNNMMMD;..7DLPRRSQSQPQSQRQSRSPQPPQRQPNOPQQOONNQPQPOPPNPPPONPPONOOLOPOONNONNMMLMMMNQQMNOOOOOPOOOOOMNOPONNPOONPPOPNMMOMMOQNMNNMNLNMNNMLLOPNOOMONPLKMNNONNOMNOLMMNNLKKIKLJKLKKKKJIF;. R~}}}}~~zn`>CnL$!-+.TibnezhiisO$ :nG)7\^ZTRQqϸ}9A??>=<8Fӓ`Z$$OÿP*8<<=>:.'$*.-''#!iz}/=3123435+7./02466-XCm{F=4-r"'%&(''(')**,,,-/0111233467;;=@CCCCENXD?AA=<><=@@?=>>>???;;>=>CCDA@<9997420)  (miKEVvg2,E`x}ywtqnjd]WWSRVX[_cimsysqrrsrtrqnsv{rM.  "$''(*,-01238:;<@FIJKPOQHFGHFHIKLNQQRPRVXUVXVTTRUX[^]\]^`a`baabaadba][[[\]]^`^^^]^]]\]\^^[ZWWYZZ_bcceeeb_deffgffhjlopooqqqstsrrsutstwwwvxxuututvsqrppnmmhknnmnonoomoqnoppoommmnonmmnnoonnnnonpmnmpnklmmmmnkljjkkjjiinjifhTB768Mfefihedddceefcdfggghljijijlkkighhiiikjhjiihgfiljjijljjhjkikjlijkllnkkefhgbb_\^ced][]TMJHD?AA>:7985789668789:=@A>@AA?BEHFJX\\\]]^^_`cmy{nlopnkjklkh_gmnkjf]\[^^]\\]_^\ZYVW\ZWY[[YZ\XWXXVWXUWZ[VTVUVVTTUTXYUSVTTVSTURRTRPOPONOQQOPOMNMNOMLNPROLOMMMOPLLMKGJLIHJIHHKJHIKJJKLIEFHDDB- "1YC 5`wuvxN%¿aOPPNNMMOMLH=305AKNPOPQRRRRNOQQRSSRPQQPQQRPQPPRQOOOPQRQRQPPPOORPPONPPNNNMLKLOMLNONMMKLNPPPPOQPOONOOOQPOONONNMMOONNMOOOONONOMMNNMMMMNNMMLMNNNNMNPONLMLNNNNNNMMLLNONLKLNNKKKKMMKLLLLLJG?5*S~|}}~~}znb=>mM%"+).Owstep^YVP<#!7~oH)9\_ZVTUrӻʃ8A?====9 Aљ3H"#,IV)8;=B=1'$%.A8%2&$$m쐘0B3334446.5-/02457-XDenC>1(v&*)'*+)+,,,-/02345667789:<>=<>@?>==?@BDB@;99:85321     +={_NJXwu9"!/Nf~{wutqmha\WUTU[Z]`gmrvxxvxwyvvw{y1#&')+.13689;=@BEGIJLQTXXXWXWRRSRTUTUYYX^\[XZYXXVVXWZ]^_b_abddeeebbbccbbb__^`bbdbcddcbbbeeaee_ccdcegjilopppnoqpqstuwttuuxwuutvvsssrsqrqqpqqnlnopoqqrsrqoppppqqqpqponpppoommnpomlnopomnmnonmjlmnnllmmoqnnooqronlkkjjkljigjhjilbZFAGTfhloqomklmmkkkmmlmmllikljjijljijjkkjjkihhkjllmlillkjjhfhgigbeghgaa]YZSOMPGJOONKJMJCCB><><89865556335434567891%M}}}}znfC:{oM& *+,+0-+$#&# $#!5uorzrF'7aaZUSRnɣɈ9C@>?=<: ?җn|\TdHT(8:?M4.'%);B42&% h띫/G3322346-6+.02466,SB^[?=-,q(,,+,..-./013466689:<;<>?ACCFGKMORUVUfhBACC@?=>??===>==?@>><:<>AD@?;99996520-    UqWMGUzA 
&#"1Nh{{yvsqmje_ZZXYZ[\_eovy{{zutr|z*.02469<:9;FIGDEHDAA@=<<;898656543344444778:<>=?@@ADDCFHLW``^aaceacfp|zppqqolkjlmielmlkkfX[`[Z^]\Z]ZZZ[[ZYYZZYY\XYWXVYXTVVXWXWUSVWQQWTSWWTSTRSVVQOTSQSSMMQPLPRJKNNNOMLMLLMMONNNOOLQNMHJMJHJJGGIHGFIIIHIIGHFEDEDB6' +  "Cmrxyvſżü¿¿qFLLKKLLKD801=<9 ;ё$vkĿV)8=AN2-)+6IC2 .!$"d뤪1F0334336-7/022786,VJ}p?>0*u../..000013456788;<=@@BCDGHIKLNRWZ[[^_aTACCB@@=>>??>@@?======;>ACAA=:98:76311&  "clWNH]{L(-- !!'7Qj}|xvtqlid_XZZ^_bdmqyzofXpÀ468:;=?BDGIJLNPPSTWVZ[Z[]\YZXYZ[]]]_^_`_^\^^__]_aacadghijijkjjkgggeacikloosutvvwwwzz|~~|zyxvvxvvtsussropoponolkmmlnollmlknpooqpqonprpoonnpppnprsrpqsrpqoppopnoqnopnnmmnmnnnllnnonnnnnnlljjkhikkjklkjkkkkkigggedjhkjjkihigifebcgjilkjjijjjilkikkmlmnnnmnmkiffeededdgeccca^^Z[]VVQMNMOLJH@;96421& .16:<;96=HHFDEGBAC@A@<;=:7786443354455668<>>=@AAA@CDDGLXd`_bacebefu|woponnmmmmke`mnjjjdZ[_\[^[\[\[^ZYYYXZ[YWZZWYXWUYYWVVVYYYUSTVUSSSVTUWTRRSUUQNRSPQQOPPMMPQOLJNPMMNMKKMMPPOKMMLJMKIJJGHJKGGIHGEFIGGHHDFGDCCDB8) + !:`zut¾OHLKKJIF?4.6BIPQQQPPQTQRRQQTTTQQQPPNOPQRQPNMPPONNLNNOONMOQONMNOOPQNONOOPOONNOONNMMOOMMOPNNNONMMMMMNMOOLMMMMMNNOOMLOOLNNLNNNONMMMNMNMNNMKLMMONNMMNNLLNMNLKMMLLONKLLKKKJHIKJKLMLLKJF@2(L~}~~~|qe?7mıpO',.-=vacnebV##!#tYRTf{pM,/]k\VTLeMnYzhAB@>=><:!8̾͡IJZ*;?FN3..6@LD9&#'$d뜡3E1255424,:20257892VGuq?<1't//211222347899:<>??BDEFGKLLOQRTUZ[]_adieQFDCBA????@@?>?<=<=@CCB>?:8964320+   8xpUOI\|T58,""$'(*9Rfy~|zxusqnicedegkkpo^ZV}îO9<>>?DGGIKOOOQSUXXYZ\]][^``[\^\^`abcb^_aceefhhhkpmopsstuusssomsomlf_glttyxv}zxxwwxyyxyxyyzwvurssrsronqooqmqpooppppooonqonpponoppqppnlnnpoppppqqqqqqpqpqpponnnmmmnommmkkloqmlmmmkikjklimkjkifhhilmmlmljjjjjgfgfghgkminjgillhdddbdgheijlnnlljjjjklkikjhffgfiedcbb^ab^^]\\YWWRPTQSUQNLHHIJKGC<98610-$/247;:96>GDFEDCCCB@A@>>><9887344464355469;>=?>?A@@CDDEK]e`accefdfgt}wlonmljjlljfcllkjjb[Z\^][[\XU[]ZYXZY[[ZYXZWXVVVXXXVUUYYUVUSTVRORWRRWTSTTTTSQQQOOOQQNLOMMOMIMNMLMKJKMMPPNKKLNKKJJJJHHGHHEEEEDEGIHHIGGHFECFE:*# + 
%Tsw¾¿¿zBJIHHG@306@JMPTRPQRRRSRRTRPTSRQOQQQOPQQPOPPOPNNONNNNOPONOPOONOMMMPNPNNNNMNONNONNMMNMMONONNNOOONNNNNNPQNMLMMMMNNPMLNOMNOOMNNMMMMLNLKMNNLMMMMNNMLNOMMLMLMKLNMLMLMMLJKJIHIJKIJKMKJHF=1"E||~~}~}}pdE6\|£sN),/-)S`M9KRB>:$&" &`UV`vpN..^g[URPUbUZ~tCLEB@=>?<9!1ÿ\*==@P027A?JE8)!e유.D3344412*>11247982QExr89/*r2254356679::=>?@BDEDIJKLOQSVWYZ[]_`ddgig^MDB@@>>>>@???@?>=?><===BBA@?=;9763240* " + DdSOKBT]F<# !!!!" !$$%'+,+,/6C[r~|{zxwuqoqqrurbWl7?ADCEJOOQRSWVTXZZ]^]_ceegelhhkkqrqnptttuuuvvvxzy|zxwvvyyxxuuutspnidbcclpsvwxvsutsvxyxwvvwwvvtqrqrrrroqqqrmolmlmpponoqopoopopoooqqqroonmnpqqpnnppmknnqpqpoppoommnonmlllklmlhgiijjijkjlkllljihkkjkjjikkjljjmllmmmllqoiggillkeb[_bbdeeheedeeec_b_^__[YUTQOZdifdeigcdfa\\[YVVXQPSTTUPNLGFHJJFC<:8610.%!0358:<:6=EBCCBAAA@BABBB?><:665333654555689;<><=>=>AAADH_hebdfegefjs~rmqnmmlllnkggmmkggb[XZ^`\^\XVZ[ZYVYZ[WX[XXXXSWXVUVVTTXXSTURTUSSUUTTUTRSTRQTROQONNQQLKNMMMIJMNMNLLLLNONKKLJILJHILHIIHGIJGGFFGEFHHGIIFEFDCFF=,$  !Ag¿ýa9DAA=6/1;GKNQQQPOPQSRQQRSRQSQOQQROQPRQQPQPPOOPQROONQPOPPNNMLLPONMOMNNMMKLNNOOMMMMMLMNLLMNNNNNONOMLMLPPMNLMNPNMOONONNNNNMNNMMMKMNMLMNOOLMLLMMNNLMNNONLLMLLLMMMKJKKJJIIJKJJIJIIJGA8-@~~}}|~~qfG/OoźtO(/93*&&#%*!(,+)3.#}[S\vppO.+Z]WTQOI@\l{tHKFC@@?=;5$/ƺa )9;@M;@9/+HB3~)#b1C0234303+81001478/KA{l>=/#s335678:9:<<=?@BDGIIHMNMPTWYZZ^]_bbcfhghgdZFCA@?>>@???>>>==><:;<=?@ABA=;;663443/$ (" +@@MNJ:OjyeP8  !!"""#$$%&&'))*+,./0/..0/;Qg{~~~}vZ:EHKLPQ[\\^ceedfhkklmopqsuuwwuwuvxxwvwwvuvuuutuwxwwxuvxxwwvttutpqokkfginsvtutsrtssuxywvuuvtssqqsrqqqqnpooonononopprooqooppplmlmmlknnlmnmmlnnonopqrrnooqpnmonmmkllmljmmlkkllkhlihiiikkkhhhiiklmmklolmlmnnmllnjlljhgkidcjkkifaVBJZaab`\ZWSPOLFA>;>CFHBFFGCCYinfikighib[``]XWXVUSSSSQOLIFGIJHD>;850/.$#14899<:6=CBA@ABBBADABCC>>>956644335554568:<==>==???ABEL\ceedfegegmt}qnpppommlmkehkmlghbXZZZ_^^XZ[YXYYUXXXTWXUWXURUXVTTTTUWVQRRRSSSSTTTRPRQQPOMOQPPONNMQMLLNNKIMMMONILNKNOMJJJIIJJHHKJJIKIHHIHFDHEDEHIGFDBDDCDEA3& 
"4½O'234/,5BFKKNOMMPOPRQPQQQSPPRSQRRQNQPQQQPPOOONOPPONMONOPQNMMNMNOMMLMOLMKKMNNNNNNMMMLMNLMMOOMNNNMONNNNONOOMOOONNOONOOONNMMPOLLLLNNNMMLMNNMMNOMLNLLNMNNLLLKLMMMLKKJJIJIIIJIIIIIGE=2';~~~~}}~tiE1VpýsN&9haTH@@[dL]a]_ia){TXtc\pO,+X^VSONMXhYTXSYRIC?@?<:6#0e#4;=IGD1((A<(|+ c4D0235436-522/1566-OOi@:3(s66899<==>?@BBEGILNNPSUTWX\^]_`bbeeffghjjieWFCBA@><<>>>>?@@><;<<<@A@?@?=;87664622 + &.)#!"# .69CNAZlLpY{[U.!!"!$$%&'&')*,,-.//0241433334631/27A[n{QSZ^`bgimpswwyvttwxywvxwxxxvvwvuttutuussqttsrssqsrrtsruvvvvvvvwxwuronmnpuy|yurpoopqqtuuuutttrqsqtqppponnnlllllnnmkkklmnjmnmmklmooomoppqqqropoooonkjkimlnllnmlkllollkjkjhjjijjiljhkkkkkllllmlmmnlklmkllliiihegc_bca_cehhikgccaZVX`ihfbOGJIIF>933335;BFFEFFCFSkijjkjhfeca__^YWYXQSUSQPKHFHIJIF@;9610.#%14698:929@@?>@?=?@A>ABA<>>967644345654447:>?==???BABDGL]ddeedfgggov}smpppnnmlkiejillijb[]\W\^\XYYXVXYVVXYWWXWVUUXTUXVSSRVUVTRPRSRQPQRTQPQOOQRQOPRNNOOLKMKLOMKKNLLNNJKNMMNNLKIIJHJLIHLIGJIECFGGDFIGEHIFEEAACBBBA6%  $ľ¾¼Ĺ/$$#$"",37:?CGKMNLLONOOOPPPQPQPPPQOPQQPQQONOPPPPNNNNNNOMPNNNNNLNMMKLNLMMMMLNNONNMLNNNMOLLLMNLLLLMMNNOMNNLMOMLKMMMLMNNNMMNMMLMLKLLMMLKMMMMLMMMLNMMNLMMLLLLJMNMMLJIIKKIHHIHHGGHD9-!9~~}}}~qhE0SgúrO&<7 .¾f4<;@E4,%$$"|*#\𖇗0H2/23013*71./1469-LPfW@80+q<;:=<>@ABCEHJKNNQSUXZZ[[[^`baabbeihghillkkfPADB??=<>??@@>==<<;==?BA@@?<=:7895552) 86/**,$=.2>?%cz3FOA<[P((((*+,-.1225698:=>?@A?AFEHGGHJKNOOPHS\euyucfhhlooqrstttuutuuuvwtsrsrqqttqrqpqqqssqpssrstsqrrrststuwvwwxvuxvssppqrnmquzxtsrrrqqrqpsqpnmmlonmmmnnkmonklknlopnrnopoqrqqpprrtspnnnonoqljjmllnmmkjnklnnjlljkjklkjklmlkmnlklnjlmljikjjiljkjjljkjhgigfhjhgedefggehihikmmlomlmlqqonrllifPFIIHE?:52104:BDDFFFDCJfmiklhehfa_a`YXZXSWXVROKHGHJJIF?<:600."%135889727<=====<=>??CC@>?>:87532335644646:====>>>@?AEGLdgdceffffhov}pmpoononmmlfikkjig`Z\\Y\\ZZZZZYVWWYWXXUWWVTTXRRWUSSSSRUUSOQSRPQQQQQQOOPSTPMOPLMONLIKKLLLLILLLKNNKKLMJKMIHIGFHLJIHHFGGFDCFHECFGEFGDBCABCCB@@8$   ½¾¼½þêI! 
!!"#'+.37>BHIKLKLNOOOPNNOPOPQQOPOMOPPQQPOOPONNONNNNOQONNLNMOPOONMNLMNNMNONNLLNONNMNNMLKLNLJLMLKLLNMMLLLMLMNMLNNMNMLLLKKKKKMKJLLLNNMLMMMKKLKKLLNNMIKNMKKJIJJIHHHHGHHE@6)6}{{~~qhG(0=vN"0OPJ12/.1.G4%&)&!sonJjMY~sT-)X_XROLMri{swc`UID??<><6 )ƽj 5<::;7.&!x,%]/C4222213*70//2454+NC?><90&u<<<>@BCEGILMOPQSUWYZ\]]^`aacbefegkkkkknnlli`K@A?>=>>>@@@=<=<>AC@?A>;:964333355346569;:====??=BFGPfidbefghfimw}rnlopmnonnmehkmijk`Z[Y\\VX[TUWYTUWZVVWTRVVSRSRRWURTTSSTTTPPRRONROMQRNMNQQOMPOLOPOOKJMNKKLJKKLJLNJIJJIJLIGHIHFIKIFGHFDDCBEGDADEECEFBBCDCBBCA:& '! ¿;"&+,/89<:96 (̾n!6:<8653+'#! u.$]Ľ6F3442114*70014576-MA<<;82)t<>@AEEFHLNOQTVWZ[\^^_`abccfghjjjklklmlmooool]CA=?>>??A?@><><>>>>@A@CA@A=;;;988352. )>DBCA=?A=;8864223444445579;:>==>??@AAEQehdbdfggfhmw}~snnqpooonmlikknkjj`ZWV[YSV[VTVWTWYYXVVVSRUUSSTTUUVSRTVRNRPOPOLMRNMQQPPNONLNPMOONOOIHNNKLMJJKKKLKKJIIKKIIIIJJFGIFBEGFCDECFEDCCFEBCGDADEA@B@<;- $* ü@ !"%()-3<@GIILNKKKKLMNOONMNOONOONOONMNPPPMMMNNNNMNMMMLLOMMMNONNNLMNMNMLLNONMOMMNLKJJMOMKKKLKKKKMLJKLNMMNLLKLMLNNMLLLNMKLNNONLMMLLMLLLKKJIIIHGHHGHKIGD@7,2~~||~}sdK 5ǸwQ" %1-,>vjhahpm@$" lqd`dkysW0$VZTQMKIHGHIHEFHGEAA><;7(¿p4:<84,))'#t/$bý9L3453125-74454456/NB<<:70%s<;??@>>>>=;=?>=@@ACD@@?<;::876541'$),6DIQTP8@>=<8763224345555689;<==<<=??@AESfgdedeggghrx|~omoroooomlnjklljji\VYYYYXTWVURUWVUVYVSUUQSTSQRTSTURQSTPMQQNMNMNNOPOPPPNOLIMMKMMPNLHGILLKKKKIGJJHIJGHKLHIJJIIGFFFFFFGEDFGFCBCCFECBDD?BCA@BA>=6 #%  ½û¿¿ÿD  !"$%')-5=?CHLNPPNNMMMONLLMLMMOMMMPPNNMMMNMLNONMLLMMMLLMLMLKKNMKLNMLNPOONMNKKMLMMLLLKLLLKKKKKLMNNOMKKLKKLLMONJLNMMMLMONMPMKLKLMMJHIIHHGHGHIGHHD=3&+}~}|{reK /yxR$ #/,,5ZFSIJUQ3# gz}rW1'X_SRNLLIGGIFCFGFFCA>=;6%w5;;91*&" 
v6&aľ<N4213135.567643350NC;;;82*s=<>?>>?>=?==??>@@CAC@BA=<:;;97443349@AFIPZ^_SWZZ^SQ[Xabbeinfjhihiikjlkjpptustussusqrqqqqpqpooooonoqpoommmmnnoopqqqppprrooqqpqrststsssrrsrssqqrrrrrrqqrrttuuvsuvssssprrqqrrrstvxtvtsqpqoqropmlhcfgkmmlmrpoonpqqlppnonlkjmlmkmljkjihnlhjkmlkmlopnoqqqqqqoonoqqoomjjhfhfecgghglmpprrutuvutuvyuxyyvvwxsxwxuwxuruwtuvwsrsurswuqtusrustrqrvtrrqqrsssqoqqmoqrkprnknqog[QIHIIF?<74346;@DFGGGFJZjiedfffccccZ[VQOMLGGIMLLF><8410,!-7687;:623246776568;:?>><;:7663344345547779;<<<;:<>?ACGUhhcfeeiihkuz}onpqnmnonmljllmnkk]XZYVXXTUUURUXUSUWTQTUPQSTSQSSQRSTQRRONOPLKNNIMQLLMNNNMKLNMKLOLJIIFIMIFIKHGIIJHIGGILKHGJEHIHEGHDEGEADGDAABBDFEB@A@AACA>>=;7! + #% ½ýE ! #%)+/6;>EHKLNNMMMKKJKMMMNNNNNKLKLNLNNMLNNLMOMMLLMLLKJKLMMLLLNNNMKMMLOLMMLLLLMMMLLJKKJJLLNKJJKLLLKLLMLKLMMLKKKJLNKKLJJKJHFHIHIIIHIHFDB:-! (~~~}}zqfL-o¾xS# "1.**.&&&)'%"!# ^v\0*T[RQMLKJHGGECGIJHDA?=:7)}y*7;;5.)&# $v5"];O4432136-978:76662KC<;<;6+qEORW]\^dfcdegghhkilmnnpoprtvwsuvy{wxy{{|||~lE;>???>===>@>>?A@?AA@CBAA?=::;864444:ELPRSVWWZ[]^^abclhdcdffpyejfjliinlknnnpqrrsrqsssssqqqpqrrrqpqppopprppqppqqrrqrsturrsssutrstustutuuuuussrssrsussssstrrqttpqsssqrqqopnpqpqqrmnonqpnqqqqqpnhhjgijlmoopoqpnmnomqmmlknmlkkljlmoooonnprpnpoonqpprqoqponqrqonprstprsrstqutvttuzwxuyyxvyvyywuwyxtvwzvuwyvwwwvxvxusxvwywvutuuwvwuutruwsrsusrsqprsqqsroqqrnqsshqqomopolkaTKHJHFD@;52237=BGGJHGETglddfe`ceb\]WRMLIFEJLJJE>=9400- .6558::60102352344699<<=;:8665233334433777:;::<;:=>=?CFTjfbedfhihksy|tqsqnmppnllklmnlki\YZXUVUTUUTSTVTTTXVVTUTSSUTPQSOOQRMMPNKKLKJLMLNNMMMNNMOLIMLKJKGGHHGILJIIIHGHIKHIIGFIKFEJGFGFCFHEEFCABEDBACCCCDB??A??A?:;<96% + #$ ľĶL !!"'*04>EDDJIIJJKMMLMMONJLMMNNMLLMMLLKLKNMMNNMLLLLKLMMLMOOLMMNLMMLLKLLLMNMMMKLLLLLKNLLKKLKKILMLMMMLKJKKIIKJIKKIHIIIFGGGGIIHGHFB7* (}|}}{pcI,lyY$"2.*)&"!$'%! 
"$"^t`3&V`SSRNLJGGHFEIIHFC?><:8&+}}E<::5/*'$!$.t8"\?J7554355/?9:995975&RE?>>B@:x~~zk]acellmqrtvwvxxy}z|}||}y_@>>>@?>=??A?@AAAA?@ADBDA>=<;;:655358DPZYZ\[[^`abcedglheedfegbntvkhiimnorrrrrttuuturrsuuvttutstttsqrpooopoqpnppoqrtqppqrsqppqrrrrqrsnrrqsqqporsqpqrqqpqqqppqqprponooqpoqrprrtsrrstppqpqrrssrnlnpopoqpppqqoprrqqrtvtwtroptsprooppqsstsprsqpqqqqqsrqtsvutusvyxwuwyxzzyyz{|yyzywxx~yzxyxxuwvzzzvyyvuvwzywxzvyuxzyvxxvw{zxuvwuuwzvuvvpsuvtrsusrturssrrrsssrrqsptnsopptnnsmiiXGHIHHGD=61025:AFJJKIGLhgdfgbdbbd_XPLLFFFJJJJFA?9212,18558;:5/100141244389:;<;:8674331124333369:::;<;;<:==CGWjdbdgiihhiuz|vprpnoonllkjmmmmkf[[YXWWTWYURTRPSVPRVVRUTRQTRPOQPOPPOOMMLKMNMJMNMKMMKJKMNLJKKMLKIGFFIJJKJHHGGGHJGHJEGKJGEHGEEFFGGFGEDECDDC?CEA@CA=>A?=?A<<=;9' + $'$ þļŸR !#%',/6@EFLPMLOOOMNMKLJKLKKIJLKKJKLMOMMMMOOMKLNMMMNNOOMKMNLLLKLLMOMLLMLKLNMKLIKLJKLJKLKKLLKJLKJJJKKIKJGIHFGFGHGGGGHFFD>3&  %~~z}}}}~qbN-cz[& 0+)($!!"&%# #"Uv`3"U^Ta\XSKEGHIGHHECB>;;99/)y}k?:=:1*'$" !(6$t3!&/+*7dF/T?26:7:?@6J=@BGJIPQRpfh]]hpqz}~}~|}}||}|uU???@@??=>??@??@>@BA?AB@>?>>:7666366=MZ]]___`cdddfdeefgjiggijmonkojkjkjllloqmmprrpooqpnnppqqqoooopnnnmmmoppnpoopoooqpppooopqpqqqppqqpsqqsstsqpoqpqsusprrqqppoqqqroononprqopnnonkpmonllmnppnprroqtuusqolmknnlljmlnnryusqrrqqtpqsqttwuunxxvvxzwuxzyx{}|xz{xy{{zxy{yy{{yxvzyyz|xyxzxxwzzzwzz|{{zzvvyyuy{zxxyytxzwsvxwt{zvvuutvvxvvsutyuwvtsvvursturuursuspqrrnrrrmnrpkmqmkkj[MFHJJHD@41013:EOKHIIFJYgdfeebcf^VNJIFEGHJJJGC?8211,19998:84132012124224889:977764331124444467899<<:;9;<=@HWjcaegkjjhkszzsornmonmnnljlllklfVYXXYWRVWQRUSQSRPPTTRRSSQOPOOPRSPNOPJKKKLLJGLMJLPNIILMMMKJKKKHHHECGIGJIHFHHFFIGGGFJIEGEDFHEEGEDEECCBBCBCBABAAB@?>>A><>==<=9) + &*'  V!%))*16?HIOPQRMMKKKKJHIKJKKLLLKMNMLKLNLLLLMMLLMNMJMMMNLJLJMOMMMNMKLMLJKJKLKKKKLMLNNLKKKLKKLLKKJIHGGGGFGGFGGIGFD9-   #~z{}|||reK1`ÿxZ% 0+)&%"!#$$$ 
!%$!Rwb5$V_YYY\WRJKKIFGFEDB?;:897'.{sJ8?D;-)&+(!+149<|@4Redpw{zwrȾpnteacjnstutzpssrsvyv{~~~}|}|}{{{|{{z{nK?>?@??>=>???>>@@@@>AA@?@??<7678754:PT]`cd`cdcdddcdbdfghghhiijmkkjkhjkkmnnnmpqqqpqrrrqqpqrrqpooooonopqknpqppnnrqpnmnppnopqqoopqrpsqpssssrtrssprqrturosstrqsstrrtrsrrrsttrqrqprpttstpqrqsrtrttrtvutonlllmrrrqppnmnnxz{xtxywwy{xwy||z|}}y|~}z|{ywy||z}}{y{{yyz|{zz{x{{zz|yz||{|zzyz{{ywyzwyz{yxxzuwxyrzz{yx{zxy{xwxyxyyxyvutwwuwxvqvwwsuvtrvxsqstrpsusstsootrnoqtqqrrnnomomnldPIIJIHG?31104:DHGGJJEAR\cffbbd_TLJJIHHJJJIEA=8200,2989;;:4011/11.02013457876554431113455356769898898:;<>DZmbbgejjjims{yolplmonnnmkjljkjkeSVVWXXUTSQQTTSQPRRQSROORRLPQMKNOLLPPIHJLJJJKLKJLMMIILJKKIIKKHFEHFEHIIKIHHHHGCGIGEFIGEFFBDFDDECCDEDC@CCABC@@AC@>?<<@>==<9;=3%   "{{|~~}~raJ2\w[' -**'$$ "#$$#"$#!Lyb: $[aUNKOMOMKIGEBCEAA?<=:753>wa:OEJOB22=XYC@ACJW]cmrt}|}}}}}~~~~|z~{{~|{|{{|{{xvtfG>?>>>>>??>>?>AABABB@@??=<;:9898969NYaehiffeddfeegfgiklkkmklmllklllmjjnonnnonpqqrqqssqsspnpqpqrsqqrrrprsrtussuussttstuuuttusuttttvvrtutsuvutvwtssuuuvvursuuussttutvtswvutwuvvwxzxzywxuutvsstuvvwvprvwuvz|xyyyx{~{y}}{|}}z{~{v{~}y{}|z|}~z{zxvz|}z{{~{{{{{|||}{{{~zx|~|x{|zy{{wyzzvxzxy|{yzxzx|y{xyxzzzyy{xyxzyxv|wvxvurvxruxvqtwvstuttvvrrsussqqttssqqqqqpputsorqlmqpmlomcUHHIIHD921134;AFHIJJHFEaegdba^QKJKHGHJJHIDA=:753- 3879;;:4-///00.//01345645764232133335534588889:9:;:9:?C\ldcfehijinu|xnlolklnnmmnjjlllheSUWWTVVQQSQPRSNRSQORQNNQPILNLMNOLMMOLIILLJJKKJKLKJHFIHJKKLKJIGFGIHGHIEFHEFFIFFHECDHGEGEFEFEDCDCCDDB@CCABA?AAA=<>?????=;;<<;5 +  '*$ &$¿ú¾e !"$&),3=EDILKKNKLKKMMKLLLLLKJKJIJJLLJJJJKIIKLMMLKKLMMLLKMKHJLMKKLKLNMMNLJJKLLLJJJIIGGHFGFFFGFGEF>1      "~}~~}~~qbI!6Zɿ{\($.,-($# "%&$$$$$" LwbC++5^bXOLIKKJGEB<=<9??A9:>?>??=99;;;;ACbiegifhihhlt{xpopmlnonlonkkllkkcSUWVTTSOPSRPRRRSSRQQQNMOMJLLNMMNMNNMLIILKJKKJHIJKIIGIIKIJMKHHGEDGFDFHFFFEDDGHGFDDAEGEDFFCBEBADDBBCAAAAAB@AB???>=>?=<=;9:<:95& + )1& $=?51¼q"$'*/4:DIOLMMMONMLMKJIIJKKKJLJIJJJJJKLKLLILMMLLJKKLLJKJJLLKLMMLLMLMLKJKLKHIFEEGHFFGFFE@9+       ~|}}~}~~r`G'@Nɼ~|M#'+,5..)'$#$'((&%''&" 
#?{{ufQLS^iaXJIBCCC:9<:GIP[_b`fiq~~~~}|}|~~~~~}}{{||~~}||{{{{{{zyxyxyxywvxq[B@>A??>@>>A@AAB>???@BA>ANZddbRGCJTcknqnlmmllkijmlnpqqsqrrrrruqqrqrrutstuuuuvwwuvwvuxxvvwvwvwvuwwuwwxxyxuxyxwxxwwvxyvxyxzxy{y|zxxy{zzxxxwwyz{xyyx{zyxxxtuxxvuyytrxzwv|{xtxz~zy|~z~|z{}}z}}~{}~|z|}}z}||z{}}||}~~~zz{{{z|~{}||y||w|}z{{|{{}|zz||{|{{{{}}{{z{{|y{{{z{z{yzy|vz{zvzz{xxzzxyxyxyz{y{xxyyxx{zyyyvwxxuuwwttxytuxurvwustupputsqvvortupopspqrsrpooqponropnnicPMIJF?7322215:=DGIMLFCQif\ZOMKKIJHJKKHF@97445/$5<:;=>=5001/-.../.-.1332332341133123112438989;:8:;:;;>Caiffkijkkimv|vkpnmnmlnoonklkmijaOTURTSQQQSTQQRSTPRUPNMNMKLKLNLKKMOMIJIFIJGIKJGGJKIHHIKKHHJHGFFEDEFFEFHFCCFEFFGEBEBFIGDEEDBBCBEDBBBACA?CA>?@>>>==<=<;::98;<86)  #.% #HVO{"&'(-1:AIMOMJJLKJJKJIIKJJKIIKKLLLLLILKJJKLLJLLKJKKLLLKKMKMLJMKJIJJHGIFFEDFGHHGC<2#      }|}~~~~t\F2M[xzwiD@S\Y;'+)**&%'&(&-6AGCO_ixvpntxtsw{qqopkkijdhimstswz~~}~~~~}~~}~z|~}{||z|{~~|}~}~|~|{y{{wxwwyywxwvvvwoR=@B???A?=?@C@@??>@ABDQgvrtqgebYcmrwxvonnmllkmkllnomnqrsutsstssuttuyzxyz{z{zx|~{{z|}|{|||~}||}|}}~~{}}{|{|{{}{|{|{yzz{{{}|{yxzxxxzuxxzwvw{xw{|zxx~{z}}~z~zz~~}|~}z|~}|||}|~|{}}|~{~~{}~~~}~~~{y~{|}~}}~z|~{z~}{z}~{|||z{~z|{{{|}|}}||||||}|}|~~{{|{y||wyz}z{z{xzyzvzyyz}y{xzyy{{xy|{y{{vvy{xvx{ywxzusuxuuvwuvvwsuwutvuttssuuvttsttssstqpmrsspstqmpronpqikoniihXLFFE>62133458;AHJKLKGG_e]QOJIHKIJKKJE@:6223,$6:99<=<4030120/.//./1112330242221221032348899889<:9;;>Gemigjiimjjnw|tgpmmmmlnoonmllljkaTSTSSPPSNNQPMOPPOPQNMKNMLNMONLLLKMMKHJGHIJHGHHHJJGDGIIIIIIHHHFFEEFFDDFDBCFEEDECADDGFFEBCDC@ACBBBA>?AA@??==?@>=>=9;=989968;66* +   +# &Ri¹ #'-16;CHMOJMKHIKIJKJJILKLIKJKMIKKKKJKKJIJKKKKIIKKKKJKJKJIHFGHFFGFGGFED:-         
{}{~~||~}~p]E!Onvúmmns}jae^]`aeimouy}}}{z}}}}|yzzy}}~}~~~~}~~~~~~}||{{{xy{z|ywyzywvxxxywnL@>=?>=>>?@@???DEEO_ivvwsomopqoru{}||xtsqsrqurtuuvvwyz|}~|yyyz{{{|}||~~~~}|~{{{{|}zyy{xxzxwwxyyuuwxvuxwuz|yx{{|z|~|}{}|||}{}}}|}~~~~~~}}}{}}||}{{}~|}||~~{|~|y~~}{z~|}~{}}}{{{{}}~~z}}||{||~~}}~|||}}|}}|}~{y|~}}|}|||}{{{~~zz|zvy{{zz{z}{{|}yxy|y{{|y{xyxxzzwy|zwz{wvz{wuwyxwxzstwwvvtwwwsvuutvuusuvttuvsrttrtutqssqkprrpnrqjmpnnppmmnnjklfZLGFF>644445459@DHMPPJG_\SNJACJIJLKKFA:5232(&6878;<;524//1/.///,.0/10111122221323343378779888988:9@@??@AA?@?<<@<<<:8:=:8:889:980 + +  + &   *\% $(-/8?DILOLONKIJJJJLKJKJIKKIJJIIJKKKLLJKMIHHGIIHIHFFEGGGGIHEC@6'       s}}}}}{{}|~~r_@+z|w}{wxz{z{yvzzz{{{|}zzz{z}||~}}|~|}||||~~}~~~~}~~~~~}~||zzz|}|z{||}~}{{zeD>>>===?AA?BCBHXhuuvwusqotywwvxx{}||z{z{z|~~~}|}~~~}}~}}}}~}~|}~~~}{y{{yxxwzzvvz|wz||}z|}|z|}}|y~~~~}|}~}}~}}}{}|}~}z}}{|~{|~}z}}||}}|}zz|~}}|~}|}~|||}~~}{~~}~~~~}{{{}~|{|~z}}{{}|{}~z||}~||}}|||}zz~}{||zz{{~{{{~{z~zwx{xy{zwzyxxwxzyy{{{z{yyzywxvxyxwxxyvuvwqvwwsvxtquurstuutwxootvsuutqttsoqruqnrrnnpopmnoolkmkhih]MGJF?966544559=CNQPTJCLRNLFDJJLOOMG?96230'&69989=<54510/./0..,-/011100/132113223434646787888889;?Jiiefb`hijkpxkjnjkoolknllklljjl_PRTQSSROPRNNOOLPOKLNMIJKMLLNLKJLIIKKGDFGFGIHGGHFGGFHFFHIGDBCEDCEGDDEDDDEBDFC@CE@?@CCDDEA@AA?AA?>>?@=><>A>;;>:;<;9:;:9;:;::982! 
+ + + +  $ + 0û) %*04=FHFHKIIJIIJJIJHIIIHIHJKJLLKJKIJIHJHHGEFFDEFEDE@;2*         n~~~}{{||}~qaB'~}wsmqtxyzyx{xz|{|{zz{{{{}~}|~||||~|{|||{|}{|}}}|~~}~~}}|~|}}}z|||{zyyxb>A>?>>@A@CN\gmpqrnllmmsuzyvssvxzzzzzy{zzz{|{||z{zz||{y{|zzyu{{{y{|z|}~~|{yx{y|{x{||z}}~{|~}}~||~~|}}}~~|~{}~~|~}{~{|~~||{~~|~{|}}|y|}|z{}|~~~|y|}|{~}|}~~}{}}{~}}~||~|}||{y~~z{~~|~}z}~{{{{y|yz}}}{~}}~~z{{~~z|~z|~~z{|{}|xwx|z{{}yyz{zwvyz|zxxywyzzx{{zy|}zwxzyvxzvxxywuwyusuyrvvvwwtutvvvxutwyvurrruuuuttsrssqosuooprpnmpomnomlmmhjlh`WLHGA:75433258;@KJPOJNUPKHJKKLQULG?96331&'6879:::355131.0/....0///10.-/10321122435658768:8789:;?Dkjfc``ijjkqykmnjmqrpmollkmklkm\TSTOQRPMOPLLMLKNNKNKMKKJLLJJMLHKJIHHGDFGFFHIECHHGGFEFFDGGGCACB?BEBBEFCBA>?A@>@BB@@ADCBDB>?@=?A?>ABB@CJUgu{zxvtvsponovuwtommpuxvtttqssuurrstrtutqtvwuwuxsrtzzvw{ywz|}z}}}{~|~}~}||~{~~~~}~{}{~}~~~~~y~~~{|~}{~{{z}|}~|z~}~}~~~|}}|z|~|z|||~|~|y~{z{}y{~{~~|}}|{~}|{}|~}~}~~~~}}~}z{~}}~}||~}{z~~|||{z{}}yy}~{|~}zz{zz}}x{{}}~|{zyz{zwy||xyzzu{|yw{|xzyyyyxzvuyywxvzxwwyyvsxvwtvxwtwutvyyutvxstvtpsutrqrsprroprsopnsqpnppolpmlmqlhjkieXNIGA:765455548<HghfbZ`kjijpy~lmnmottrppllkjjllm]PPRPOPPNNNOOMNONLMOKJLLILLIHLKEGJIEEGEGFEFGGEDEGFFBCGECCCC@?@A?@CBCECCBBA??@BAAB@=@B??>A@>@=<=>=:=>=<>=;:;;;==98:857776797665*    - !$*1:?CGLIKIIGGJKIHJJIIIHHHFDCFB>71'         c~~||}~~}r^A$h}mOOSYaeiqvuz{|}}~~}}}}~}}{}|{|~}|}~}~~~}|~|~~~}~|zyz}|||~~~}||zv_IFLSckuzxwwvtssrokkhmklmb_Xamononpomkppnlprptuvsuxxvvt{ust|yvx|yxzz{y{||{~}~|~}}}~}~~~}}|~|z}~z{~~{}~~z||}|}}~}}|}z~}~{{~|}{|}|{}}z}~|}~{y|~yy}||~}{}|{z{}~y{~~}~|||z{}}{{y{~||~}z}}y~}{yz|z}~|z||zy}|z}}|{|||}|}}|~}zx{|}{~}|z|xy{~{{z{yw|}wz{{xzyzw{|ywz{yyxy{zwxxzxvwyuxzxvx{wtvwwruwttxxtsvwtuvwnrvsqqrtprrrqqqpqoorqnrrrminolonmjmnfehlgb^OIGC:46643339:68:4.21$)88699;;56896222000--//010///0.02023202324564366676789=IjhdcFUjjiipz}qmikjprpopmllmnligVQPPQPNNONMLNJLNKILMIHIJHJKGGJHDEHJGGHGFEFGDDDBCEDA?BFECBABAAABA>?CCA?@?@AA==>=>?>;@A>>9;::99:;;:867655796677357/   &&ľĿ; "(-4;DDEIHGHHGEFFFFDBA>94+!        
^}~}||~}}}s^E!e}{WCDFBFUdntuy{~}~||}|}|}|}}{|}{}}}||}~~}~~}|zwyxy|}~}}|{{{}|zz{xwvvwnkejkoqtqsppnoqoomjllmnnpoonqorpoorqpntspqssruwxuxyvsxvwruw}yyzzyz{yzz|{}~~{}~}~~}~}|~~~}~{}~}}~}~|{~|~}~~{{}{|~||}{z~||}}}~|~|~~}}~}{z{}{{}}~}|~~|||}~~~~|}~|{}}|{|y{~~{}|z~~zx~~{zz}}}}||}}|{{{~~~|}}{~}~~|zy}{{}}yxyywx{{vx{}yz|}{{{{||xwy{xx|{xwxww{}xwyzwuuzxxxwvxywsuuvruvuuuuutsttwvvuvssurprqroqtpmopnnrqnoorkimoioonkkkfghkhigfWNG?7677358:;889ATTLJLKKLKMMKF>94/10$);9699::59<;764211.-,---./000/002121003334565556887799?A>AAA@?ABA@A@=??@>????@?=?><;?=9::::;;;;==;:;:879866654663564691!    ".0(>&/5:>CDCA@A@;50' +           V~{{}||}}~t]C!`~~~ztRACHA=?Xmrty{}||~~~~}~~~}~~~||~}}~~~}~}}~{|}|xvptswsvwywwtwutwyxutuwwxwurtsvvusttutsruuqqopmpppkooqmonponnrpmmqplqrssutuvyvuv{wuu{ywy|xwxzzxxy{z|||z}~~|}~}~~~|~{{}~}{|}}{||~z~~{}}~}}yy|}z||}|}}{{{}~~~|}}}|~}|||{{}{w{~{}~||}|z~{x{~|{}|y{~zyz||}~~{z}~|}||||{~~|~|z~~~}|~||}~{zz~{zz}|z{~{{{~{{y|{}}|yzy|{zy{z{x|{{xz{|xxzzvuyyvxzwvwzwxz{yzxyvvtzzzvvwxwututvtuuuvstwuoquvqqtuprtrpottmprplmppoopnnmrhompommmkjlihgfhkhhf^LFA864259<==<;>JRMKLLKLLMNKF?83.0/$+98889985<=;8632110/-,,..,.///11331002122234643665469:?Qjjhfbcghkjqz}nmlmprqrqqpnkklligVPMNKMOKKKLJJJJIKJHHKLFEIFEFDBEEDAEGECDECDDAA@DDBBBBDB?AA@??@;=A@ABABA@@>?AB?==>;==:=?==@;:;;::89::8:<9::75:9878966655796577440#    #278&;"*/3;94+  + +            
R~}}~}|}~}}~t`F#a~zw}\9BKM=8Nepty|zx|}}|}{~|}{~||~}~}{|~~zzzxywwwwxyz{{|yxzz{wyyvyxvxwxuuvwxxuvtwvxywvy{uxw{wy{~{{y||{{|z|}zy|}{ywvyuwwyvuvvsqqsqpoqnmopmlmnnmommooqsnoppprrpqtvrtuwruyzuuvzwvyyvwwzzyxx{{{{{z||{{~}z}}{}}{}|z||{{~}z~}}{~||}~|~}||{|~~{~|z{}z|}|zz~z{|y}|z{|{yz{}}zy}|xy}}}}}|{{{||}zz||{|}~|{z|}{z}~|||}{{}|{}~{|||{|{|~|{}~||}}zzz|{yz}|}{{{}|~}|z|~{{{|y{{zxzz{yz}{vyzzyxyzwuwxxxyxzvyxzzzyzwwxysxzyuuwytuvtquuttuvppvvpqstqsrtsrttpppqooptqmoqpmoqqlmnqlmomijmifikedhjhhhi]RIC94047:=AC@ALRLLLJJKKMOKE@;3/1/!,89998:86=?>;9753322/-./..0.-031110//11122336446655576=Oljfedejhjnt|}ppkmmpqpnonnmllkkeVOLLJKOIGJLHGIGFJJHIJIGDFEEDEEECECDFEBACB@A@?@DA@AA@AA@AA?>=>><>???>@A?>==?@?<;=;:<;<:<>=:<:79:9::86698898467876863452153255030%   (! !.5A0E +         L~~|}|||tbC#`}~~}}}{zwttyfH@ETA:L`mvxyzzzzwwxwwxyxxxwtwvursttsspopppopooonmklkkmjghkjkfjolmlmnlmnpmoqtuuuuuwwxvux{|z|~~z|}{{}{yz}{y}|zy{{xvwwxuwwwttuusssqoppnonomnkomooqmlnponoqpknoqnqssssptrvwxwvvzxyxwvyz{yyyxz}|{z|z{}z}}|{}|}}{z|~|}|{|~}|~{~z}|}~{{|~{{|~{yy~z{|}wz|~}yz|xy{{|||{y~~}~|zz{y{~~~||}|{z}~}}{~}{{~}}|z}}yy|~|zz|{{}~{{}}{zz|{z~}|~}|z|~}~}{|}}{{|{z||~z{|}zz}~xz}}wyy|x{zzx{zzx{{yy{xyzywwxxvxxxwx|vvxzyxszwvwurtvxqtvxrwurrusrrstrtutttqrsurrusooqrlmonlmrniopklpokkkljmmlhimiegigghkljeiibUI@932469?FBBQSLMKKMMMMMKE=8301- /89:87975>CA<;:84221..0/.0//.01//001111323332254134446>RmhfeggkjknszxonlnnnnnkkjjkkkjmdSNLLHJLIIJKIGGHKHIIFEGIFEHFBEEB@BBBBB@@=???@??@@BAA?AAA?>=>;;<<=><=>@@@>;=>?>=::<;:==9;=<9;99:98888969996686564465345423233222/' %('+*'&-1/.ĿS  + + +        
H~~}|}{|taG$^~|yyxvvvuurhMCF>CPfnpsuwsqttpnpponmlkjhikhiddhiheacfhhggecfhegehhddeijgghhhgikhgjkmmopsstuvuvuxwu{{{z||}y{{}||~yy}~{{|zz|zyy{wywzvuvusssusqonpononlmmokknolmlnnnnonmmmpqtsrrwsxvzwwv{wxx{xuw|{vxyxw{{xz{}}z{~~|~~|}}~||}}{~~{}}|{~|{~~y|~}z~}w|}{{|~x|y~|{y~|yz~|}{}y}|~~}||{}}|~~zz~{~}zx{~{{}~|z|}{z{||y|~{y{~||~{z||yz{|}|x{|{{~}~~~{}}|}~~}|||{~}{~||}}yyz{yy|~{{||{|}|x|{{w{y|{}zyy|z{z|yz||y{{wtvyxtvxwxy{wuvyyxu{wwvvswswvvvvwxtuwxssrrrtwrqutnosxlppqnlopmmqnmornknqnklnmnlkptljmjjjfcdjiejkkfhib\XLA82159=BADQPKLKKKLMMJJE;83./, .89978;87@DC>==9655201./01-/10///.01//0203322453133358=Ujhigfijikns{ummllllmmmmkjiikjjdSOKJKIIJJFGKHGIHFHHFFEFDDGFBABABAABA>AA>=@AA@@??AA>>?A@?=;;;;;=?><===?><;=<:<<9;<:9:<<;:889;:;98848967885475255443354441230032-(  /.&'),2.+21/X + + + +       E~~~}}}{z~}tcH'd~}|zyzwwz|{z}{~~{yxxvwsnopmjlhdYKJGOU]a``c``e`a]bd`Y^cbbad`bcdbcddb^]dfceffeffhcggjfdggge`ffbbfihdejjjkoopqurtsvswxvwxyz}z{{}}}~{}||~{~~~z{{{vyzyuvyytsutqqrqqpqonopmmjlmokomnmolmmpnnoonppqsprtvptvxquuxstwxwvtwwtvxyy{wwz}}|{|{{{~}|~~}{|~{z||{{|{zz}uw|}zy||zz{{{y{y}}{z}|}y~}}{~}|y{z}|}|~||~~{y{}zx{~z{~|z{{zv{}{zzzyy{zzzz{z{{~|yz}}||{z{{zy}|{{~yx{~|}}|y}{{|~|z|z{}}{|||z{z{yzy{y||}}}||}~|z{|y{z{xz{|xxz}vyyzwxyywyzwtwxxuvwxzzywwvwxyvxxyuuuvtuxwrvyupstrpsssqstprrrprrslrnprnnqpnopqnqomklpohnopilljgejhgifcdggcgkjdegd``^O@9248<=>DPNLKKIKMKLJJC872./+,88779=:9DECDB?;986333///.--///0.-../00103324533223578>=>>AA@>>>=?@=<>@B?>;;;;;<<;=;<<<=::;;9:;;<:8:8:<97997899766464454333453465//321222120132+( + + + + *56(#'-,1½c  +  + + + +        
G}}}||}}}~wcJ-c~}{|||vxyuspmolkihkjijigglodeejhaced_a^Z]^`^a\`][XZTRSXVYY[]]Z\_\^[c`^`cb`bbb`cecacbcb`cgecffffghidfhicehifecffhfhjfgklhhlpnqqtrssvvuvwxwy||x{|~|~{y|~}z{|{zz{y{wzzxuwxwuuusswsqrponmnnlmkmmnmsnknqnlnojloonpqpqoruuqtuxqwtxtwwvwwtvwxxxx{}xy}~z{~zy{~zz||zz{}{yy~{{{{|{z|v{}{{|}~|}~z|}}y||}z|~|yz|}z{||xzxzx|x|zz|~zzz|zy{~~yz|{{{{|{||||~{{||zxzxy{{{|}{{y|{zz|{x{~{zyyx|}||||{|z~|{{~}}|~|~{{}|||}{zz{z|}~||||zz|zy{|y{{{txzywxxzvzxzxxwxy|xwyywxwuvy}xvvxwuuxrvuusstwutuvqtttqpstsrutstrrrorttorqqmosoknqnloqkkpnilnnemnoijkmlfihhiedjbfgkiiffdegd_^RA:558;?BQMJJMKLMKLHI>430/1+169877:7:GGEFF><>:8673000/..-.0--/0//00122013113322455>_kkhkikkjimw|vlmlkklkkkijkjkljg_QIHJKHGIJIHGFGGGHGFEGCCDCCDB?>@A?BCA=;<=>>A@?;=>=>?>=;??<9998;;:;=;;;:;;9:<:9:<987977866787688756554552242362033/.01/.02101100/.# + + + + $.8/!#*f + + + +      @}}}~}}}~ydO9V{~|}yy|zxwxsoqsrppqqnsvqvrqniefcfeab^\^aab]``c][^_\^]__^^]^Z\]\\[WZVZ_YXS\[ZY^Y[Z_`[[ZYW[[YUVXZVVX^[WZ\]\\^a^\`a_]`b`adb`[`a`\_bb`beeceffdcggccejhbbbdbacedcfihdikmlqssqsssqsvvvxyxxy|z~{|{~w|~{z|{z{{|y|y{wwwzxvtusrvxqppqpmmnmjllnlllnikmnlpmolopkoppororrttvstsuuvuxstuxssvxvuw{zvy{|yy|~xy{{y|{}yzy}||y}|}{y{|{zy~{yz||{||{y|||y|||x|zyy{{}x{{{x|xzx|z{zxz}}{xz||zz~zxz|~zy{|yy{}ywz{yzy|wyz}xz{|yyy{zyy|zyy|{zy}z{{{z~||{}z|}~{z}~{z|}yz}yzz}zy{~x{z}|zyzxz{}yyy{y{{{y{zyy{yyzzxzzxswzwuvxwwwwtsxzvwvwvuuxswttsvswwuqutvsrqoquurrssqqqrkpstnnpplnpllnojmmmklmkknlmkllljjimmgdhkgdehddikehic_dfa``fWE7467dojjiikmjjov}vnmllkkkkkjjkjjjih_QKHGIHIFIJFEFGGEFEBDE?>@BBB@>;>??BA??<?>;>><=<:;;=;8:;:8:9;=99:9889879:97865587895446535773453367235213212210/21..//-/.+-/-% + + + + +  %-2!!r + +       
=}}|}||~vfM7]}||xvswurpmnmkofhbbj`adbcc]]a[Y^]Y\Z[\X]Y\SW[XRSSSNQWTRSXWUPQUTSUWVWY[XZY_]\\]\[\\]YZZ][YZ]\YY[^WYZZ\XY\YZ\^\Z]ZYWYYWUUWWSUXXXXZ[\Z\\\[[^\]^`^__b^^_a`^^bbb`bdbcggdefjeccgda`abbbdcdcfghfgllnnqrrqttrqvvuxyzx{~x~}}{}}|z{{zz||xuzy{uuvxussurqstnopoononljllmmojlkpmmmqmnmnokmqpnppqqttvrrrtstpvtwuwstwuuuyxwxzz{zz|}z|{{|}{{{|z|~z{||yyz|zzy~|yxzzz|||{{zzz}||{|z{y{z{z{z|y|xxz|xy{zxx|yvwzzxy|zwz}{xvvzuwzyxxy{yzyzx|y{z{zyy|zyz{yzz}yyy~z{z{yz|}z{z|}~|z}~{{{~zz{}{|zz{{z|y|yz|}yz{}z{|}zxzzzxx}wxz{xyzxxyzzuwzyvvxywwwuuuxxywvuvtvwyuttuqwvspsttqqppqusnpqrqoprrrrspoonormmommmomlnojjklikmlgjljgjlhbgicbegdcgifggd`ceda_a\ZH<547@NMKMMMMNMLID8.-/22+14777895;FMNHFFEA>>95662010-,..,,-----.100221332123456@dmijgijiiknxtolkiklmlmllkiijhi`QKGEFFFDGGCDEFFDAAAADBA?@AB?====?@=>>9:=;9<;98<<;989:;:979988769:88:9855689:96685367864545635442443336435333120./.-./.---..-+,--.9J$ + + +  + #-)%'&)&$ w  + + + + + + + + + + + + +     4}}}}||}||}weK=Sp{zw|xywsnhhfdaeba[]\TWVUPRT_bb[OFFGCD@E?@A<;@BA9>::>HRNOSTNNTTSPKTUTTTUUUWTTSVUSQUSRRUVOQSVRPSTSTVTVVYZ]Y[]^]^^\\Y[[ZYZZZXXZ\ZYXWZWXYYXYWYWY_]ZYZYXUVUTTWWWRUXVVYYWZ[_]\[^\\aaa]``b\^^a]^`c``ac`bdghddeicbbda\\cd`bcccagfdghkilnqmqqrqptvuuwyxuy|{v}|}w{}~zxzzyz|{xx{yzxzwwvtstsrrtrsqoomnmkkmjknpjlloiklmjjklljjnnnooqptrsrsqqrurvvutvswxtvvywwyzxyz{zyz|uxz|x{{{z{{|xyyzyx{|z{x|{yyx{}{w{zzyy{}zz|{wyx|xwz{yzxzvwzzwywzxxwwvyzyx{{yy|{xyyxyx{yvxxyxx|{yy~yyyzyyz}ywzywyz|yy{|uxzywyz{xy|y}{|~|{|x|z~|{||~~ywz}yy{{wxy}xwy{yxz|wvwywww{wyyyxyyzyz{{wxxwwvwyxwutvuvwvsstvstvwutrtnturprqopqppttpppoqsqnpqqnmpolkpqjlpkikokkkmghjkiikmfjlhjjifddedededdehggdccaccc`^\]XL=45>OKKPNLMMMJKF9/./12)36556895;HNNDFIGAA?<8884321//.--,,/.--/10121/011122256Cgmijhihhhkov}tplkklmmlkjklkkjig^NHGFEEDDDDCEDDBB>?AA@BA?@B@>=;;===;==;:;<9:;;9:;<978:::88777789898888965885586566355543353324444411105A21211.10,,./--.--,,,.,+*+/\yy3 + + + +  $ #+.-+-)'"  + + + + + + + + + +    + + + + 
1~}~}}}|}{{}ygNAB:6=>7<:597729DGA9CCA@DFE?FPXXOA==>>A???>>?>AA@=;:;BKLMMNPNNQSRPPQSRSTQSTUVTTTURSTUSRRVUQRTTPRVWRTVVUTWWYZ[[]ZZ[ZZXYXXVZZZZ[ZXY\WXXTXXYVWXWW\^Z[]]\XYZUTUXWWV[ZVUYZWY]`Y\_`]^aba\_^^]^\][_a`^_`__`bccadcdabaa`b^bca_cbabcc`ghheklmkppppprstwyxxwyxyx|zy}|z{{{{}}zxyzwyy|tuwvssttqrstokmnkkmlljkkkglnmimlkhkmjkllnmopoprtrtrtqrsvqtvxurtwuquwwvuvvwyxxwyyztzz{{|{z{|y{{}wy{{y{{|zzw|zwy|yvyxyyz{|xx{{wyy|wx{{y{xxwyyyxywyyzxxw{{wx{yyzzwvxywvy{wswzxwx{zxyzxxvyyzw{zxxxvzzyx{{zx{yww{{|{{z||}yz|}yyz~yyz|zx{}yvx|wwzzxxx{xx{{yyxyxxx{wxvwyzyyyyxy{zyzzwvuvuvxvvtsutuuutvtxuurvwvrututssqoqsporsqmpqmoqpmmqpllnnkjmoklnlllnjkjnlijkliiklkhilhfefcdde`bcccgigbdd`_`b^]]][ZSA7@>;87621/.-.---/../00/1221/11222356Cjkijgihhjjnv|rpmklkkmjjjiklklhh]MDEECCBCCABE@@A@>>?@@???@@?=;;:;=<=>:;::<;89:99:<758;99::778767656964754575695554434441342132201010/02[T831/.11-+.0++,+*)+)+-+**.\}N + + + + '26.%'(%!  + + + + + + +  + 2}~|}}|||~{fLA=,+24175799969@@?;??@=??@?>>?BGQVTI>;=><>>===><>>@>;9;;CJNMONOQOORRPOSUQRTUQSVWVQSUVRRUUORSSRPPQRRTSSQTUUSTYWXX[ZYYY\YYYZWVU\\[YZXW[ZYWWVXVUXWWVX]\X\[ZWVXYUUWWTUUWTSUWYXZ]ZVYYY[^^_`^^]]\_]]Zb`]_`___ae`ace`bbda_bc_`__`_]`abbdgefhljmkqpoqsrrtwwuxz{yzz{xz|z{|yxz{zxzxvuxwwststqprrnoromlmkhklikkmkkjqlkjnjjkmjijmmkkppmnqrnrrtqrrqottvsrpstrstvvstwxzxyy{x{x{y{}|zy|~yz{|twyyxxx|zxxzwwyywxzz{{{yzyxxyxxw{xzxyy{xvwyuvxzwxzytvxxwwwwwwxxvwwvvxxyvuuxxxwyywwxwwuyyxwwy|xvw}{wz|ywy{uvu|zzz{xy{{xyz|xy{}xux{ywy{www{xwu{xzyzz|xzzzuvyzvwwwtvxwwxyxuwyywwxvutvwywuuussttsstvsuwwstusorrurrromprompqlmoomnqpmnpommlklmmollkmnklmogjmjfhlifhkgeghfeefacdeaddccfgebccaca`__]]_\XUE@@;7662100-,----..0.-.0022121212224Dkkjjhijgihnvpolklljkjljhjkjjhj\LACCBACBBCBCA@?@@><@B>>===<<899:;==;7:::;:98:8:9:85786787678644556843553565686521123430340/320-..1//34[p`>/1--/-++-+)+*++,**)++*)Wwe + + &2:4( !"¼Ù) + + + + + + +   + 
/~|~}|{|ygNB<.)01154848889@?=@<=>@@AJQSPF=;;;=<:;:=<=>@><:9:>DONLKMNOPOQQQQRQRQRRSRUVUUSUUTSTRQOPTRRSTRQTWRSTVUSTXZYXXZXY][ZYXXYWXYZYYWWWVYXVWVSUSUXVSUWYWV[ZXVVTVUVVTSUVTUWWWYYZ[XX[ZY\]]^__]^`\^\]]_\\_`^^_ab^`cd`abc`^aa__`_`]^`^_begbeikijlommpsqqsuwqwyxxyw|x{z}|{{z{{zzzyyvxxyvwuwutrsssptsmlnpjillhjlnhklohglnjijkhijkjijlmlnormrrsqssrqtsutvqststsvwvsxxxx{z{wxx{xz}{wwz}yyxzxyx|z|z|yyyxwyzwvzzwz}zwyzzxwxysxvxvyxxvutusvxxvxywsvvwwwxwxxwuuwuvwwxyxyuuwywxwyvvwwvwxxuww{wwvywvwzxxyztvu{yyxzxzzzx{yyx{{{{{yz{{xwvzwxxyvxxyxy{zwxxwuwwxuwwuuwxxyxwxwxwwyxwvvutxyurttrrswqqswrqvvststpqsssqrqooqpnonnqnmooonmnlmnkgilnlmmkilliikmcfjighjjeehegffeieddebdddbcfecfgcaac^]\`Z\^[YZYSRNKKLJJKKJFA92110/& 36545774>LHJOLGGHCA@@>:57532/------../--//./0//0103216FmkiigijiijqymnlkjlllkjkkkjkjhiXLDCBBCD@BEB@A?=?@>=>?>;;<=<<;;;=<<<;98:;<77679958637645544555433676632211465343/0111340231/011/0/1./14]ktkE.+,,,+,-,*+,+)*+*)*--*Noz. '1991#-  + + + + + + + +   (~}|}}||}}vgPA><;::;>DHLKLMONOPNQQRRSQRSTRSTTTRRUVVVUVSPRTSRSTTQPRSQOTWSQQTUUXVXWW[[WUWWWUWVUYWTUWXXVUVVTTRWUWUXVVYZZYXWWSUUUSRTUTSVYXUYWWXZZZWW][Z\^^]\\Z[Y[\]X^__^^]_^`aac`aa^^_a^]]`^][]]^\aba`fgfdhjlknopmntuurxxxwxwzz}y{|}yxxzyyy{yuwyzvuvwssstpqqspkkmnkijgdhjihlkjhjkljkjjhjkjhlkkllnnqosnoqtrpptpruvqqttsssturwvuxxwyvyyxy{{zxyy{zzxyzzwzzzxwwxvtvxxvvyxvxxutwwywuvxvzwut|xywwuttwusuywttyvuvwxsvxuruustvwtvvwutuxuuuwtwwwstvvvvwvruvvuxwzyxvxwxvyyzzyxzxw|yvy{yxx{xwyzuuvyuvvytvvywxxzvxuvwywuuwuvwwvxywqvzyuuxvtuvsruwsrsurtqursqtttttsvqsrrqssqmqqljmommopkkmolmkmkmkigkjkmlkjjjjjihkgkifjiikhfcehfcdja`da_cea_acbadd```_]]Y^\]\\\\\VRMIKJIIJKKGB6/,..,% 26535543?NIKMJIHGCDB@@<78741.-,,,-,+----/..////0013147EmlkjghhkiioyknkijlnmllkijikjigXLEC@ABA>@C?==>==>>=<=?=;<=;:;:9::99886677567666376346433433353424224320024322/01///0//1//////0-.-/,--2YgnsxP,+*,,***(*.+*+//17@HM[ht|D$$*5:<:/#2   + + + + + + + + + 
&~}}|~~~}zhOA>2)00343455458>:;;<=BGOTQI?9988999799::>>;78;?@>><;<=====:;=;9:<:67879988786577557743476235311211220423211322101011-01.--/./.-.-..././-.---1Vfkkz[8)-+,+,,03499>EKSU]]cgms}^-#!! %')./*$+89;<82$6 + + + + + + + + + +  #}~}~}}|||jPB>4(..343536426;9<9;?@@=>@FMQND=8877998789::<=999:>DGGKLNLNOPNQPOQOQTQNSSRRTUSSQRMQRVTQPQQPSQSPOOQQQRVTSQRSQTVXVXY[XUVXVUVXVQTXWTRSSSQUWSOUTSRSSRSVXUVYVSSUWTTPTSRORUTVWVTVXWYZZ\\[]ZZZ\]^SZ\\Z[]^\Y[[\Z[^a^a`_]^]]ZYZ\]Z[__Z\ba_`cfddeiifgkmnousnrttswtttxyxvzyzutywvvvvwvxvvsussrrpqponmmollmknjhiifhhjehgifgigehhigegiigjkljpnporlpproqqtsopvtprssquvvtvxwtwwywwxytwvww|{xuxxusxvwwvsuuyvtvwvuvxxvtxxtvvvstvwrtvvttttrrsurssvqrrtuwtttvttutsprusruvtqrvtruwttuwrtuustvwustsqtwvtuuwtvtutvuwuwtvvxwwwyuvvxwwwxuxxwswwvrtuutuvwqswyttvvutttsvwuvvvutrtuusrtwutrtttqtppqqmqrpmqoqorqpnqqqppoonnnnnlnooknnlikmmhjljggiggijhghlghggcceebcfgfdfecbeccbcba^__abccabfd_acd^\\^Y^_[X[[WVTQMIHJJJIHEC>3-++-,#!33354422>IJJIHJHFCEDB>>=9432/,,,,-..--.-,./.-.-.//-137Jmhhihggiikr{}nokklmkkljjkjmkgieXJBA@?<>@=<=;::;:;:;::;:9:8:7576658864544642343345331231/0013300122-%&,0//..0/-//.-**./0/-+-++---*+-)-3P__F\{m?+.0-/147:=@EJPSVZ_behlruq8'(,---/01.('=>;;;8-@ + +  + + + + + + +  
$~|~||~~|kPC=1',,343547438<<=;>AC@>CJPTMB:776679878;::<;89;;?DIJHMNMLNMPMRQNPTTPQRSTRRVWQSSUQRXXQRSTRPSSRQRRSPOQSQQVSSSTVUSWYYVUUUVVUTTSUUUSTTURTVVTSTSQRUSSVVVUYZYTSUVSTRVTUSTTRUWUTWXYWXZ[YXZYYZZ[[TZZYXZZZYV\X\^^^]]a^^]]Z\WY[_\Z\[\[]]\\_ddbcdfifhjkmmoporqsswstvzxux~zyvxyvvxvuxywuttvrqrrppqrnmmljikhhfgghbgfhbfffghhhhiigffhhikkillpmppqlpqrmpqsonrtrnqsoouuuuwwvvwwxyywxxzxxy}yvt{xuuyvuwxtuwxssvwvtuwvutwvtvvvtuuuuvrtuussssprstrtqrqpsvqqstqqsrporspqrtqorusqrurqsupsstptuutuqstwvttvuvvvtvwwuwwwsvvzwvwxtvvwvwwwuwwvstuvrsttsusuruuvtwutvvrsvwutvttsvrruwrqsusspsrtqrrqppotqqqqoqrspqppnqqnkmnlkmnijmlkjklihkjgijjhhjilkhgiiigifedebcffdfifddedbcb_aaa^_`_]bab_ba]baa_\Y]\^][ZXWZ\VOKIHHJKIHD@;4..//,!#22131331CJFHKHFGE?BBC=<<:344/,,,++----,,--.//.-/..-/24Prjhiighhgksy|lomklmmkijkllnlhhdVG>?@?<=><=<8;;99<99:78:;;8874563067434324223331132312110//1000//0-(',/.0/./0-.-&,---,,,++,,)***+1DPS;5QxN0//01478>@CHLPTY]``cglpvQ1,/0000222,!$@C>;<:3L + + + + + + + + + + + + + + +  }{}}}|~|iOA>2',-332556327=<;8>??>@IPPJ:75658977888:9<;88;??B>=<:5420.,+,-...--.-+,,--.0//..11Tmljighijeirx{nmiklmomjjjkkmkhheTE??==><;<>97;977:85865788545323+!(3313122232121222/01//././,--.00/*&+//,,/.*+*"(+-,*)***()(),,,7AB54;It`6-12358;?CFHMSVY[\abginr}`3,01000143.##5B@<9:7Q + + + + + + + + + + + + + + 
~~}{}}}}~}kSA?4),/211336307><;8<<>BJQJC85555775458999;;99<=AFJHHLKKKLMONOOONOTRRNPPPOPPPNQTPPSTUPMQPRPRVTSTTRONRSPORRQRTVUXXXTVWVWUSVUOPTRMRSRQPRSQPSPPOPSPPPRSRUUOQUURQQUQQRTRQRSUSTTUTUTYVWXXXVXXXWXWZVVYWWWY\\Y[[_]\_a[Y]][[[[ZYXXXYY[^Y[^_^_ba`cdhigimmkoqpkqrsqttuqxvwtvvwurtusuvtsprqqrqonorpnoomkhjlgfiibaegcdffdfffcdffbceefdfihgklllnnnlonomsrporprnsqoqtrrsvtsvvvuvuvruxyvtuyvvtsrtssrvusrwssuvssswvssvtqutvqsurprssporqmpqqnnooopoqrqmopsopqtqnqroppsppqtomqsqqpsprrtooqrpqtvpppusqrtqsrupusuruvtrtswtusuwwrwvxstvtrrttpttsptuupstusssspsssrsssrqssvssstrqqurortmoqrnlmqmmlnlnopoqqnmnmmnllokhjlkgijlijmlgijiehihdefggfjfdgifeffbadfcdedcccdfbccc_cbdZ]_][`bb]__]^``^\^\W\[XUUVVTUXSMIFEGHJKJE@;1---.*%0/0/...0BFGFGFFDEBC?AA?::7310.+,-++-.,--,+,+,-./..//05Vmmihffhihkr{zpmijlmnljlmlkkjhfbPA?=;<=:9:;88:886754576565433210'&//012012//0/020./231110/-.//3565- (//,,.,*)) #+,()***)()+./17=B?:;@Inn9,2447:>ADEKMRUZ\_^acflpwo:./11101241():<89;:T + + + + + + + + + + + + + + + + + + +  y~}}||}~|lRA?6)*.212325118>;<<==CLQJ?2/13456645788;:989::851//++,,++,+,-,,,-,,-.---.-07Wnkghgfhjikt{plnlllmmmlklllllgiaLA>;99;8888777776665556443311/1/#!*-11100/..001-./21/23112367<>>@>9(&+,-,+,(& "**+./.0257;=>ABGGEAAJOnz@155799=ABGKMQWY\[^acfhnw}yI//12133674/#'58:;;V + + + + + + + + +  + + + + + + t~~~}oUE>8*)-10333622699:;>BMNG91./1456654789:;979;80./.,'%/--..//0=EHEEFCABBB@?><<:521.*+,+*+++,+,--++,.,+,++-0:]okigfheiknv}lgmlkllkllkkkmmmigcNB<;;987996745644554424313321020#'-21.-.//00322232257669;<=?AEDCB=-!)++,+&% + !*,0348:?ACFGHHGIKJDKWSgK/47769=@CGILOSWX\_`behlrzY11322258884(!3558b + + + + + + + + + + + + + + + + p~}}|||~~kTDA;)).2/334510299>BINQB3.--.134456559;876870-..,&",-,,-/-.BEFGEBACC>?B?<:<8331-**+**++*++,,,++,,,,-+--.2bmiiifhijhmv}njnkjlkjkjjijlllihcM@:;:86567764653255332332221111/#&+..--.00244778568769;>=??CEDDDE?1#'**((& + + )0369=?BEGIKLKJLMNOSX\gT647779;ACIIKNRTY[_]`cflqxg;3234489:<91!&+2h + + + + + + + + + + + + + + + + + + + + + + 
n~}}}{{zkTDA8((.314455015;>ENTN=/,,-.-0113577899887=@CDDGGEFJKJINMMLQNJKPOQNSPNNPPQRQRUSSSUSSSTOPRURQQQSNRPSQPQPPNPPQQSQRSTXUNPRQOOPOOMPONLNPPOOROLMRPNMPNNPSRPPRPNOPTNNMMMMNPNMPPTNQOOPQQRQSUUUTWTRRVUUUVVUVUVUVXXVUXXSUUXUTTSRVUUXWSSSUYXZY\^^\`ceedejjjkjmlooqorrupttwtrqutortsrusrpooojlnojjjlgfihfddcba`a`__abcdba\c`bcbab`ccabggeghigihjjlkkijlmkkppllopopstrttqstxvtuvtuuutssvsstsporvqqqsmorqnmpqomqqoqsqpqsqnnonmnnjllmkkkljkjkjijlfgimljllihjlijjmkiklgilmjmonllmmjnknkmnnmrmnmonpnqoqqpnnpsooprnopropqsopqsnqsqlponioopnpopoppqpoopqpnnqpmnppmppmoppmmmpllmolmomjollklklkljmmmijmihkjkgfnfbgiedfhdggededcegeedcdedcdeeabccbdd``ab`_ad][_ba^^`Y[`_XY\[W[[XXW][ZYZY[Z[ZYXWUQSSURUVQLHC>@CEGGEB=60-,,)$#,,+*,,+.GDADD@BBB>>?=::9531/,*)*+,+++*,,+)+,+*--,,../7alggjggiijnu{plmlmnklkiiiknmifkaJ;8:997677654633342443110/...10-"$/..--.1223766588779:<=?@CDDCEGED9&('(&  + *1469=@DGKKMNNNPRTSV^b_yc<497685& +o + + + + + + + + + + + + + + + + + + a~~|}|{|~}mWE@8''-003465015=DNSJ4*')*-/.1/034797788<=AACEGHFEHKKJLNMLNSOLORPPQRQOPRQOOQSSOSSTPQSROQRRNLQQPJPOOMOPNNPPPPPPOQTVVQOUUPPPMLMNPNLLOQPMNONJMOLLKMMLNPOLNPMKLNLMOMMLMLKMOOLOMNNPPSQSSUUUVUUPSTVTTWXVUVXVTUWWTUWVTUVURSUVSTRQURUUUUXXZU[^a]]`cdcdkjhkkljmnonqrtopqwqnqtpnqrpnsromnnlijkmjijjhggffdc`a``_``a`bcf`_`cbaa`]]_aa_addddgfeghiikihhkklkmmnmornoqqppsrosuusrvusstsoqrspqssoosuooopmqrpopnnqqonqtspqsrnmnpllmlgjkjgkligiihhhgifggjjihiijillkikjiijinkkjlkjlnmkkmhklmlmmokkknlmmmknmllqqpmpnpopnporqrpppqornnnrpmnomppromoqmoqompqnlmnmkloononlnmnlnlnmnlkknlkknhgiihkkjfkjjhimhfhikgehghghfedfeedeedbdgedecaabc`bdd_`ca_bca__`_^]b]Z^`][\]Z[\][Y[[YXWVWVX[[WXZ[WZZVUVUQPRSQSTOIIC>7(½# + + +  + +  + + + + + + + + +  
`~~}}}}pYE@7%'-/-1332//6EOPG/$$&()+--00035665689>BEBEHHHFGJJJKMMJMPRPNOPOOQQPKRQQNRSRQQRQRRSQOQUTQOOSPPOTPMOPPOPQQPPOONPSTSPPRPNOOMKNNMJKMNMJLLMJLLLJLKKJKOLLKMNMLMNMOMLMMOLNNQQOQPONRRRQRSTSRSSSQSSURTVVSSSTSSTVUTUUTSUUQRTUTRTUWSSRRTTWWWRXZ\[[]``aafffjghhmlmlqrroqqupnrqoqroppspomolkkmjkkljijifdddd_`ca^^_`^`ab\^_a]`__]\^__`bcabefehkiijkegingjmpllmoolnqqoprqprsrprssqrsqqrrsrsqqprrsqsoonrpoppnmnqonqrqoprqllnnkkjkhjjihlhfghefhjfgfhehhjgfijgikjfhikhgjnhhikihkliijlhkllkklmkklmkkkllpmnotonnpnoqrlopqooopnnoskmnollnljmnnlmnqllonmnmlmmqnklmmonllmilmnkklnkjimjiikdhikjljhhkiigiggifhjiddgjeefgbcdecdec`cec`a`_b`aadbc`abaaa_`_^_`a_^\`\]^^Z[\\Z[[YXZYWVVXVVZZUVXXWYVTUVTQPRRSUPMHFB;6ADCDDA=3(&(*)$!**)**,*0ABC?AB@<>@>::;84130.-,*+,-32**,++++++,+),-+,0>bnjiffhjjmovponklnnmljjiikmlil^D9665@eS354322.021110..000.,--+("+.-../0132344568;;:@?@<2# +  + +  + + + + + + + + + + + + + + + + +  `~}|}|~pXE@9#'-0/2120+-8RP=*#!#%&')*,/02465467:=@ACADDEGGHIJIJKLMNNOOQROMOROONUPPQUSRRSTQSTURQQSSQRQQOOQROKNOLNNLLNPNMMQRQPQQNLNPMLLMNLKMNKIJMLKLOJKKJKJKMMLLNNMLLMMMLLKLMMKLNOONPPNJPQMQSTQOORQROROTRTTSRSTRPTTSUVUUSSUSQVVURPSPRRQOPRQUVUTWY^[\]`^cbdfhhegimkmmpppnrqsrqpnprpknqromnlkkjlhijkgghigfc`a\a_][^^_]_``]`]`Ya^_^^`]^aabbddbdghfgfgafeidhklkllmkmknqnlnrrrrpstrsssqptsppstnnprorqrllmrmmoomjmpnnopqopnpmlklkigijkhhjkheefcdfhdefgceehecfffhigchghhghihhhijijjhkkhhljjjkjjjmkjklkkllhlmolmnnklnjhllmlmolkmnpjjlnjkljinlllmnmjjlnlmllljlmkkkklkjklgjlkijhiilgkjjjjhihjikhgjhfiifdfhedfg`bfhccegbdddcddaabbbc`_aa_`bd``ab__ba^``]\]`][[a\Z^^ZXZ\WWZVVXVTWVWVWWWVXVVXVSTVSPPQMORRLJHC>63>ACEB=;1'$'))%")'(('**0>BD>@B@==><=;<:6230.,+,,+(.=>,***+,**,+,,,+,/=cnkkgbgjkhpxpomllllmljiiilnmicXD7456;vd5330///.-//.-,--.,,,,+()./0-/111234557:9;;>?AADDDGIKKIIA%! 
+ + )157?@@??=5$% + +  + + + + + + + + + + + + + + + + + + + + + + + + W~~}{{|~~nWEA;$&-1/2121*,4F6'"!#$%%')-04444458:=ACACCFEEHIJJKLLHMPPMNMQQNPSROOPPMMPTPPTTRQUSSPPPOMPPOOMMMMMKNONNNOPMOOQQSRRSSQLLMOLLLMKJLMMIJJJJKKJHIHIHHKJJKJKKJLLLJNJIJMLJJJNNNLPPPNPOOPSROOQRQTRSRRQRRTSUWSRSTRTUURRRRQRSQOOOTPQQRPPPRTSRVVY]]Z[_acaaegdcikjjnnnllmrnoqpmlopnjmommnlljkhkhhhhddegdebaaab]]]^\^^b`]]^Z\[^[][\\[[^_]_ba`dddeecebgfheihmkmlkkponpponssqpprrqrttpostpqttmpqpnponlpoqlonkmpnonoonoqnimoliiihgiiicdhhfecdcdddbececdeedddfhjgefjfgghgghhghjigijjfdhlffikfhijhkkliijkhikljkjkjjkikmkmknmllomnklkllkijjnljijjjkigkkkjkkilljjjilkkjkhkjkjjfjmmffiigijhehhjffhiegieeffgfefagdfddcdcfbccea`bb_`c^\`a]\`a^^^_]\^\\__[]]_]]Y\[Z\\XXXYWXWWWWUUVTUWXSTVWTSUTSSTPOOLJORPNMFA<2.:>A@?;6/&$$'+)"('((('(1>BE@A@@@><==;99840.-**)))$#5:+)++++,+*,,++,/AjlijfchijhpyqnolklmljjjkklnoldUB7467;_ys8,0..--,--,,+,.-*,,,+')02///002436789:::=>@ABCEGIKKKLMD* +  '25:AEHLORTUUUSSUVTW\]Z[xa>989=?BFHNMQSVY]abaeehkpuX=>>@>>@;0. + + +  + + + + + + + + + + + + + + + + + +  S~}~}}~|||}~pZD@9&'-101020)),0&"!!"#&((,0424457;<@DEBCFGFFGHIHJLLGKONKLMNNNQQPQPNNORRRNOSRQRURQRSRPRQQOPONMMMNOOONMOQOMMPQQQQRROJKKMJKIIGIMLIIJJGHJIIGNFGGIJGJKKJKKLKLKNKJKLMLLKNLLKMPNNPNNPPOOPQQNPPPQRNPPQSQSQPPPQSQPORRPOQONMNPRNOMPOLOQPNPTWVZZYY]aa^`bcbcfhiimkkknopmponmlklnmonmooklklhihhfedfeddd_^_`_\\\\[]_]YZYXWYY\[[XZWY[]^_``^aecedecdfiegflgjkmkjkonlnpnoppnoqqoprrqpqrrqsqqpqonnqlknrnnlnkikmlljlmlnnkhjihffhghjghdedecebdffdccdabddaabeadhg`bficeeeedeedfihfihhdbejeghhfiighjgkhiiihkikjjhjkkjjknklkniimnjlmmihkjghhjjigjjjhggjjkkijklkijkjhikjjilghiidhjjccghfiggfhijhgfgfefghgddfecdfcccd^aab_`ca^^aa__a_\]`][_^]^]^`^]Z]]\[a[]\\VW\\XXWXVXXWTUWUTTURPRTRUWWTTTTTQPPRNJKPNNQMEA<3/9=?><84,($"%,*"'&'''&(3?=>A>:=>:8::86430/.,+(()+&.8,**,,,++++++,3DijkifdefgjowpjpmlmnmjkiijlnlidR@77:=BM_x~A-../.,,+**,,+*)*,**$ )/10/0023448779:;==>AABCDGKLLLLJF. 
+  %06;?EIKPRSTTTSSTVWWV^aVoh?<::;ADILNPRUWV\aacdhimns|gA8=?>@@A@@<5 + + +  + + + + + + + + + + + + + + + + + + + + O||~||{|}|o[EA:&&-/./140*'&$"$&()+/026889==@B@ACECCFGHGHLILIMONKOMNNORPPRSNOSUQOPUVQQRSPQRSRPSRPOQNNNNNNMMNLKLNLLKPQOPTQPOLKKLJKHIIKMKJIJHGIJJJJMHHGIIHJIJKKKLKLNLKKKLLKLLJIKKMMMMNMORONOOOOONNPPQQOOSSQQPPPPQSPOSTPQQUMNNNOPLOMJLLOQPOQTWUXYYY^_^_bbbcgihhjmgjmnnnmqnmnnkjlnommnmikhiffggccegcbb`^]^\]ZZ[[Z\[[[]XVY\YYZ]ZZZ[[]]`a__dedddc_dfgcfgjfcjlilmmmkmnoromnqspnqrpppqonqsnoqplmmpkjlolmlnjkkmmplkjlmknljgghihghhceeecccbabcb___b``bb^`ad`ceebccdbfedddegehgeehfddfceghfhhjhghjghiigehiehjkfhjigghkikhjhgmmhjjhfhiggffijgfikigikjgjkhghjkhgiiggifhgiehhhghigdhfghicfggfghfcdebaeiebcfbbadbbad^aab_aaa`a__^]``^[]__]]^][]]\YZ_\Z[_[[Z[WVZ[VVWWVWWWVTUVVSTTRQTTTTTVUQSSNLMOMJJLJLNKD@>869;=>=94+'&$"))!%&&%&&%1=<<><9<;89:8665311--,)(*,'$2;,))+,--,,-028LllkkhgcfhipxoksnnomljkjjklmllfQ@:>BCCEEFJKKKLLLJ5 +  +  378>FIMNQRSUVSTUXXX[]]ViwL<<9>BGILORSSTZ]bdeiijmorzzI>?@A@@ABA: + + + + + + + + + + + + + + + + + + + + + + + + + + + M}|}}}z{|~|qVDA;%&-/.001.)&$ #%)+,./47:;;>?B@A?DDADGFHGJOKLJOMLMSOMNNPOOQRNOQSNORPRPQQQMQROOQQPKMQNNIRPMLNOLLLMKLKPONOSQOMNKIJJKIJIJJHIGFFHJJIIIIGGGGHIIGFGHIJJKJIJKIIILLIHJJJJKNMLLQPLMOQNNPOMOOQPPQSQOPOPNOQNOOPOOOPULLNNONLNLMLMOONPSSTUZX[Z`_^_baaeggchijdklmllknjkljjjjkkjklkghfgeedebeeebb`^\]^[ZZZXYWZXZZ\WUY[VWZ\WWYYZYX\_[]abacb`_fdecffhhfhignlkjmklqrnlnrpnmpqnppollopnppnnononmkmlnklmnlknpkjkplijkjefghdddd`ca`_a``^_]^]]]`^`aa_`abababcbbbdfcadceffhebefb_cgbcdhghggeefgdfhhedggfhhhehgfgighikghhihjijgegjeffhdghddhiffiifcgfefhhffghihhighfffhgfjigeeiffgeaeedcddcbbbadfgdcddbe^accabaa_ba_]`cb\]][\^^YZ\_\\]\Y[Z[W[\ZZZZ[[YYXWWVUVUVWUSTXURUURRTTOPRRPSSQOOPLMKMLIHHIKIGB@<89:;==<60)%%# #!!$$$$$#$0<>>=<;;9998657531/*++))(*(("0;.)+++-./135:Qgkjkgfffgip{qjpnlmmlkljiklllldQ@<=@DGJKNZnQ,+,*,*)+)***)(('%! 
+ '/21012355589;;>>@AABEEGGJKNONMNH7 +  279>EJKMPSTUUUSTXWWU\^WbW9=>?CFHNQRRVZ]_bdfgikmnry]>?B??@B@= + + + + + + + + + + + + + + + + + + + + + + + I~~}||}zy|}|pWF@=%$,..211.*'" !%))+.347=?>>AB@CBFCCEGFFHJLJLKLKLNQOOOMOQSRPPRPPNSWQQSSQQQSRQORSOMORPONRNMMMMLMMMLNMMMMNOOMMMJIHHHGIFGGGGEEFHHHHGEGEFHIHIIFEGIHKJKIIHIHFILKGJLJHILMMLMNMLNPQLONNMNNPNORPMMNNPPNNNNOPMMNOPKMOMNMLNLJJLLMMNPORSWUYW\[[\_^acccbggihjikiljkimlijkjijkljjghgfddbcchedbb]\]]\YYZYXXYYWYYYVVWXUWYYVWXWVXZ]\[^`abc`^bfdbaedfhggfgiighkikoolklpqppoppqpplnponrmmnonnnliklnijkmijlnghkmihijhggedccbaab`_``_a^`\\\_\\]_]]^`_aa`_`cb_`bc`addbccdaadebcdfacbecfeecdecdhffdceefhgffhgfijhhkidfikhijiedgiffehehigehighhfdghgfhfefhhfgigefgcddeccggccdgddfbcedcdebcdc`cfdcbc_abfb_a`^bbbaba^[^a^\\]Z\^^[\]]\\ZZZZYZYZWXZYXZ[VXYVSTWVRTUSOTVSPSRPQRRNOQORUQOOMLMRLLKIGHIJEC@><::;;<==6-&"!"  $##"##&/;>=:8;76676567221.*)*((')((#3;2,,,,-0126=UgkiiY]degipzrinnmnnljlkhikljjeR<99=@CEFIORlY))*)'())*((&&'&% + + #0223434567:<===?@ABCFGFGJMPPOOMI>% + 059>DILORSUXXWSTVXVW[`bbwZA??BDGKNPRTW[\_aadfgklnrw}i@@CA@A@D +  + + + + + + + + + + + + + + + + + + + + + + + + F~{~|}}||{y{}~q\F@='%,.-001.+&!!#$&)-136;@?=>?@?ABC@CDDFGGGHKKIKMOOMNPPMQRTSPOQPOPTVPPTTQQSUPOQTROOQOLMNOJIJJLHLJMKMJLJLMOOOMKIHHGFEGDEFHHHIIHGHGFDFGEGGGGGEFIHHIFHGHGFGFIIGFHKIGJIJIIKJLKONLKMMNNMNNNOONNNLMMMNONNOOONNMLLMNJGKKMJIKIJKKMNNSTTRVUWXY\\]`b`adffiiihjlmjihmkihkihikjffggabcdbcccaa`_\]\ZXWZXVUXWWWXWUTUUVVXWWUVVUTXZYZ\]\\``]_ac_```_eghdgfiikjkklomlmmmpqplpqonnmnnlnpikmnkmlghljjkkkkhjklgjjhhjhhgigdbcb_`c`]^`_\][\ZZX]Z\]\W^]__^^]^```^``__baa`bbc``baccabdecccecdfecbdgccfeceedbeefceegceghcjffhhgggeefhffffgedeefdddceffddeabddcbcecdbdaccfccfebcbcdbbadc`bebacc_^cd`abb^a_b_^^a^ab__`^^_^]^_[[[]ZY[]Z[\[WWYXXZ[XTWZWXWZWXXTRTSSQTSVNQRQRRQPOONOOMNQOOMNKHMPIIHGGIJGDC=;::;::<=:4+" $$"#"#$/9=<9786755564311.-,)('('''($!1=5-.--.0149ThjfdPTcffgpz}piqomllkjkkjjjkkibP;66:>?BEHJLRc_+'))((()&&''%%% + + + +  -14555557:;;=>@@CDDDFFGHLNQQRSQRH$ + +6;>DILPRTWUXVTVWYXY]ddZpgG?BDDILPSVVX[\_`begihlosvtG@CACCN + + 
+ + + + + + + + + + + + + + + + + + + + + + + + =|~}|}}{y{}}}rZEA<'#-/.002/+'!!#$&*+05=<<;>@DCBCDEEHIHGIJKGMNQOLOPNJRRPMMPQONPRPNOQROQPOMORQNMPOMKOLKIMLJKLLJKLNJLNPOOPOOLJIKHFGHEDFHFGEEFDFFGDEFDDEFFFFGGGHIHHGFFFHIHFEHIHJKNJHGJJIKLMMLMMKMOMMNNOMLMMLNLLNNNMMKMNMLNKLMKGNMJJJKJKLLKPQSTQRUWZY[[\]_^_adecfghfijiihiigihihfgiheeed`ccdbdba`ba^]b]VW[YUSZXTUXXQSSTRUUWTUSTUSUWUVY\[\]_^_``a_bbbaeefgihgjljhjlkilmlkoqpkopolnmmlnpqlnnnkmkkkmjkkmkjhihggkiiklheefdbababab]]^]\[[YWX\V\ZZZ[[_[^^^\^]`^]_b_]^`]aa_^___^^ab^baba`cc_abb`acc`acdeedcbededfddfgehgiddhieegecdfcabccaabbccccbdefcbccecccfcdcdaaadbbdcdaab]acbabc``bbbab`^abbadb``c\^^]\_`a^^`]Z_a]Z]]ZYZ]YYZZUYZYXWXVYY[YZYXX[VXV[TRUULSSSQUQONNQOPNMKOOMJLNLMOMKJLKHGGFFGGEFD?<;9887:;82+! #"###!"0>9;:87773234321/,,+&'&&%%'&%"1E=,---/15;UljeZNUdgffqzzpknllmmlkjjkmllkh`I8568<>@CEEGLPd{i-)*&%')'%&'$## + + + + +  -3476557:::=?@@BCDDEFFIKLORUVUSPK, + + +7;@DINQRUTUVTXWXXYZ[ae^krM@CBFJNQVVV[Y\`bdghiknosw}ZFBEFV + + + + + +  + + + + + + +   + + + + + + + + + ;~}{{{z{{y|yq\HB<'$-.,012/+'"!%$'),39><<<>@?@BCBCEFEEGHGGJKLIKLKKMONLIQPNMPNPPQPOPPQQQOTPPQQSPMOQOLLOJLJQNKJLMKNLKIKMNNMNNKJJIGGEEEDEDDCCDDCDGEDDEFDDGFFGFHGHIJIHHHFEGIHGHHHHJJIHGIKHKKKKKKJKJKKKJLMMJLLKJMKKKKMMJIKMLLKJIJIJLKHKKIIJLMMPPPPSRTUXWZY[\]]^abb_fddfiigfhefgiggghhgihfddccb`aa`^_a^Z[]VSVXTRQTSRRSTOSQSPSTTPSSSSUUVVY[Z[^_]_``]__``abdaeggffiifijjhiklijnmonoomlonmnpmnnollllghjlhhjieffhgeegghihegfbbcb`aa`][^_]Y[\WUXZVYXXWX[[XX[[[[[]ZZ^_][]\Y]][[__^]^`b^`]^__a``cacaaa``bacgdaaccaccc^aed`eec`becbacc`bbba``bddabeacbcbdddcbada_acbabb]]`b^^aa__``]_`bcba`a_`bb`^^`^__a^^``[[]^[[]^[]^ZY[]YYZZZYX[YYVXVXWXYYVVXXWZZUTYZVUVVQQSQMPPPPQMMLKNNNKKKMKJJKLLMMLKLHGFGFEDCADCA?<976789:60+# ! 
"$#$%$$$4;8::77863101/.--,,+'(''%##%#!!-ED.,./049Vlji]2Ogifgpyxollklkklkjikllkih]F61347:>@ACDGKP]se1%&%&'$#%$$#" +  + + + +  ,3766679::;>A@@CEFEEFHIJLOSUUSSPI)  + *7=@EJLQTTVXXZXWW[Z[\^f_fuOGBEFJNSUVXZ\^adfiiklmqtwdEHF^ + + + + +  + + + + + + +  + + + + + + + + + + + + + + + + + :}~~|{|z|zz|{q_KC>($+.-1142,'#!%#%+07;::;>>=9320110.-.,*()'(&" %%" &!,MK,.101:Zkjib5Bfgffnzxljklmlkjjkjjklkig\H301236:=?@ADGJN\us<$&$%$#"##! + + -46668::;=:<@@@@BCCEGGFGJNIJLNLMOQQLNOOLQPNLKRPMNTOONPPQNMMPLJONMNMKMLMMGHIJHKJLIKLOPOMNMKIIGEFEFCCEFFECCCACGEACABCEDDDEFDEFEEEEEEFCEDDCEFEDEIFGGHGHFLJGKKKIKJKHJJJJLNLKKLMKKJJIIIJHIIHJHIIHHJIKIHGIHIJLNLMMPNRUWUUVZZY\\]\``^^a_dbbadacdeecefdcdacaa_^\`_]]][[ZZXXTUXRPSQPSSRPOQSLOPPPQOOOQRQPQRTTUVUZ^_\Z]^Z^`b_abgebdgecfifeikhgjkjglmmkkkkjkkjkljijjikkkfhglhfghffdgdcegdcbda]`_][]ZZY[XWUVVSRTTTTSTSSVWVVXVWWXVVXXWYZZXZZ]Y[]\Y\[]\][\ZZ]^[[\_\]\][_____^``b]`aa_aab`acc``ab``c`[^^\\]_\Y^_]]___^`_\_____`a^_`aaf^_``]_``^^`_[[^]Z]_^[[^]]]^\\Z\__\^]^[^]\ZZ[]XY]YWXYXXXZWVYXUVUSSTTVVVVTUWUUUUVUUTUVVRQSQOMNMNOMKJKLLLKHFJJIHHGGGHJHGHGC@DEDB@@ADEMT>66767742,*0GGIMKJIGDDINQTLFUWUVONKHD6//1..---+''(&'# "$" ',$&IG-10/>>BDFKOXol<&$%##!""    (4677899;<>AABDCEGFGIJJLNRSVVWXVP6!  
+&6<@DJOSTXXYZXXXY\][[`eaWs`KHIJMQSW[]^_acdgjllnqrux{Pm + + + + +  + + + + + +   + + + + + + + + + + + + + + + + + + + + 1{}|}|{z{|{s`KC=(%,,.203..*$ &.47:<;9;<==>BCABDFDBDEFEHIKGILKJLPMMMOMOOQPQNPRPNRUPOSRPQQPQSPOSONQOMKJNKHKIIHKKIHJJLJKNMLNLHFEGDDACCFDEEACEGDDCDCBBFDDDDEDEFGD@EGDEEEECDDEEEGIFEGIGFEHHFIKJGHGHFHHHGGKJIIKJHHIHGHJHGGIIIGGIHGIIIHGGGFGHKKNNNNNQTTTTXYXZ\[\]`]^_b`bbibgbecbcbccdcb`b`a\]]][[[][YX[VUSTSQPQMNRQMNPPOLNPPPPNMOPPOPSRQSVVUXZ\WY\\Z[^`\`acbabc`beeedghhijiilljmljiikkiginhjllhhjicddjgeefcfefbdddbebba``^]__YXZYWVVVSRRRPRRSRSRTTTTUUTUURTWUTVVVTXZZX[[XW\Z\\_ZZ]\[[[\[^^a]]_b]]b`\]`^\^_^]\__]^a_\^_^^_a_Z[YZ[Y\[\^]]^^]\_]\^_]^_`^^]][\^bX[\\[^^][\^\XZ\\]^[[\Z[\^][\[Y[\\X[]][ZZ\XYZ[UXZXTVWXWTWVWVUUVRQVWTWWUTUUTSUURQSUPTURNNQPLKMMNLKIHJJKLJHHHGHGFDGIHHIIKOKAAAA>=@@=@VnB555432/,+1?GKLNOKMJGJPRUUNQ_\ZUSPNJB300,---+)(&'%&$!  $+-&%DE0/2<_ohdcaddefjq}tkhkkjjlkhkkjklkih[D3.../258::DJLRVXXYYYYYY\[[\`ab[mkNHLLMPUX[]`aacglnmopqqsv}o + +  + + + + + + +      + + + + + + + + + + + + + + + + + &~~z~{zz{{{{{t`K@<'$,-.200/0*%#.368:999<;:;>A@?DECABEEEFHIHGNMJKMOLMOPMPPTONNTTORTTPPRQPPRPMOQPMJMOMLLKKHGIGHGKJIFIFIMMLKJMLIEDGEDCCBBCEDAEEECDDFEBCEDCCDCCDDGBAEFCEFEBBDEDFEFGCEEEDEHEDGIGHIHCGFIJLHJHJJHIHHIIFFHIGFHHHIIGHFFGHFEGFFGIIJKMONNOSTSTWVWVWWX\^\Z\^_^adf`baf`_`b`^^`^\]]\Z\Z[YYYZYXYXTSPPQPOPNPPONOPMMNONNNOMNOQMLPRPORTSRUWXWWXYU[Z]Zbaa_cba_dccfhigilifiligkljijkigggigigeceedaeeffeedcfdccfdbbeb_``]Z[_\YY[YUTSRMMNNOQPOQQQRQTSTUUTRSUVVVXWVWYYWY\WWY[WX\\YX\[XY[\Z[^\Y\]]YZ^\Z[\ZY\[]]]]]]\]_]\[\^^]\Z\Z[\ZV[ZZX[\YYYZXZ\[VY]][[[XYYY]Z\[Y[][[^]XZY[XX\\XX[YWX[ZZ\YWYZ[[][[]][\[ZXYXXX\ZVUV[UVUYUSTTROTURTTPORQRRSSQPQUSSSQOMMLJILNNKJIIGKLKGFHFDDGDCEFEDEGLgrM>=????>=HXq?43231-++0=@INMPMLNKGLQUXSMU]ZYWTQNL=20,,-+)('$%$#""',)'&AK238angddddegggr}shhnnflllgjjjjjjii_A2-,+-/3579<>?BDEGLP\m`F$"""!   
+)*) + + %49<;::;=?@AABEFFHIKKLORTRWWYYWWRB" + 5=@AFMQVXYXXXYVX\^\]\ae\tyQHKMORUYY^abeehlmopqpqtx{{ +   + + + +  + + + + + + + + + + + + + + *~||||{yz|z~||wcLA<*!*+,1000.*$%0458;977:<9:=?@>@CDBADDCDFIGIKOHKLMLLNRPNPPOLLNNMNQPNMQMONOOMJPPMKKNIKHKLLHIKFHHHJIHIGJKLMMLLKEEDDBCCAABCCBBCCAADECBAA@BBBBAAECB@ACCBDCBADDDEEEDEFDEEEFEFEJHFGHHDEGHIIJIGHHIJGGGGGFFGGFGEFGGEDFGGGDEGEEGHHGIKKKMOONORTTVWWXUZZZ[]\]]aa_\a`a_[_]_]^^`^`]ZY\ZZYZXWWWVUSVSQRROKLPNLLMMLMMMKLNNLNMNLLOPPQSSTUXWWXWWZZ`[[]ba__a``bf_adhfdghdefhhgjihjjjieggjihfccfedbeedehdcbc___b`aaa`]\][XY[WVWWUSTQONPMNNNONPQQPQRRSTTRRRUSSUVTTUTSUWXUWVVRUWWUUWWTY[YYZZWWYZ[Z][Y[]Y[[][^^^]]]ZY]][Z]^]YYY[XY\WRYZWWZ\VXXZY\ZXXZ[YYYWVZ^ZZZZWYZZUZXXUZYYWX[ZWXZYWXXYZWUUWY`^a^_a_`a`ZVVXWUXWTRUWUPQTSSTSTQQRTQOORPORSPQSSPRSSOOPMIKMKJLLJHIHEIJHDCDDCBDCBADCCBFQ{xM>=?@><@JYn>111/-)(/;>@PSPONPPLKMQTWTPW\[YVTSPJ5/+-+)(&'%&%$"#)*+)#GR39aiggedegffiq|nbhmnjlmkjiikjkije[@1++++-01479;<>@CDDDGQbcI($ + #-./-  + + '6:<:;;>?>@ABEFFJJLMLNQSSTRVZYWUQC!  4>AEJNSWXXYYXX[\]]]]^bheo[HMNNRW[]acfegjkkmnnprvwz +   + + +  +  +   + + + + + + + + + + + + + + + + + + + &~~~{||~zyy{{|}{udKA:*!**-1//.,)&%26448:77:;<9<>@>@BAACDDBADFEFIJKIJMLLLMLLLQMLOOOPOQROMNTNNMONONRMLNNPKLJNIHLMLIHHHFIGIIIJKHJKMIBDCCACCB@@CA@BD@>@CAAAB??@BBAABDBB@CDEDDBBCECCCECBCDCCCCEDCDDEEFFFEFFDDGGHFGJHDDGEEFFDECDFDCGFEEDEEDCFCBDFDEGJKIIKLNLMQQQTVVUWZWX[\[[]`]]\`^_\^\[]^][]^^\YYYWXYWUUXWSPQPNMOQKFHJLKKKJKNLKLMMLJLNJKMOPOQRRPUVVVVUTXYZWXZ][\\^\_aa_cdfedgfcfghhiifhjifehgfffhcacfccbeaabd``_b^b^``a^^__\Z[\ZVUUURRRRNNQPKKKMMKKKKNLKNPQQNOPPQSSRRRRSTTUUTUUTRSSVVVUUVYUWYYWVXXXZY\ZZZYSWYYW[\ZXY[YW[ZZYYXWVWWZXVYWVYZXYWWUWWVWYVWXZXWWVTTUWTVXYUXXWUYXVVXXXXXYYYVWVVTVVVSTONU\]c``a`bbdZRPUUUVVSTVUVRPTTSQQRPMOSQNOSPOPQMNPPLPQOIJKLGKMIGGKIIIHGHGFDECDGGCDB?@AA@DTu?7:;<;BN`tf;00/+&'*39=BQRPPPPQLIMQUWQQ[ZWWVTSPC1+,*('&&%&$#!"&+-*!$F\Pciggdefghhiq}}rhhligkliikihhjhhfX?1-,+,,.02668:;>@@@??BI]aL(  '.1/--$ + +  !2:;;:=>=>ABDEFIJLMNNOPQQQTWXWVVPD& + + 3>@DEPSUWZZYY[ZZY\^__fjei]GKOQSV[^_cefhhkklnopqptw|% +    + + + +   + 
+  + + + + + + + + + + + + + + + + $|||zyz||{{zydL@>,$)),./0-,)'!'057456669<99:=>=ABDABCC@ABCAADB?@AACCDCDCDDDEGGFDEFEFGFGGGIJHFHHFFFFDEEEFFEGFEDDEDDEGCBEGEFGHIIJLKOMPPOQTTROXXUVY[YY[^YZ[^[[Z[]]\[VY^[YWWVWSWWVRQSSRQOOMNPQLJKKIILKJLMJJKLKJHMLGJJJKJONNOQPSUSUVWWVVYW[[^[`\``aadcbcffbcefcehfdfggddffddefbbbc`abc`aaa`b^_bd`_`a]]]_\WW[XUVVSPPOMNNNLJKJJIJJJKLLMMOONNONNOQOQSRNRUUSRRSUTSVTTUUSVVUTTVVTUWXUXYXVWXWWXWWXZ[[ZZZYZ]XXYXWW[WVXVSUVVTVVURRRTSSUUQQTVUWTTVWTVUVWVVVVVWXTVVVTVWUVWVTVUUQTUUUVKFR^^acababdggXTRTWURTVRRUSNOQPMMNNLMOPPNOOOOOONMMLOMKJKHIKLJKJFIKKIJJLIKLJFGKLKJF>=@A=@Mqp=7::>BKWeuc60.*&$$+4;AEPONOOPOKJNRWVPR[ZXYUTPN=,)('&%%&%"! %,.-'?fhjhgdedgghhr}~rifkjijkjikjihkjheZ@1.-,,-.01455:<<;<:;;=BIYbN,"! "'**-//,)# + +  289:;=>?BBCCFILLLMOOOQPRSTUWWURPJ* + .BCDHOSVXYZ[\[\Z\_aabfklikLNQQRW[]`begfhjlnnnnpsvw{ \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/cudaSift/inputData/righ.pgm b/third-party-programs/Velocity-Bench/cudaSift/inputData/righ.pgm new file mode 100644 index 000000000..1b51fc73b --- /dev/null +++ b/third-party-programs/Velocity-Bench/cudaSift/inputData/righ.pgm @@ -0,0 +1,1278 @@ +P5 +1280 960 +255 +Pemj\`]QJGF<76565324@WxdabSEGVaZ99?IOC;9:=BJSUTB5244;EU[ULJRNH7/8BC=8:5554G\ipx|seYK_xc@VG;88::CQhy[GffYchbUKM^rokeekofdlwvM;OTdtteZ]agkh^pWFDCDGEKb_ROLIKIERdehkjkmlfaX_jqi`Ol}hsuxjehjj_-:brvx{~`6FECCA@AIXTMMkwl9IE=82/?qbRKJC<;?89;;AUK73441}W0$!   0uub! 
?a}mR7!(JmRilj`e^QLHN=986553/4?X}decRDHUfT87?KPB:8:=DKQWW@5445=HW\VPV`]V5/6>?;798479H]jtz}tfXKdx_HRD;99;FEDCBBEGRYatg>KD>821St{k[jPS]RF9==541153.xl1  "Lnyn; + + + + ;a}nT22Y}Uhoj^b^SOHN>67543005A[hgeSCGR\Q26>ORB=<:=ELOVR@5457?HU\UQ[eea=17@@;:68678J]js}~teZTdy\GOD;99;>EUkwMIgd[dkgZLFfopmlnppf^kxuK;S\f}psf^bflnffoVABADGFLfaQMKKMJDWgdfghhong`[l{\np^q|nVz~}oty}{ognpq[.?itz}[?EDDCBH\gpzyxuf>IE?82.Qkr}Lp~jw~a_BXI44134.*fzx}}~o@ + + + + + Du}iP + + + + + +  4]x{gP.AnWhnhae[WRQP<56764223C_~imdP?CR[O78?NNA<<:=BKSWU@5458?JV]XQU]_Y;39AA=;:6879Nais}teXRfzXHSC;===?EQmxNNgb[flgZNLhsomhhnmd_pyxL=W\pur`[clotfgkRFDCEFFQg]QNMKLIGXa`acbflmfaZu}aym`nusPntztuwsmydIGR[]eh{xpnqpsZ0=mu}R@GDCDG\|tl]Suc BOF?;32WFeuJxrolv_hVpS22342*)]qqqpruxzsA + + + + + !NVg{{yunb@ + + + + + + + + 6`{xcD)3_¿Ykphac_WSNL>79972325>^}gofOAESaQ7:ATPB<;5577=JW\UNRUNE53;A?;<85558Sals|{reWSe}WIVC>DFB@FSlxPNhc]fmfULMgqngfjmjdaqxuF:W\rvsc\eknofehQGDFFIFRl^PONLKHJ]b_aaaeikfa^vw]zyj_lsmMl_VT\gikvaz~~bMKOPMNRaxroqouX-Gqx}Q@GEEFGmw^HMhy\FOGD:20<.K\STQdb\cfZmZ93122-*P^`^`ehjnty}}oQ$ + +    ?SWf[LH% + + + + + + + + 7[vuX7)Mm_krk`a_VSQH=8::73323AckndODHT_N99BPL=:<>AFNTXP=5457?IWZRPYZRH52T_prse^hmppagjTIKIHHCRp]PMNOLHH_e_bccgkjd`^w~xoawmamviUjUPNHT`jrƑLpploqqqnrx~{`NJLHFLP]zrosooS0Ipx~QAFGEFJgl]TRcn~["GLIN;20...,4.9CTYmb^d^63221)%GPQNQWZ]bintvxw{}yjY8  + +       $3 + + + 2[{xaJ(6c|`mrjab]VQQI>8;863314CelnbNBGW[K79ANL@;;;>FNTUP?4448AJX[STcloT33?D=:8::55>Pemu}yp`UNg~KNQIKXaJCJYrrKTndaineYNLjtmgfnpldcuzn=AU\{}qvfakopnaikUNVNKJGTm\QOPOLHI[efhkimojb`Zjtrni^}hcmy_N^\[UOW^fl͎:g]OJLLWiptxz~~x|ktdPJGFGKP_{spspoR1JpzMEHFFGIRR\uy{q~T&DJOW:3-***)*+-/67>EKOB21211'!:CFGHKNRZ`florsuxzyxqnfYF.        
+ + 4brZ2 0U}ÿ`lrl`^[RNMG<9;754304@fyjocKBGX`M69BPLC::=>EOUXO=5357?LW]VWixvT84=D?=;:879qu`="Kn_rsha]WPNKE<;;555427Hjygm`HAI^eE5;CPLA9:;?HQVYN<5258@MY\TXemjN44;79:=Tcltywj^UUk}HSQIOdgPCKXxrBZkbblmcWMRipkgdotnedtz~n7CX^~qqhkqtspcieUTYRKLIZnYPNOONJGbfcjilopjb]\elpoeYc_nwYWddkpwi`_rˁ9fWLHIIXdgrlqoi_[VODckljdkw{zp]^fLMKIDGO_wtprqtM0Ss|HGFDF]x~mYGSdH(KLQOH;1,**()+,0114221233443%#9BCCCBDGKR[agjjhjjgaZU<  +    %+.01125565234435676420/+))((%$! +   .Z~hJ&?daqpc__XRNJD9:;785027MnvcoaIEPbeF4Wglorng_TXlHTPFThfKDM`xfB^k\bhlcTMRotohdnpjcbt|e7IYasvucdjnsn`oeQT[QJLE_kWPNNNMLMdedfhfhljb\WYeslUXebn{XZdq~qc]tyAjTKHHLV_etu`icTGLLB:>M_ZQX`fjnSVnRDABGFHN_vijyusqpqF1Vx~CFEEGbtlKP`rA,PQTV`VE0++)+--111352233:<92%#>FAA@CFGKPU[`ceeda]N4 + +   #*3:<<>>?<;;::978664556645323232/01322302100/.)%! + +  576;CP^]QT]^WC64BGLZnd?::AWgnpsoe`QYp|HUQHTfaIENc|d@ai^\dfaULUpqnhirri_fw}b3HYaxxs_bipsnbodPX]PJKKchVOONPPNPdfikhknrkb\Z_nuhSVgdm{O^kxznb[t¾o@gSJFEKW]bqr]j`RPpaA69I_YRP_`hsGI_KACKSNIPazƯžwvstqnF5Zz~|AFEDENRSiy|o_?2NSX\acWG7+++,/242231449@?90#'DQPIFEJJKPTX]abcb\N0  + +   */39<>ABB??@<:;9:77875443443202233111011223000234/*(&" + 3]~~dC#.[~foodbbWNJIF<;987855;PuqfiXGCQ`^=5HU]\TYccYF45BHN]rd<9;@[hqv|{j^S[owHTNKXjdCDJa~e=ch\[dfaSNWprlghork`fv~a5IXcmwn]bhqsmdpeO[`MILLeeXQOLPRMSegjmlmqrja^[eoqfV_~d^qPamrwurmbWzʿkBeSJFFJX[^rn^i^QaZ@44FbXV^eXirGL\FBH[bUNQa|ȨɿyvvsssmA4a}zDGFCCFNxziYMO97PTYacgj`J71-,/1232247>===;:999:9744543220.22231110.02210013431//*)'''&'''%$" %V{iK)$Ox¿iqubccXNLKC:;<97645;SuniiXFDSfa:4WOJZkfEELb}f:cjX`dc]QJUtrmfimpjbiu}a4KXdpvm_emprmamdNWZGJMMhgXQONQRKPfdfggjoqfb\\gqufUbdbr~Mdnosrpl_T}̿cE`SIGGJTV_qiXc\KOfUB0/H_XXnjZlvKRdOHShgXQO]xȤw5ICLaqxuttqrk?5ax~tCGFEEF[~g[ZK\s~56ORZdegttaQ@002434448@PSJB90"+=EHKNLNPU[b[duzaUC+" ! +  (269><;;:;;88766534330121200100010030///13312330..0/.--///0,))'! 
LyrS0"N}hsrfacVOLKA99;:87459UymkjXEGVjb@8=KXI=9:.7EHP_r_<:8F^kpx}xj]N^qn?TMHYngFFMc|e>fk\bid\RMYtpibfpniciv~[4IXbpvl\kvuslfr`RWWILKKhfUPPOOMHSgdccdhlnf`]\fx}nJ_dkwJchpxusk_T}ɼ[FcSIFFHRXbri\c[PXrX@.)La[WifZmwK]iTPcid]WTaˢحkr,*.26=HUcq}{utsqoh=;gx}p@GFFDFYhPXSg|48MMXehforwhYQEBC=:79DW]WLC80 #9BFFHJKNSXYOlzZbXAAEL<-'"" %+259<NSG<:;>EJUVWI:459@LV\YXetnM608CJPbtW@<:Eakrz~xh[P_ohCTLIVj^FEPf_@ji`foj`SLYrpigjuqi`kvY1LXfvykao|yujdq^QYVLKJKieTQQONKERdcceffjkea^]eyxeVg`l|Ofmuq^UȺYMdUJEHJRYatf\cZPVqW;),O`YUmoctzHkqUYiedc\Wc͚٭|@p5311653.2698<=FEECDII[s{wo+8IHXkfbgjotsiaa^SNLS`eaZNB9.  :AGJIIKONL??fqNMMMJRd`NLJDA>;89;7/('$$**.25668::::997876788765522432110210-1011/21/..0-./000-/2212133332576454355321/,'!7h|b>%=zmtpab_VPLM>89:78524>[kkfSEHWi\=7;MTE><=?EMTYWH:468?FR][YesmP:17EJQbmV?;@C@1&$(,3;<=6?Qw°vrtsopg68.":DILMLLMLH97XPJHDGIGIHHKLOQ\ZZ`[TSC4'%)(*-.,.02321100/0244774552211220/1--./101/0/00/0././.00./0/31210334565345676656640-+$.[|d>5wmtndcbTMKI?:<:68636@_kidTDHWeU;6=PYF><:?GNRWXI;648@GR\XXcmlY>18FJQcqQ<;HIINH0&%'.@O\\GBJjstutqqe6>kw|d@HDCBCCJhvnLEEEY&?PFb\]aca_YQbvwdjrofa_XL<5. 
%;EQ^]WUSPI86HBFFDEEHJHKJJMJLLNSY^e\2)/10-//-.,..,,+*+*,/-.34334310/13211..00000/..0/00/00../0.01.0/1014224554466678878888741-(%"&Pv|dS}munddcVNKIA:;<89735Bbjg`QEIYiZ:6820-$ !@kuuovndfcWPMG?;?:97845AdmjaOFJ^m_;9=NQF=;<@CDFD=2';k{qulcg_TOLI;;<:88746IinrdNBL]mN69BRSD;<>AGPW]^L8658@KU\TJHHE;21U]tovfgv{uodhjXSa[KJHSk^PPONLKK[lqukmi^\W\jtk[P{|msv\w{N{xo_dͳ?aeSMTSOU\dx}WceVNfnF4&)T_`rz}~yIQLHEFHQ]rĆƿϕ~t*XG_fe[A*&()5I[^]TIB]Å[lpnlbRsf[t~s}oorqoo]1Hpw~LDFDCAA@@AACB@BGfl"GMJvxLDFNIESk\^ZSLNP43,,?LZ@2!#8DGHGGJJtZIGGGE(,NNG90/-+,+)*)'$ -7.)&''&),+*+,.-....1201.-/.-00//.01/.000/.01323232456887769ABFHJLG=2(Enw{~}~~qvl_d]VQJH=<<966546Hh}qtdPDLalN:9DQOB::;AHQW]^K877;@MZ^SMMMF<2089KckmoslcWNgTKQKOeqT>HUs{QOgcalph\PJbmnhdnrogbq|E:VVjymtccpsrqcfjWS`VMMIUhZOQMNNJL]ktsrri]]Z_k{qaQ~|owwUvw|Tx}l`bϮ;gfUOTQPW[c{zXieVKjyE1#%MTdw~~ONLLGGKQWnŋ̌}u/LOgou]=*&'(6K_jeYK@^|ZnspoeXuZEkyz{nouqprY/Inw}OFGDBAA@AB@BCAABob"HNDxwSNJMJHKf[TURNISO62(-BQ_C0#9DFFFEJM{KHIIIE$&HGA2.-.+))+)&'%$$ !27" #%'(*,-,++*-.-../01/-//-../0/.00.,023322133466779989;=>>?DFKNOMM>1#+bnpstustww}{|~rul\\\URMF=<=:88636Jm}lufOCN`nQ4:BRPA;9PhmgoqibVUkCLPHUjmL?IYu|HPn`cntjZQQhrqmlrwmeds{|:BSUgyspZ`twrm`hiPJOMLTRYk\SPOOOJH^fqggld[[ZcozoXPzkwsZxyuX{m\hʠ:jeURQNOU\hwZncRKdmF0!,K]oKkFJKIGGKTiӾNjyum=IOeriM9'&%'3R{}aK<^~`vofQuUDqhbonec`fpw}su|~}oosrrkP2Snw|CCDBACBAA?>@BDKnX(HJG|ea[YUME>PaYMPLLOOQ]N51'0@QT>#&>GGGGGKPFKIHG?(C?801>jqh^T3)&&# +   +  !"%'%&(**),-,-.//..-0100.//111200344545878:9;;;=?BDGNSX]WQ@0 ./.**)&  "%',/18DIPV]gnw{qxiX\URPMH<ABCPO)ILLrcc^TMHE@BDGMQV]]]WH7$   + + + + +  &.8=AJJ_uvg]`\UMMH==?<9876:Qu~rq[JENdpK7:ERNC9:<@HSW_W<659?FPZ`bivwfG33BILYg_E;5?Udow~naTWmEQOJZnlQDL]yxFUlbdpqdWMMjurrusslegr|m5D\^{rn]fyoZlgSU]UQUM]gZROPPMHJ`frllgZW[Y^cigVTzzmV{yoY{~eWeſ?laQQSQRU[gq[q]RNcS<.#5T^|gOiKIHGKORZjԺxx_RCWq{gK9(%$&2ERRSOB?ajY{vnZM{MDs`[jjookjvwpjtDt~sirsfnw||FFEA@BB??@AAA@Q}J+IMMcSYWOEBB:JIDDTHGPSXdJ23! 
4BOoG:,CHFGGGJO?KHIG< )=>94?V'(&%  72 + + +  #"!"%%%&)++-./.-,/01232355467557;:;;:=>@BGKPU[_`^YN:)   + + Jxwzxe^i^UQME<==99667YwvhO9)&$'6I[\UO>-EGFFGFHTy@HGHF=)><85CyW()'&  G+ + + +   !#$%')+))(*-/121344455679999;<<>@BEHLOY^`a`]M;) + + + + [mZZska`nsy¾{zifj^UPPF<<<99955:g[mxlWFOHv_Xis}~}|tlѾblAAJ\F2NMZIEOVQH@BMbGAKJCHNSWbC3/7EHLTKJ<1GFEFFGIQqZCIIJF:(<<:5DP%(($  +<   +  ! ""$%%()+,-0112465458:=<<=<=?AEIKPX[``a`YOA1%  + + + ^oWSY{}A")6AJS`oxxvjgf[XPMF>=;98634:S}rpl\EENZUB7;HVNFMY]MLSX^N9769>HS\_[ksoe@/5CGN\t[>94?\ms||k_Q\o|FPMGLc_FFO`~o@aiahrlcVMUjnohekpkagwg7LYatupMjwrk[mdS]_KJPN]bXPOMNLFLcfhmmlokOVWUXWXWW\yp}fUyy}agwjRpŷ:p`GIIHLW^hkSn[OVf=,-ZbgVXdNHJRX[_iհ׵MbLurwkK3'$&*5ZeJ@>hYlwoRBIF~]Tfpz}}}qgѸ^i=_[{rqvqsrsx{t?AAA?@@@BCD7OKX=?ITTJEBR]DAGABSRRWaB3.7BHClALK8:MGFIHGHLOxnGIIGHC1.@<;6HK&)'$ "6JD>8- + +  + +  "%$&)*+*,./1304689:<=??@BCHKPUY^`__^XNDB1   + + + JeRQeJ% )4AYbv|zjgk`YOMG><;:8856>[~qlhXECQdaC5;HTNHP]bOLPW^Q>75:@JR\_^mri]@27BIM\r^B96A[ls{}k^M\qw@QLDGRICEN`|pDbidjqmdUMTmonecmpi`jva1K^gotmRg}vqicpaMYZOMQJ^eYSONOKDQadiokihbUWWSVSQTVWtm}e\zz]jvzfMqdzʇ>ucMEDHOU_klXmVJPo\;, 5VfR_dHIOXY[_lҮدJr|HYSWTD2(&')8]vjVKA@jYoumNFKG[[hr|}~}peжcl}:][|nicOHOTix~{~n>DC@?<>@A@>?@CJO=9TJ^w>?GNMCB>PZD?>AIQNKM]<2+;DGJTrZLMJ3ESJGEGFGJRv{NIIIGGC1,>>>9LK%(&%   + +cnhvpQ +  + !#"#&()))+,.012558:;>?ADGKOSX[[ZYZYWJ:#   + + + + + 0{zgeml92AWn|yxfei_SPNG?=<;9877>YohcVADQff=7=HWKIS^_LOSZ\T;668?IT_`blkebF38BHP`qb?:9D^ns{~wj]R^rnASJA?==)7UkSb\FIQWY[^eӯةCvs@QRNLB3%#%)6LRSOLCBk]tvjRIDEVZht}~{{he}Ƶ_hwAʽS]zhc\PJPVix|xw}}ww{{y}e=DBA@AA@@?AABMdp6;PNeu>@CD@978EVDBCBEHIGIW<3(=DIU`e^VQH1#LNHGEGHHHO\WNIGIIID1-=3   +  #W}~8'7Wlzxbge]VRPG?=>;;868@XmhaRCCSbX;8DACFGJGHDJW=3'!=ELhqspaPG0*BIHGFGGHILLfgELHJLIHC,-<=@A\<''%$   (ufr\    !#(),../.02568:=?BEGJICCCHIC    + + + 2b0+Md}{{udhe^TQNC=?@?>979?`kmbQDESaX?8YoIfOMRhpZSZѥՠ7f?rmgiG-%$%*9SjwiT?Dp^triJQ:L|S[gegjnqfiѶ̱UvdJɴȳBa{|XWWPNd\foyѲiYLFEKX_b]HGJNNUxYhx|vuuqjgt|~zwwsr|(DTLnn9@D@6/027<;AGIGHHFKS70#'A=Nq')(%$    6|u[   + +  !#&*0759B?EFG>-  + + + + 
2Xnw>(Llz~~xvyy{{y}~ztbgg]WRN?=CDKPH:8CfstgRAG[l_87?PQJLYh\NPW_`R6679@IT^[\l{zT42<=ETi`Fjecglj]QK]moifhopg]iwM6VZjetihwvoe`kXJOMKNMOfaURPPNIBVisnikfZYWUUTSVUhNh|Gpy~aU{˰^VqTIEGKXX_q~]biUIOxZ7$AYu=bNMOgpUSbΥԝx5fDb[_`?,%%&,:HQZ\VEBrau{qgIR6VyTbfYZZ_c`jκήVz^Iƫ̳L_uNVXOOc]enwѫcWJEFLV[^ZJHGKKQvZc~wv}~}utwz{vx#CRFph=@AC:.128@>=?@=CEDMP6/ '>DFLPRZRLE*0cZIFGFGHJcqvumPHHHHHB' 0:9>@EVmotr5)-*%#    ;^  +  +  + !## + +  +   + +;Nsf{vkK-Vxyvxy}}{|z{}maif[WRKA@EKVUJ>9CguvmUEL_sa25>OSKM\h\NQX^aS:88:AJT]ZWgusS42;FHO[_N=99KemsyzncWJb}\AVH?=<>AHSi]EifacjdYPI_rpfcgnpf_kvN3VZtkvfgxuobbjUENNLNMPf`VTRQQJDVjtoloe[[WUVTTTPd~t}Ph{Pqz}^S}ʭ^SrTHDFKSWat~aalRPqZ6$@Yv;bRKPefNQkѤ֘x3cF`icR<-%#&.=NSSZT??x\ksrmhIV2XyLjjWRSY^`jϻͣU]MçʰHewLWXMNZYckxϦcUIEFLUY]VHFFJKQ{Obqonlnmipu||~v HTOwa??CFA864FGHJMMKJF)DgLGHGGEGMhoqwkSIGFIKE$ 49:<>ETchgZ73..+'  +   ;F   +    +  + + + + + + + + + "24I;3"Gn}~zwy{|||~{}}reke]XQKA@DNZYJ?;EjpuoZDMbs^95@ORIN_o`JRW_bT;36;@JV\YYaf[K84AGRl]Kgf`fjcZRLdvofbhopealxI1   + + + + + + + + + !Ek|zwz|{}|||{mdie_ZQOBDIOYXJ=;LmtxoYJMb_57BMPJQbrZLT[cdN;76:BKS[Z[gfXL63>ILVplK;8;NgrqzxvdXTgRGV@<>=>@GSmXLogbgoi\RMerne_gnpecmw}E:V\liyfaflmobcgRQ[YQPNUe^TQQQRLDWgmxtlijcW[WWVUVUSrys|Qo{Fqsw_Z}ͽTYkVIFGLTXbw{ZgjSOfX3 !DXw4aSNK^]TUf͚Ŀѓ~|/WFmwR=,$$$,FgqymPADZhllkc@e~2ZxRniTTSU_br˰їSH_˻˦GjrOUSLKOUdhtΜ[TJHHJQTXTGFFHLV{TmjTIGFNRBRk]HHHblpvaq|l$ORLsaPPTVVPMJVaLGGFELMDPK1-+@FGHIJIKIA&$6AGGEDGGGJ[glj^NHFGFGE 38:;=ES`dhUA:971*  +  +,] +   +$ =OP1&&#!!!'% +   +  #Nwywxzzzz{|}negf`YSN<@JRYWH=:JrvufNALZjT78@QPIN^jVNUYcbH:67;AHT^ZbvyiU42=HLWqhJ;5;PgqxwfXRiOFPD=;<>AIWkUKlgajtlZPKcupiikope`lxF7Y[ojud]afloeglQVZYLSHSe_UQOOPJF[fnxypmmb[ZWZZZYTJrut|Rq{Itpv_[ƸM_kUHGFJVXcw|ZhfRK_uM4 %F[|6[SP[rfVTkϴ˔ÿ΍}z.TEdjXN;)$!#+FZfn]MBFZglkkdCbz0_vSpeUTSV`ao˲ϑQJa˸ȣAonL\XOLNUciu͔TPFGIMSTZUGDGKNP|VriPCJNLG6LdYJHJ\mn~Vlu|u{zh"MPMuc\Y]_ea]brgYVROMNJFQK3,->DGIJJKIH@'-CEDDFIFFGZnmoTLGFHHK? 28:;;CM\bcL=:973/   +  !s\   + + + ",.( /1*),+,,1?6 + !! 
1l{}|uwx{|z{|~~lbie`ZTK?CIP^WG;;LtomaMCJ[dM68ARQJN\aOMT\c`H767;@HT\]fy}pT04?IM[mlK=6;Rmrz}r_SOiFIRA>==>AHUmRNifeotjWOIgtrnnqsnc`mvA:X[}}mrb]aeiqbhgTU^WKPLWj]SQQPQKG\fnxwolmaYYYZ[[XVP~nsyMquQwus_TG`jQHJINXZ^z\heRK]vN6!I[w{7XLP[nlNQo˨ʗ͈~{(MJbor]?)""&,BNRZSMBJ]jkkkd?dv5dtSogWSRW_`q̪ЎXJlĘϟEqlPZXPMNVckz̏RQIHHJTW^TGDFJPY~~Wv{fWQ]\J@3L_QIHMZflEZYSSXgn}d)SPAKHPPTZbcaehea_\XUOIHVH3+,@GFHIKKLJ?%+DHDEFGFIQptqkZOIHEGI> 9;;<=AN^b`H997942$ + +  +  Ft( + +  + +  !13-''*+,,*,5( + + !!"   """&)(&'')'))))/3201110*'&#"!'X|x}|yz|~}|}~~xogkg^YQIAFNVc[F9:Pw{jj\ICI\`G69DUPHJ^bSMT[d_H657:AKV_[`uzkF31?HKYllA:8=OkwxscTQkIKOD>=>?DJXnPOhehrugXOMdswsttsnc]nwxC?X[{rs`\aejobhjRX_PBOJSi^UPRQQLH\fpzhhg^U[[]\\YWNu}nwzMq{Kv|y_UķHdjXPPLQXY[|SgaQNflH6!N\ps0QJKZlgMWiɮƖɇ*JPif@%!!$.GS[_XLALZjkkldAlk3arSmcWTTW^cxƤ͈\BrϗHqdR[WRNS[enɄSRJGHLSV^UFCEKHTOq~tfOWkXH;-J[QLNP[`fx=QSJHJbnsa)RNB80+,-*)2>HRY^__ZVQKL]D2+1AFJKIKKLHA,>GDCCDGM]notpgTHHHIE; 2879>AVhheH?87750#   %     04/#%+,//)'%$! !!#&+,)" $%&&)))()*(**)))*+,,/1/11320348@A=@A@A?=;96420/+&#!#Lrm|xzy{~}y|}zynkke]ZRJADMZd]F;;Q{zpk[KDJdrL97ETRHPfiVPUZcZB578=EMW^Y^u}dG43@ILZunA;:>Xmu|rbUWmFNPA>?>?CJXpMSjghoodWNLiruusvvn_]ozt4=VWv~stbbbfln`egQW]RHLHVk^VPRRRML]hszcfe^X\Z^]]\XP~~~pwrNvzMtzsZVɺDhpkon]_a\]x}Zh_RKHEB4%O_q8WKJTkbL\oɞƕƍ~v1IH_qt[@&##$1I_xtYNDI[jkknc>nd7fl^naUTWZac|ǩ΁\Au͎GoaR]TOR^`fpȃ[RJGHNST[RECFLMZPyythRVgXG;.P]SOVT]`_u>PQGIN]hlX-TOC:4...+*+*,/6;@COMQQQ_8-&2?DFKKHJLJ?.CGCEGEFK_gswuoRGGIJH? 
+39:;@Odgii]G87653" + + !-1& +  +   -80#(-393+'%7L>641-.'%$$$$%')))*,.,(')+,-+-/1232143114423346699;89==BEGIJJKMLKIGC?>=;9864/*%!J~pj|zz{|}}~|vkeke`YQLCGP[g[C=:S~~skZLELbkH99DUPIRhjVJSZd\C676:CMX]Yasr_I53BJM\xp@99?Xnw|qaQRnHNLA??=>AJWs|LWifioldWKJkrrppstoa^nzu3EY]voneefkolaidRS`SKNIWi[SRQRSJJ_iws`eg^XZ[_\][VH{|kupLwxKuszvztlWXIiv|j\`v{|xXi_OGL`G3 )Nfc@SKJ`vbTZnˣēÊyr4EQdigT7'#!$/JryUKCO_hjmn_?sX8ji[m_WTU[bn~ŷ̀aA~ʈBqaS`TLXebhowXQHGGOTX\QGGHLLYN{}q_OXkXF9,J[STZZcZ]o*     .50%*7IH;.(&"CWIJEFCB>663164.,*('&)-*((),.23439:8776444566659::=>==?ABEKROSSSSSRNLGGGEDCBAA>92-)$ J{sx{{xz|~}~~hgkf_\SJDGS^fW>9;Y~woYI?IXW@7:ESOFQ`_PLSZa[B86898?Tow|qaSWpELLC>>@ABIVvzIWjdglkeWHLptoggnro_]o{l6BU]vtr_fkptp_haQVZPHMK[dZSRSTSJK\it}obfe^X[]^Z[ZUU}{juoNuzzGunpnnnhX[ȱDZ@n{n\cw}|uUkaRPqxI3 ,Mfb:xeIFIfiUPWiɘÑŏxm6JVhhcN7&""%2JpsVJBR|bhlml`DrU:ln]m`VVVZbk}Ź~aAKp]T^SQaj_irvYRHFIMUV]QIKMML^OpaRZkVF7.P_OT\ZaZeeEOZ][fj_bP/4AHMag>97?Xlu}m`QVq@QMA=>>BCGVwzHVfgjnohVJOoribcorl_^p|j2EV]vso^invwm]kcPY^MHKH]dZSRRSSHF^gjrhceg^[]]\[[]XUylwnNtxwMsikjjjeXZš:u|}l\a{xWl_PMhbG/,Mf`/e\DKTac[KUkǴӾּvb@EMOLMKMMNQRQRRRQRQPNKE?9.' V|{z~}}~~yfY]^_YQIEKUdobA8>^ziVG<76413:ISLCSccQNV[^P>777>FOZ\]gookI13BJPbwaB<7?[mv~j_OTt|AQLA?>=ACIZw|CalfkopgSKKksnijpskadrl2ET^ptnclsywj^pbQPQHHNJ^g[SSSTSHH]dhsidgg^\\Y[ZZ[XW~uzlTrzoOo_fnkgaU`¤7tvupqusi_h~tTnXMG^iK+-NhY@;7888543! !#  (  +   + + "13  &5mO*&"5MIIIFFIGE@&_ZFM^\MD5&"!"! "%"!! " !%'(*(&+.314;<<@AFIMLORSTSUWXXWUQJC4*!"e}z{|}~u^WWZ]XRIFLYhm_B7?axeQ@734435;JTLLWgjLMW[bS?978>FQ\]\jsm^?/4AGR`k^<:7?\lv~zj_LSryFPLA?>>?CK\xxF^keilkeUKQovokmttiY`ri4FT`oslcmwvrh^m_MCFHKSL]cYTTRSPFK]elwndef^ZZYYYYZUY{|iQuwlPndz{rh^S\¼;vtphlnldYgnUqVOLmkF./TmNBr<[bYVQOWpçҹ׳wyW:~=YipiJ4'#"%2GNKPNF?Qsbkmnm^FDBsfcp\WVW\bcªkf:OLA84/+)+*+.12023410//2311' ;CGIHJLJJH56GGFGFJLVMOLILKIHHIJH7 5669:::9989888552  +  "!#  !  
+ )# + +  +   "17 +$.Uk^0+# 8KLOPNMLIG>%DQC?CA><3*)&$"  $&*.1;;=CGJOPSTWX[[ZYWNE8-!"e|{z|~~~w^SUZ`YRJGN\mt]D9BbxiUB65543497A_pxvh]MZqrDSOGBBBBEL^yuCajdgmlcTITmvrmnusk[esa7JW[otjalqrpi`j]LFEHMONaeZRRQRPFI`frpgfd^YUXXXXYU\gNv{nVmpr[TdƟ@vqg_ffhe[er]rTPOopG-4TpOS@_eQILNUsԾԶدw}QBy;c|shS1&$"'7OcmdRF@Uk_jlnmZKFHwcgq]XXY^cgìin:fXxXV]UUwoZdtķdZNHGIPTW\PeoPLNrq^ufT_gPD20\[OWYU\WpOJPLIHMURa{8=QJA83/--2301321245430.0211' :DGIJJXSQX84GFEFGNd}{x`JLJIIHIFD5 !67::;987987457641 "$!#$"""#!  +*'    + +   +  .2 %'+-*.-&!:JNZ\[YUUP<#>jJABD@;5.+(('&&!$$#')/559@EHMRVW\^^\ZOC7(  "f|zz{}}}~s[SUZ`TLGGQ_pycB7Gf~zhTD764234=OVMJUa]LOV^bWB889>GQ[]ats`>05CHK]rTB96Bbrz~sg]H[rnFUSNNOMLKN]|p>cjbafb[QIUovrnouqh\gs_2ISSiwzmgtjcikpohaj_MGGILKNdfXSPOQNDHagxpffe^ZWXXXWXU^eSxzjYljk[Shϙ7uoc_aceeajoWqRPRm@*4RvEU=^hbQMMOuŸӴ֪yxIJrBHNUZ^_`YSA5(!&h~}~|zz|}}}z}vWTX[^VNGIR^txdA9Dm~|iSB734236>PXLJXdbRQV`iZ@79;?GQ[\\u~nN907CGHOYF>97Bbuyuh`J[m[FZ^dgfd[WR`zmAdjY[[YVPIVttokkstg^ht[3MPHIIJNdtgbfijmg\m]KHIJNMNjgVQPPQNDIeh}~kgfe^\YVVWWXV]z_Qxd[lii^PhΓD{tpibejc[kkSpTNIml@'8OnBY?gwp_RPYxҾԱ٧xGOsBonowL,$##(7R\hbPFD^bbkmliUIEGwcenbbkpfadϽſftzAȾ\ZyVZ]UXxq^fr^ZMHIKRVZ]NV[MLQqie~{tbG[gRB27\\MY[X^^vAMTPNMRTRf~4FTG?8411R}nLfFFF?F=62110030)$>GGLNt4MUfffejze<%>jHBC;PQI@8&!'*&$-+"  !")/49=DMTY[[VG=,!$k~}z|{zz~|zz}~~{z{qVUTYaWMEDQ\nqZA:Gp}~lS>754425?QWPLXd]MPXbiV@:::>FS^\cu{mV?,7DEDC@>:86Ffsxuf\M[hwwOF^i{}ufTa|iBcf[ZXXUOIWuskdgpqg^gtW0LPFEEHNgsgbfgilh]nYGGGJTPMgbXRPQQNFMdkwsjhje\[XVWWXYU]bVt~_cltn^OmЋ=zvrmimocYmj]oTNVm@(9O]kyw:Xs>dw|bOOVzҽѮأvBTp@bqqaE-%$#&6IJORPFC_bdjlojOLAL}dhqjpgbfѻǼbunJȮƽZ^{{N_centahuĴ^ZOIJMTW[ZKHWNKNr}ueauaG^hOA17^ZOVWX_^~=PSRTRTUQe/FQG=74/0G`bM]VhmZ82211341'&CGRWSL@0! 
(i~~~~}}}zyyz{|~zz{}~~}~|zzoZWU\_XNFFPYggQ=;HqlQ=865656@S[ZU[h^KOW_dUA<=%;KShuvw7_zHVztZLLTҫɾءw8Wi@epm`F.&$#&8KVWURH?cbdmoliNM:K]itswwdbdҽǵ\zjNǪƼRby|Wg}nqZZOIHLQUZYJagNIUvcewfLbmN>09`ZPVXY__?SUPQTVUSi~*MWI=61./-O8RdV|b64421333(*Ijgz|u}*&>HFFGHW{rc\uAHGC, '79CJ@9:::8:896791  !!%&.FXO9&$  +   + + + + +  +  *( %'*-//*%%IOVfccaksX9&7FDDDKanhi_8.0.*" *3."!! "" /+'   &*.4?IPNIB1%$#*^~}||zy|||||{}}}~z{|~~xxzmSWVZ\ULEHOXe_L=;HsmI;655557ATcd_dsfQOXcfR<@EB@IR\Yatxqc>.8FD?>?=98;Igs|whZKZhtz{mGJdxrPbeDehclql^RIZqskaispd[ivW6MJDDFHPhvdckqpmebkWIFGJOOMjdWSQRSLDQhqxrnsui\[Y\\dpgWby_XwVbfjshf\\Soĩ~Gvri[^ch^[nh\lRPUm>&jmiYC.$#"(2=bZOVVY``=XRNOSUUShx'OVH<60//.b!_VQqF01110332',Qqvp')@IEHHLSzY_dDIIE+'9BkymN@:;:;Ow|mN>645458BZionot`NQVa`K?HSJBJT[Y\v}xb6-:EC@>A>;:0?d\SVU[cd>ZTPSVTVNjw #VWF<5/.,*k&lIPM:72200341%+Qzy}l#)BHGFINWzZeHKJD(*:ov]B;:8Gfj^N@0 + #"!&'/sE!"  + % +  + +  +    )'  %(+,.-'$(FO\eacijg^5$3AEDAIkbjihZF><8'"+61#!"  ##$&&%!0" ),JL##*3:?A>=9bkYr}~}}{{{|z{||{{|||}}}~}{|{z{}~}wx{mXYYZWTMCFO]haO<:Ny{mL<753358D[hstrp\KPT\YEER]MBGS[WXs{n]809BA@>?><9>>;8IKLPODAm\vzhMVy0V~aqul{wbalѤ\U]ŜͯHm|{O`YRWoc`i{¹YZMILNTV_YSm\JL[VsyWJ^^J<+EaVJHKWagEUQPWWVXTni*YVKA71...51*SAUbU{g^1212310/$4Yl 0DHEHFJf]`FJID" +MH6;;D#.+! 
+ + + @x+"&0>EhycJIFKUa]kx}~~}yw{{{{{{zxyz{||}|}~~}~z{{~}}~xvxkYX\`ZSNBHP\c\F9;S~{oxdG:88444;G]immrs`NOV[TADT^H@GU^XYfhlW40;ED?>>=;8*#""(>MY\VRGHtXxtfI]w1Y~`ni]ubbnϟ]Q`˨Hn~wJ`VP[yb_jwdzZZOKLOVYbWMdYNM[SyxXRYXH8-K\RJHMXbgAVPOUWTRQoi0ZWSV?4/372-+2/:7)%&CMQXagggaH2'7CDFCKUZ]RVG8AB<% *30# ##!#!!',+*)-**"(T][LMM/  &3  !'5SxygOHHMVXL`low{~~z|zzxyxxxxzyy|||}{|~~~xx||~}~vtx~kZZ\b^SMEGM[c\E:9UzmwaJ;87535==<;AVrzscWO]ittu{hEUh}eOjTMqgdmphXMGbwuqmsxsaYmzE:SJCDEJVmr^ivlbghPGHHINJTn\RQQQQKCVhq{i]^``bpu`Nu|zNbuIjfl}z]XPuͪbOsobba^gd^wYchTOHHF;!#BG_xgiip-bAFepMJQbýΛΒzy,XEexy_A)##!'@[{bQFEvY}seE]{4Y|\ohapebqқYObΤJnsNcURaybaj|Ȱ]YPOQQWZ_WVkWKL_I}wWKONF7+O^TJKQ[ek}AWPPUSUOWrg4\ZpA1:^XP08HG7<<3764323442-!7[rc^`hG5HHDHENbO`FJJ> *KwYYki_WVRLA;5 '#"$+:xGNbD + *'"%     +  + +  + + + + (  +"'Ut<)%'CMPXbdcaVI5'8FDEFDIWXHN>=BC;&".:/$"""$$''%%,1/,,001(Irxskm{=   + 31 &;Yz}q\JQW^WNahmsz|||{zywuwwutwxxxx{z|~~}~~~}yy|~~~|}|zrtxeVXZ`^TLCEKUXO?7:Xuis_F977644;K_c_bquXIRWZSFFR[GAIVXMCB>:701=B@A@?=<9@Wpx|saUQ`ktww~`=711?B?>?=>;8>Zoy~pbTK`lv{|^?[leToNQnhhlnhXKCjwqifnwr_[m{|<=QHEEELWsm^lmlagdOHHGJLEXg\SONONKEXccchjnna[^beelreE~xtMe{Jibekm\VXTzɯZ[ssqqnpm^by~X`bP`m"&EPwwwwi%]MJ]_PISa̾ˑ̋uw'MTv{tc? $G_]oeWEHZyudG_n2ay]kdbpebtАWEp˘OqkM]TQe}bblǫ~_noYcSVfWNObFuVSe[E6/U_TWfYZ`gpDVRQXUVQ^tZ9YT,CRI~y|_bnPTgS?DB954. 8_}~W6JFEGMOw~nSGHH= /A[u>0 #*#"''Jznd(! !! 
.@6   + + + +    + $% + %*Qx7)%(AHIMPTSOLL3'>IEFGKY^feYJCED<)#/<4-(&'+0399==?@B1Srw}u4 E]  @GF=7&  *$ &((&#2@xqccdeTRillsz}{uvxyvvwuusstsvvtuvvxxy{{|}}~~~|~yx|~~xooxVWTUTUZSB@BEB<536LjiovYD;85667=?>600=AA?==>;7=Znx|ocNMclwy|\AXk`Sr}IYnhhnogWKEhtqifptm_Xn~v;>TIDFFKZrpXjol\fbMHHHLKG\fZQONNOID\fgillkkaZ^aefswcL~{~xw|MeyOh^{zl`ZU|ĴV[tz{wuvn[by~WdbW'IOr~x]'VIH]bMJW_ʼʍɍpz~+KXuvyi>#ES[jdZDI[pvbCeq5bu\kddjcbuΈ[BtʕRsgN\RNewbbmƫ|dɱ]bRReTOScOuRZkZE72T[RZdQZangEROPWUUK]uP:WZ)cB+}|wdmsK|i[nU>54-HFHKMGAFHHF4!^u}~yy1%aU 5_g`^b< )(0+%)-.;CwjfdcVTfjoru~{ywrrmpsrsstsuttrtwvuvw{z|||{||}~{yz|}}~yqqyNPNNLS\]OFDEA=956BOakqsoh]qvZE<;8677=KXWSXcZOLPX\PDCEA@ELVXH?=>>7/1>AB@=?=<8>[pw{mdIOdmwtx~_?Zk^Qt~J\oheijaQLEiupigoul]Zou7CUGBFEIXpnXi~kjbf^MGHILMJ_h[RPNNOIC[ilqtstqaZ`bdgwx^S|~svzMkxJle~{^WZɻP^zr\c}|XcaVt'GKusZ+uzPJISTIJT^ɷʑʊq{z=JLcfqj<*JRW_a\BLX{scAnk6evbmfj`a`u·aB{̐SveLZTRew_bmĮ|kǛ[aUftQOOgPuS[iWE60V]QS]X]ar_DSQNQRWG[wL=A?CDFIKLNNXMJMOQJ7#"#`ty.c>AgiksB$&.'3S>8AP}}nhebTTjpwy~~{zyxvutsqopuprtutvvwuwzzyy{~{}}|z}~~yy{|~|~zppv}PQMPRYckbVOMKJD=>BBJRSQNHWrpWD=<;998?MZZSPRMJMSX\RC@?>=CLUXJ@>@<713?CB?>@=;9C^qwxyqg_SSbmwpv~|[@[oVOt~I[qh]b`UPMEktnddorkZ[n}w9DPFADFL[qgVextmi`f^NGGHJMF_i[SPNOPIEZgntwvwuc\_bciyu_Q}~}p~wMiwHo_|{YY_~ŽƾI^j^c~SdbXu`(IQ|{zxR'luONHGKHOTdʱÌƉr{r;=<@O\YRQPNJOTXZQGA@?@DMXWJCBA>933@DA?>A><:Dcqsttoh_PPcoxovVC\m}VRuzC_rjadc[RKIjvoegptiT\p~q4ETGCCDL]pgWg|~qh_f_MJHHKMF_i\SROOOHF\gmqqsuq`Z_`djwi\SxLptFkcx{w_XadzN`yyznb^f}VfmzV,HVtwyQ+wYNFEHITXfʳŒrxq;HTt~YD$5VOLQVRIQɍXqb@te7goanc`nvgca{zf@}ȂPxZR\TQSX]dlůpkΏZ`U^bLPTn{QrJQcVC33\`LRdWY_pHLSQQUTSN]xC@YR[TO&Pe`>Ufwa|g^i~tcVhjQ94+ @m? 9GFGGLUR~HHGG5 29c{k:Zvnxp_l5+ "2)"#''Jm|R !!    
+ + +  + + + +    #+Ps3+%/MMW_XY_a`L-*8DHP_UQMMNNKMGD9$"3@BJMMSV[Ub^^]]^YZ^ZUB"'+5h}2 + +#'*3VpttxmB&%!"(0MV_VVY4+-D\-*0@GED]QLYy}itzxyww~}~{{zywvtuuusssrrttssuttvxzyzyz|{{z{{|zy{{}}}|~~{yz|{lptjihijnpsqmjkjjkjhgiijmklrsr_KCDGEBCCGRYTPTTQNRSWWOIJHCFJNXYOGFEA<34ACA?@A@=9Baqqwvl_IPeqw{{TC^n|SSw{>btojoleQINnxpimrsfVYrm2ESFBCDL]pfXksibk^IJHFHKH_g[SPPONFE\egklmol]X^`dhha^PtOowzGmk~cZ_IbiNMTVY_]dYi;1IRccvjqG2VHCFHHVV_ʮtyf=FWvjPB&!6VSY]_VHVˈXo\7uh3jmalab}gcezxj?}SzUQ[SMO\^cpjmΉU`PalIRVowQmRWkZA24\^MXaWV[nCNQQTWVTL^{E?WMA6-+-./&8IT2nahZp\lYYsL33) Bh~v|w|6?>?:Ehutswpj`NTht}{VB_pW\yvCbvmlqscTKMqxqmotthX_rk/CPDBDFK[sfZnpgdh_LGGFJIH`hZQQPNMFG]caeiikkZV\_bdebbQ{sQrvuGmh}l[Z`ûDgbPFGHQ^^d~\oY~J2GRitxguBp[vTU[SShp]cobxɃWbRhmPSVpp]eMS_L@35^\N[\RX`j|@SUNTVVUOg5CZOB81.,)))),/11/13584>NB531%!=FLel[4(>HCEEK[mf=GHE+ 5EShkY{;* )&"$((Uoy@ "%*$!",#  + + + +  + +  + +   #-bo/)'3JMUjoqolQF/(>ISwg~`9$"0?DQKPZbklieimkfdfjkkH&.037EY]SLB1  "()*4Qmlvp,&% !)2Hgltw`20,CR0,:JRRN^P7.=f~|yuutrsrrsrrsrtssuuuuvxyyyyvwxxwwwwuvywvxxvzzzzxy{zxy{{}}xspzy{{ujov~zyyzzz{{zzy{|z{}zyzxvwwwxuponljhhheba_]YXWUTURPPPPQUVTVWUUSPYZTVT@8BCFRRNJC>Fdllmjdc`HUir}xMLaq|UZ{sBgtmnrobQGPtwojpvs_V_s`1FMGDCGM_r`Tbjklfak]MHHHKJGceYSNNONGEccbcfhjjZX`adjzmZZ{nKtymRjW`dlc[X`è?eaQIHIT_]h{]snT6ISyz~z>@{jE?DQQK\`pƱҵٴs}~PJGbuyV8$"$'<[u~fUE\~aqc>}X;phdker|dbceoEűj[yTYZSUru]es\w~V]SmpLSUtebzfPKLF?.;[YQ\YUY`i}w?SSRVYUTJi.EWNB92.,+**+.121201242235641$#=FGOShq**?DCDEDYehAEFE( :amYhxl:( + "$$$&0m,# "%.' 
,)" + +  + + + + +  +  + &-hj,*'3MITefhhZSL.)KKWojtuzcJ6"'6BEJIQ^hoolflromffqopM'-18:DUMHH@4 "(),/;HN[bG$%#$+5LfrvzyY605TZ.+9MQOOYM4,>avsqpoopssrrtstuuuwwvwyzwxxwxwxwxvxuvwyxvyyxzyzyyxzzyy||}~umhkq|||}slns¿xzxxzyy{{{{}}{z{z{}zxzyzyywuuurtrolkjggcb]]YWXVUTSRSVTUWSQNHLOPQOE=BDIWURPF>F^aivg^\VHSakvuIEcqxM_|pBhtonrm`QFTuzsppto`Wcuc1FKECEGK_p[S_filf]j[IIHGKJGceZSPLMPEE``fehillYW_dgo}qZZ}iLszoQibvi^Xe=f^MFGJR^WkwZxY9KRzv{:[\kjV7%#%&;R^oh[RCZvf}oa?Q?rehkbrzaceп`syEĜh]zOXYRUsq^eq\{yY\PjfIQRse^zeMHHF?-<_WP^[WY]cykBVSSTUWQIi)LXL@83/,*)*+-/.1313344455673$&>DHJEmi*)>IFDEIfjm?EII( ;dvbbzb6( + %(%$'/bl?%"!$+&!33$ +  +  +  + + +  + + +  +&,hZ.'%0FHOWfdcaXI++DB]hn|Fy\xrI9 ,?FHIIQcrpogeoxwrjlvvtN(04FU[jXTQG8  + + "(),.4;?FB.'&%!%,3Ofqwzu[73=OP/,=JIQSXO4*>ciQimmopqsstttuvxuvxvtwyxytvwwxxutwwwwuuyxvzzzz{{y|{zz|z{}~~}}}ztphaSbn{z|unouwxxvvzy{z{y{{{|zyxyzz{x{zxwsttttqnmonlnkhegedga`_^]^[\^ZWTQNMOPRPJFFINSQMKDAKVW`jbZUPIMX]emurdCGbnuYa}qBhummrm^PGV|}vwwyraVct_1JLDCCFN`p[U`gjme`iWJIHILIFeeZRONMNEKabfghjliYY`ffrqU]|~iNt}oQg\cns]^Zh®AKQKTZ¨бײoH_x;^nttT6$#%)?Tlzi]RC^gdvk\@Q?tefialwbehйbxtGcb|PXWQXpr`dqa{w\\QocISTvc^zbJIFG=/@aWMUXST[ds~~aBVTPOQROKh|% KWK?630-++,+,023434545355551$*@FFKNj*(AGFFHMghbDHFB% :Yrnr\6'  + &'$&*3|~~B! 
#--!$6:*  +&0& +   + + + + + + + + + +   + &.kW.&%3EMV_nmpi[D++9@[`aVDgYr~u\7#!6BEHNMVdkfa\`xzoox~xU(15\llsjgb^N!%++,/31/75($&'##)-1Ogr~}`176KS,-;HQUUWO3'=cdLe}~~~}~rolqsrsttvwvvwvvwuvxxwyywxvwutuuvywxzyyyz{{}|{}}|}~~~}{{ywxumiebXR_ix~}}|sjpv~xxyuvyvzxxxvuvwwwwvvvttuutqpoorqonnompoollnjllkmjklkjihfdcb][\ZWZWTPVVSSRPNKRUQV][TQOMLNOSVVWLAHZdq{~hH^l@gvooql^QGVx{zzxwo_TevX3HIEDCFNblXT`ilmdbmUMIHHLIIfcWPNLNLCObbdefhjfZZaeivnP\yv{~hTu}iSiU\`kd^Zfì;j^EFFJQ\]puau>:HZv~z1?z[8@EQRKR^ŭ԰خr=fsCfyrO4$$#':Zh]RAaady}rkTCMBvhghaiyacdζ[|mGǿTe}OYVQUwn`esYr[[QrbJSWy^avaMIIFQdbccfhidYYac_o}hR]yt{jPt~eVhh}i`YeǭȜBj\LGHKV_WntcwG:GUlxpa|.FwV=DLYLEN\|Ŷձ٫q9uuAXh[XQ7"$#&;_z`\Rlqqqic #9tvloQ`K;61   0(%%)7xs|6!)0)4JK4 !)-*   + + + + + + + + +  (/E_`O.)$1BLZnqg^XI@&6XamV}~H3"%4AEEKRW]wvvxyzxljyN*4@dxU(.$(*,153.41*(&'%$*.5 #=m|hr}k^`3 6&&%'?p{_:  ,16EP7 !$%09  +  + + + +  + +  +   #)-XgM-,(%0FMW_`a`_Z>"S:hp`beg\vgcK0 2@DGR`ev}yxA*3Biz}M!$')-8WvruqG&%&%%+10\}olU0/$RV(+Oxph]1.DnNampnlpuwvvuwzyxvH1Ahk[UMXOP67bUORV[QZblz~prsrrrqpprttqrsuuvvtwxzyzzz|||}||}|||~~|~||zy{{{z|{{|{xuvyspnnljgea^_]\YYWUSROKMNMIEJOQNf}}}~|nkpynmmpoljgffbccgikmmqstvuwzyzzy{z{~}}}~}}}|~}{}}}~~~~~~~{{{|{{}zyz|zxsrusmjjjifdcac\RMWVUV[[ZWY_cb__\[clfhiid[@>PMKKLOZpm^_fnvn]dfVLHINOERgbWSQNRTEWffiiilmcU]`]`lpaVn}{~_\vvYeQcʤ̇KkYKFHLW[[vrcӼ3?Mbuderj"_tTNLNPRT]ŷѠ١{u0o@RSLLD/"#$';RkmbTI=iPqxjOQ=OZhjhoc^nӢYX^̤Jk~P[TO\xuhju\Z`_byYNP\LurUYpR:,LbNHJMSWbqUGWQILMSNTy_.XUH>64iWIj@gvfl}244574,5FPSR\nsT?;IFEDIK{ZHDGE9 #9ukdlE0 + +!6)&()E{H&"$! 
&+1GS8 ,*+>A  +  + + + + + + + +  + +!)(Mk_P/(%4GO]dfipn];#Nf?l|]WyR1!/;BFSeceqzu||{|{=)1Cj{wG + #&+.:vF'''##*39h1.'\Y(-/0BnUhorlkqwxwtwy|zx|rC5ng.-*=_STYY_c]XZ[Zy~{nrrqrsrrqrtutvvwtyyyz}}|}}~~|~~|{||}}||{y{ywyxwvuwvxusrmifdccb_\XXXYVRLKKNKJNKNNPSSW\\]n~~|ojrzjfhijiffdddcehlmoqrtuuwyzzzz{||}|~}|}{}|}}~}|||}~~~~~~~~}|{~~~}~}~}{zzzvuyxurqophdd]\aaba]ZTZ]^]`_^_cU\_]\XIJSRQTTWfsmc`cfrg]dhYQNJPQHTgcXUQOTPAUedfgilnaU]_[age]Tpz{{Z\uQd}NlǽFkUKHHKX[^xr`ş%"DNz|xa fuWKNPQPR\ôҟӟ{x/i?MMOMB.$#$'=\ws_SH?kRrsgMV=T{Uhhi``_mҟWP`ϡMqxM_bfmshlx^WaZ_sTMP\MwpXcxnlR+N]PDOSRY_h|zLIVRKLNTJTy\3^WF;49PbNezQypjr]o.53342)4DOVgnnS9:HFDEJP|cqAEGH7 %;vw|_zxivV4 + )L+$')@}H(&#! &0IO4902L1  + +  + + + + + + + + + + + +  '7wd))'2EL]iilqoX:%6DBdFvzM3!.8>?N[WX__QJTf~~z{o6)3Fp{l{J  +&().9ds{~<'(&#")4=[./%d[&-x-1FnYgnphimqttux{|yxzl88q1)):T[XXZqqvhXXXq{nqqprrrtswwwwyyyz}|}~~}}}~}|~~~{|z{{zz|yzvrqqonmjnlfd`a`\XYWWSQOLNMKIGHJKKRTTZ[_]\`a`dr}{}}nfsygffefgdeecbefhmoppqvwvwy{}{{{z{||||{}|||}}{}~}}}}}|}~||~}}~~}~}}}~~~}~}~~~}}|}|~|~{xvwvurqonkfbdddcgeb]PNTXXYYTRTMNQRS_mmhkkej_]ge]\`\XVLQeaYXWPUJBWdegjjlobU[__\`\YJs~~{~Z]tTi~}NjɳxIfWLIILZ[^unaĮz7%GLnyub$prNMNPPOSaҿҝա{w;hASV_PD2&$$(HGGHJRvbfacyyZEHIK7 +&@~}h46UbPjxR3 + +?f)%(+OL&%"  !-FO4$8109(   + + +  + + + +   + +  + '6b&'% 9HL_deili]<%/FISw[?poN4,69=N[TTUF59=Da{zvh3)3IjuaoG  %&)-6k~0))($"+0kVxsiKV8Us]fd`^_`aarϑTKj̢NuuP{htddl{]V_XS[IOQaFy~oN`qG-T\SXfVUZ^euwrfDKSQLLNYKX~J9ZPE<5.*'1),3'H[zDkMm`833443(8GQc~vfR7;GIHGJILNOUGEAMIGEHGC5 +(<|j`c4:>@9aF1  [p#%'&@C$$"  ,GJ0'=8:>6   + + +  + + + + + + +  
&:|Y*'&-3j'),Bv|Lbq|pjryyz|}~~{wsf7?a)%!8^XYZva_lVUXwzuvv|M#/A43@Obmk_ghktv~}wxyyyzzzyyxyyxz{|zzz{{y{{{{yxvuqolifedb_]YXVWTRPPLLIFEFEGJILOQUXZ[\^bbceehilminttvzz}{|ynkr{LNQSTUUY[\_dfihiklmnprwxwxxxy||~~{}|y|}{y{zy{|z{|z{~}y|~}~~~~~~~~~}~~~~~~~}~~~~~}~}~{}~yyyxwutronmlhebcdb\\ae[QTX[^\^incafb^[VQ\]^`dfhfYW_``ca`cZw}xz|{{SbtGj~~}LvʺfReWIFFKTY_yghªm++RvN&VFLNKMPW]͔Дxx+XIZa_aM,'&%,DUXXOMEBuWwriHW/TlZhfb_]_bdvы]GpʘMtiTy\]ldi}¼wdR_SIGHPPfFw{jQZqrX./Y\QZ]STX\fw|viDOXQLMOTAbC?YNC:5Ni9TFDG5B;<67404333561%=HViypv`M3%:HEDFIHMVMHKRf[OHGFGK- )?lyV8788YS2 wr$&(312/#(($"  .OG31J:XhK" + +   + + &:V)'#9IO]`\ZZhS9%8IDzgmZ<0#'-7DLTVRI;568=PG]w?,4Mnt\ru8))*/A}{D*''$!$,0C|g<0.)K8.7vwJ-/BxxTh~ss|~{}zwh1C]($%9eTX]hnUU\mrsxxx{}|m(%)**))22:CIT\_ky}vyyxyzyywwyywwxy|zzyzzwvvssonjjdb_]ZZYVUQPPLKKHGGGCBGIPTVYUX\]^acdagggjnoqstzx|~{{}xnmu|FJLLMMQSTWVZ^bbb`ccdgknopqrtuwwxywyxxyxwxwvyyyzyyy{|}{{|{z{|~~~~~~}~}|}}~~~~}}}~~~}}}~~~~~~}~~~{|{yzzzzxuvtstrpqpkggfdea[XRYW\]aeaUZaa[\_Z[]Z[]]_^UXa_bgfeeT|{zyy}zyx{ZctOk{yJsͿ^TgUKFGLVW_z`eɴ(+W|G._LKMNNRWd̖ϕ{{|3XHXagkJ+'&%.CQWXSMAExWwoeC`1\k\ifba`aefy·\Et͔OviSjpNcxZckofƳQ\RHFKPMiH|}kM`tm@eO5   + + +  + + +    + + + +&BK*'"=HK[ecgpcL9';KG{txxtXQg\>@1%,1:DMSWQD6568>CCgu>+3Qou[sr7!)*+-Wj{~~zuwxyzyxzzxzvuxxwxywsusokifdc^_[YXUTRRNLIDFFBCGGJLORTW[\[]a^_beghklmrruvx|}~}yjlu|FFHJJGKLMMLQTVSQORTW[]_`ccfghjmmoqoppsqqsssuuvwywx||{{|}{yz{|~}}}}~}}|}||||{}{|~}|{||~}}~}~}}}z|||~~~}~~}~}|}~~|~|{}{}~~}{~{z{}|zyyxuuwwwvtssqqrsqpqpopolljjjggfgffe`\baa^^^[^[SV[]^]Z]b[]dc_^U|{zxwwvvxVcuFnlMtͯ^TfSKFGJRWb{Xk¶ 5W~A3`NMNMNQScȓ͓vz}=-$   +  +  + + + + + + + 
'?K+'#"?GHP^ejaRJ:$8HIzmh_oXXd\OB1&.6=CKUXQB6778?BMl}z@+9UmjxwuXpq5$,+,/:wy`8*%''"%.4D8..!=>*;N,1IyuVfqtswzyw{z^-;`tw3('*>tSTYbltnPWXWboxw{{{{{}_%"(**+,+)*)*()+4Mtvvy{qpvwxwxxxwwxwtvsrqoljfb_]\\YVUTRONKJDD@?B>DHIMQTVVY[[]]^_eehiknorvvwz}~}|xhnu{EFDGGFIHIIIIHKKJKKKMQONOQVUWZ\[_a`aadffijkllonruuy|xxyy|zyzz{|{zz{{|{{||}{{}~|{{{z{{||{~|{~~{|{z|~|{xyzyy{{zyz{|{||zwwxvyxuuqwvuxxxwwsporqtvtrnnoqrtrtttuttssurvussvtrprponmlkjida^`cbdca`ZPW]_[]f||{{xwwy}VevKrmKvξ[UeVJHGLSVayWdѷ$<]<3cROPNOSZfɔ̑zyx>MGX\bdF)$'%,HgpnWNDG}Vhmqmd=h-ai`ggbbijge{}`<}΅QycR^ZSnx]]mjnЉScQGGJPOp{FyxfJm{l/3_^S`iWRX^iuwshEMSNMLSQFc<HUMA::yXN5dc|rle=67761!$=LWos^J*'CFGGHJKQVJGa\OGHJIF$ ,667999:9989Id]85.IeH''%(@X-4athCMg~}u~n8.8Yr{Uvq4 '.,,2;]m\j>(('&"$.3W=/-!QG+A^*4J~pVgotvy{xw{}~~xY..7LPSVA&'%(DzwSVUTPMMMWXWhsx}s~|{uX$"()),0/,-,)(&& 0o|nqglumhlpvqtwutrpprlkigc_]YVWTQONOLKJJHEFCCGDHNORX[YXZ\^\]`ceiloprsuwy}~~}~vjpv}CBCDCDEFGHGEDGIGGGHIIIJJIKJJMPLPRONPTWYX[UZ_`_bejjjknrrssrtvstvuwxxxyx{zz{y|~|z|{{{yz{|~}||}{}}|||z{yyxvuuvvuttuvvxwvvtrqsrrqpqqqpqrqqrtrppqswxwsrsuvustwxxxwyx{{{}|{|~|z|zywvyyvwvvttrropnkf^\]`[Xcuz|zyuvx}|}~XfwGskVyµ[YiTFGHNWXc}Yk¿l'O`~;0^RRPPQT]iŔʑy}qGOLW[b\B(%$&,FcnjRJ@FYhlpnf;l~2ad_hhefllgc|yf?{Uz\R\VQhs\^kijˇP_PGGHMQqoC{yePbwrK-:[VNelXUX\eprm`@NRNLLQNJg4LWKA8:hQQ3g`jmOGH466762%>KYtsYI&%?EDEIKKSVRSjdMDGIID" ,4789:<:;==@<5267/ 3Ufr>%('*BTCNdvf:" 4JB**2(%!  
+ + + + + +  + + + + &FE*&!&DKT`a``b_Q2&6DFomDkxD-#)/?KQZ[L;5458=BPt}y`iX*/9Xty[u0 (.,.2Dgr~~7'&(&!%-2Q;1+(\;+@X*5OkXistv{|vv{~zU.(/=@D;+(&"(CuXTSNONQVVUYisz[o}zuT!)(*2DIB90.*%!)pngXHQVUWZ`fgrocdcccabb]YUPNNLJJHGFECCBEEGMNMQXSYVWS[^^`^`dgjmqsvxz{}}z{~vnouA?@BBBEECDEEDFDDCFGFFFGFEGGHIJKLLKJIIKLKOOOPQTTUUVX[^`acdefghilknqpqqssuuxwxzywywzzy|{{~}}~~||||}{zytrrrsrrrqquuuutrqrrqprqqoooppoqqpqsrrsuwx{{xx{y{{zyyz{z|{}}}~~}~~~~zzzwwtvtonnt{||xvzz|vZev@qiZyUVgNGIJNXRe|\m`+X^70[PQPOQU^jÑɔv{fSMNY\`[B&$%'/I^mdTJ?LƅWhlnof=ry5gcajlmjqohdvfMg~U|Y^q:sHL;246660'AKUsj~wXI((?DEBHJIQ[gtu}\NGFIIA ,4669@F:;EhcE>7670 "Jenv:&&(+D_idiwe8" $:KA*&+53&     + +  + + +'GK)&!+GLUccelidL3#9EFY|sFW|_<-"2BSVYccS:5678>FNo||QUH,.3Ww`p2 ).,-1Mp3)'($!%-4X42,&H<-BI+3RiWnvsv{{wtxzU,'+,//,'%%"-LqTUVWUQSVYWZiv|i||xwR#()-Hkjea_M.'#(jj`OB?=686LQY[LHMKOTSPRVPJHFDDA@@>EIIKNNRVUUUUWWZZ\]affjnmosuwx|~wjow@?=AA@AA@ABCAAB@ACDCEEDCCCEDDGHIGGJJHHIIIFFFFILLKKKOQPNSSTWWZ]^^^^aeeehklmmrppqruuwwyxy|~}~|{}|}}|{xusrrsprqqprtstqprspqnoqqrrpsssssssstuuxy{|}z{||}~}}{~{|{}zxz||w[iyBu~~xhS~ĽLYgSIJMPWXfwXlŤ2_dx62YMQPPNS_rŔv~_ZKJV[]VC)$$'3Lo|sXK;LQhknph@ru/edanswwwumemkCoZ{XQVSRigYbjdx|V^MIGLQTtlJx`Pv~zt.<^[RV_WW[aktypV@NNJKKPNCo}( PSIA68U/VL6sm`Kf7qFU;15555/(?L\Yuv[J&->CEEILKSbcwu\OIFHE? 
-5579DE=@eoD8545/ +,Tlst4(((+Hj}cona3" +(=F>&#9IC+   + +   +  + %KzJ(% )JMQYQ[a^_R3$9CGmvrtfilrj>0*TcrxxyoP664546357;ACFINE@?A?BBEJKPQTVYXXVWZ\_aacdgjiooswyz|~ukpxĿ¿=?=@@>?@ABAAAA@@CBAABB??AACCACEECCCDDHHEFCDCBCFCDFFGGFHJLMMJKRRQSSTUVZ\]^^abbefhmkmppqswvvwxwy{{~}zywurqpqmooppnnoommonoppqqpqqqsqsssrstvy{{{~{wx||ztuz{w~`lvDvgOлG`fMJMNPW\ewSpÖp!Ibt38\PONONS]qܿÖu|\]EJW[\X@'%&(2IjvgQL>L}VlnpqeEvt4lc_sz~}|pcgnDjXWSWSP]b\aj]{zX_MIGJPTriPw`Sp]zj/>\XPgkUV\boyzqRDOOLIKQMHtw$#TRLB521*-.)496EF9j7dh:44344-*BO[n~UF'->DFHJJNXglpYMGGFEE .7668DF9=ikG7787+ 5Om5'*))Oqv\M[Z0"  *9,&-5771./13656?BCFFHIKNPRTU[\[egd`b^chklkpuuwz{}}~ukqw==>>?>AA@A@AAAAA@?<=?======>=>?@?AAC@CCCB?@@?@A>=??@AEEFHGIKLJILNMILNQSRSOQTTY[X[__bdghjijmopprsuuutsspnmnllmmklkjllnnnppqrqqqoqrqssqrsw{{{}~~zx|~urpxyy~~^kvHu`NѹEecPJONOVX`wWsĵV?`v{s.TcivthQWM21:cqy^Nzd+ +(++/3Qkt`,)'&&'.13Q326=k=+JJ+5Uiqyx||smrvy{xyzoN%(?\RG@:'&#*NgSXYW\_VUVWZkppnttzyxrE#('.Y\=EXcQ,%"*f[OMP>8'!)273.//3:@0.6CW`URMORV[ait}zvwwuwx{}|~}~tkow¿¿?>?@>=?A?>>@@?@?>A>=<=8::756577879;<:<=??=><<::989:;>@ABCDDFGFGKJJHKLKLKLMOMNQPRSSSVWYV[Z\_aaccfhikkkkhjggkkjjjjlkjjklnopqpoqpprqpssqrtvwz~|~{yz~}}~zut{{{}~~^n}HuYSиGdaPJOMOTWbuStȻwC_us)ESPRQRRU\lںھt~Pk;ah`]W>($$&2Mfk_YMCSv[omnpeAyj4oaev}qcӽet|Bf\}OTXRTppcem\}r[ZLFGLMYy`Wq^Rr|~U*EaUKhdWVZdw}rX@RTLKLSOQrr $RQJA5/-++,*,/14356676125676)/DO[wui|[B ,IGDHLKMYjysUHHIJJ= 2755;FLZi|_E:556)&Dh?&(')VtZhhU."  
+>=(%HWA+  + +  + +   + #)K?+'#4NKZuighheL/&9DQkwpz|{qZq\I222/H^liU965337D^inpqgVRE005& )340.,.2HT4-6Jb^RSMJOYeq}}sknw¿?@A@@>@>>???=<>>===;:854420/.0100034457:<<=<;9998899:;=?AAABDCCEFGGFHHHIJLKIIKKMNLNLMMQQRRRTSSUWUZ[]]\[_^accdcfefiggiikjlmoopppnopqrsstuvy{y|~}sxz}~yxxy}}ysxuqs~z[p}yMxvY[бFdaMJMLRVWerTvþpH_}k$DYUTQRUW]mٷڼv~Jl>nj_ZU:&$$(4HbkimO;Os^lmlq`?|`:ucfum`ӼexuL˿_a}NRXRSwocfj]ҿiYYJHGKQW}]\q`LxO(FZTQibVW[f|wXEPPMMNUPKwl'QPG?50-,,**-.05645754114555'.BMUTVfwXA 0FFEIKJMZzwoUKJHGH; 08563,"%BL:)    + + + + + +  +  $(V9''&$6KNV]\YW\[I-&:GR|~wxuy}VzzubB0'5@VbN865459Lblorvq[SA,0=bpedI;a_( )+,/5Hszp[G((')*,05uv1?A3O.*U>)5[h}|woqy{trohD$2M('%-QaVVTrpYVVX_rswuuxzxvq;$(*4H\WLZX*%%"0nYEPO=4&)21.,-09b_2//:IKFDABJZhu}}qmmvBADA?>>===<;;;:;<;;;9873010-,,*,....-134699:98:::;;;<<>?@@@AAAACCEDBDDFGIHFECHHGHLLLILMLNMLNLNOONOQQUUSTVYZZ\YZ[]`bbdeefgijlollmknnooqqrswvwx|~}}~~vxx}~yuwyx|}|lkek{y^mytGyXZѫ=ihNLLKRUUioWsH$Ja~k$BXWWUWUU]rشܻwAw|5ed`\W<'%%(4Qt}S;Ql_kmnpZ>]7JHHJKKN[t}|kVIGGGJ: +1956;G]qbq\D:777(6P}1'**0YppXolR+"!2411&(8A9'   + + +  +  +  %)RB(()"8RMT\^SNZ^H,';GSvu~efD-!-9HYK75457=HTfmpvskaA+.Bcu}s\% *.-.4jT)''''+36vq04,,:2([C*8`^v{tv{~uqpmE$5lz|B&%$4Y_TV`m[WXWcsrvtwxywwm7#(+7[oigb@0,&!;9>:99975565788999994466431.0//..,/212756989:;;:;=<=??@@>@AB@>DCACCCDFEECBGHFHKIJJKKGJIJLLLMLKLMNQPNOPQLNUSRTWUWY[\]`aabcfdeggiiihiikoprutwyx{~~~{wxy}}}{twww}~sikkr~w[pyuP}T\ҧ=okPJOLSWVkoWqCHLT3&IeeGRVUUTRU`wدڶ~w~8pw>id`]W7(%#'2St~OBVd\jknl\>^9HHGHJKN\rzhfRIGHHF7 +2657FJcglux|uE(/BfY"!" 
./+/6hJ'))&%*37q23,-=3(d;*8_Vk~}}vstpB'4v{1)&"7ZYWZgpYYWWborsrxyyyyj5#(,=juijXKL7&?qPFXE:1&/21/..-;fL,/--7A>9:?C@>:BOd~~pory;<<<;:86541/00//10012566679::;77555534566999;999:::;=<<>=?>>ABB?ABBAABBDDECCCDGGHFEFHHLGHIHHKJJIIIIKMLLNMNMNQPOPQQRRTUTWYXY[[[\]a_``bdfhiijkmnprvxzzz|}~~~}~}~~}ywwz||xtv{zysnkkntv\q{nQ{W[Х@jeNLQNRWWklVs@JSL4%LhcUSUSPQRW`{خڴu9{vC_aaZQ1'$#'3KlurHCWc`jmoi[AR@}\ew}jeӳb}fV˺MeKXVQSmi]co‘ZѷYYYGEHNO]O_oYHHHH7)J]MSk]UV_r{uPDOPMKPUKP|`3YPF;40-+**))-12245524324653#8FMLLQotZO<8IGFGLOLVozjlTGIJIH2 + 3659?Kj{uzYB8496&2H[y%(''.^zqR_eU(!  $8?$%5GJ;#  +  + + +  + +  + + + $(W{.(%!<`HTktuzpXF)&UM<<>=<<>>?<::=<;<=>?>=>>>=?A@B@BAAB@CECBCDCEDFEEDEGHEEFCDGFHIHHHIIHIKJJLLMMLKLKMMQOOQQRRRTSVUXWWYY\]``baabcfikmprtxvxzzy{~{{|~~~{z{z~}xvwzlktpllnoqr[q}_S{~Q`˼̠Ae_KINLQVXkjStCdpL3)Nhd3dTVOLXSV\zתٲ}r9r9Y\\YP3(%%)5JekhZF=X]akoppTEQEz]ey}ifӮ``U˶NdKYVSZ|jYclXԱQ^ZGEIOOaJfnVEFHD5*M_KUaTSU_n|}|MBPROLPTIY}W:^RE<60,**)))-01144445434662 9FQIFOMQQM:;FEGHLLOUfkmQIJJJI1 + 4569=NwnzZ@8795%1CEj"&&(/^lmFWeR*" " /II"!.CG9   + + + + + +  + + + + !(_m3*& >QEXlkmld`H)),9nu?((%"4cWYZhp\USYestrrv{{yth1%)-A?>>;::8854310//0-./1112599<>==@?BAAACCBBCC@@BA?A@@AA@@@??>ABA@@@ABBCCDBBCBEEHEEFFFHGEDFFGFGGFFFGGFEIEDDFGGEDDFGHJHKMMNPNPMRRSQOSSTWUXZZXZ\]_bcefhlnoqrqssttvxzz|~}~}||zy{}~qlmmronpstsn]r_XyN`ʺϞ=fbMHLLQT[pfVuCplP2)NidGbvVPLX]QU`اگ|q5n;U\]]T7($&)3Ldj]SE>`X`loooTIOA~}]hx|ghԧZ][ͲNfKYVP\f[dlW~ՖK]WIFGONbFqnUJIHA3,S^Q[kZSU]hru{yOHPPKLNUH]V;`PD<60,,+**,-23443344214673#9MpmodKPQL5 9FEGJJKPVWWT|fTLIHIK3  4469=Jqs\B::96$/>I["%((1`on[kqO) $-% &5\N!+CF/"  +  + + + + + + + +   + 'WX2+'7GHV^Zbgi[B'*?I`zywOopubO4 ,;OMFSE8689D`trwyx{z61@kr}y\1')*.6s}M)''&"%349d2./%)'.5o|0*,7gHcsssrzzrg8%Ex6(((&5dYYXk{g[W[Ybpqrmpw{zxg.%)-:Ymvr=BL4) 
No[VfP>5$-23.002BiC//02:73:G\dH848>YaU68@EKVZXUNHGLVXqur}~}nlp|?@@BBA?>>>A@<<;865763434334688;=<;<=?==>@@@BBCDEEDDEFFFEFEFDBBCB?ABABBCDCFDADEFEFEFGECDGEEDDFFGGFEGFEEEFEDDDEEAAA@?DEDGIIJKLMKLNOOMOPNRQQTTQSSRSUVXYZ]`d_bdegjikmoqrtz}~{|||~~~{zy{~xoljmlrnsy|z|nZtSV|Jc˹ϖ=i[KLPNSUZsbUw~?ggK1-PkSHZ}pNIFW\KNW~֤ڬ{t5n:876$-Vzz_O[I-& Rqc`mWQ@%+13237=NlI322,/018@HG;76;?cG50,*+09?;8;:;ISyZ\]_hlmkl{~~}lis~ACCCBDBDDBFC@BA><>==<:7987899;<>=;;<;8:<:<<=>>ACEDEFEFIGGFFFFGGFDFGCDCDDDGFBFHGGGEEGDDDFEFDCFEDFFFEGGEFFEEEFGEC@ADDCDDDFFHHGHIJJKMLKNOOQOOQPPRRQSSSRTTTVVYWY[\]_a`dfilmoooorsxy{{~}~v|{}}~}qnkhjmst{~x}l[rX_MeʷϑFfYLLNKSWXrgZzxFidI0-PoXG_qJHIZZDITբث|u1j=W]^XN8'#%);^VOFAfXgmpooRFKC{[gy|`jҡWPcΩEmyHYVS_xa]du}XTTekZTU_UJGGPT^M{~lSGGGB1+R]P]lYTV]cnrxrDEPOKNRVCbK>bQF<60-))*)).33356431354550! =otQ_TR4 ]~zu33InhiK ')-03pF*'(('385;fkaE2,,/-l5.=n{@`n}zuvs\95=gqb]7+(&(=fXXZg|gYWVWboqg^px|yuW)$'/Imqide^>+$SsfioedB%,274>RLiwPB8/(-/2:99489;=>f~~F5/+).8>?;983.8=a`^behke^Zmî|{pis{¿DCDBFFCEFDGCBBCBBBBA@?=??>==;=>?><==>=;<:9:<=>eUgmopoNLIIv^i{zbjѝYJdϧEpwKWUSXfZ\cvwUQKIKOTR\SFFJRP]L~gNHHGA/-W\PY_RQV]equ{m=FROKMRVIeICeVC;60-*)***-22254343555560 !@l`jkp{;8KJHLLNSXKM`dMIIIJC) &467:?Ozhy}[@9875# 0CUtb#&)(5WT6HunG%"%/"4Z\D"%*26# + +  + + + +  + +   *jp2*%8KJZiolniK@&+BFXgepldQE#;PXMILMKnxKet~{us~x_=/9]r{r4*)&+>eQXZo`WVWYenngiqx|zuZ'%*2Nsiid\L@/'!Oddlqss<$,1;?eicy{hZL/)11178669:9;=faA5.-,2DIGFE>1+/3^~_afrxyh`_^ʥZSep{ztt~~zrls|FFFEGGEEFEFEECEEEEEEDCCBBABC?@A@@?DCC@?><<=>=@>??AA@AABCDDBDFGGGHGFHGHIIIIJJLLLLGKLIHIIKKIGHIIHGHFFGHFEEDDEFFEEDDGFFGGGHHGHGGKJJIJKJKJJKJIKKJLLMLOMMORQPPNQSSTSUSTUUQVVUVVUW[XZ_``bcccjhkmooqqtuxyy|~|vqsw~}{z}}|yyx{{|||yvtswzxx}gdrQcwFm·@n\JMLJWX`ydWzsCfdF.4SsQNhiROLZYGHQԘצ{t4`=VZggQ1&#%+=VidPLD?kQgrspmLLDJs_l{z_oϻєZHiΠHqsFUTQQUVZdtqRPLJJORTZQEGLSRdN{fPJJG?10RYOKNQUY[`nprjY[B$$*;:" + +  + + +   + 
&`i-($;KMXkrpm_M<'.AFflmv[;(COSLCJNEEH_pspqk,3Ml}\o|D)**/Dq{?-*&)'+15JP,+"0,,{|{//BrsRhu~}vqe9-Pj3*)',@k~S__vhYVXYdltkipx|ys\$$*4MYI?WYOK3(FXcpuxj9%/6?TxvvqO..5239;:<;==9@p@5.--5FKJMOB0+.4^aes~la`ZʛLIEQT[jxzmjlfbagx~yqou}EGFFFGFGFFIHGDFGGFHFEEFEEDCECDFDABDDCACCBA@?>AA@BC@ABABBBBBAACDCCDDDCDGGGIKKIKLKQNMKMMKMLMHJJMMKKIGGIHGFFEDEEDEDDFGGGFGGGGIHHHJHHJIHIIHKJIKJGHJJKLLNNOMMPOOQRRRPPLQQRUTSSSQRRSUUTSYYTSUZ]_^_abdgijmnoouwxy}}}utv{}|{xzxxxxwvvuvywxxvxxxyzxv{yx|rqsjmywDflGmҀCl\IHILTZ_xf]{nIlC/0Sy~LIirVSKQSGNWҔۣyu,Z=ZksN1'$%,@\qXNEBqVotoNO=MmZmzv]o϶ѐ[HnϜDrlIYTMHOU[esmSPLIJOQSYQFFIPSiLxiRIIF>,/SWMIINVVXajmne?ITNLLSTChAJjNE:211-,++,-1555544512685+#G|ws~~6#?FIKMMN_`[TVl_SLJIJLK# %7885EV@9::4$%;^z~c$(*,9L-0Oh_:$!%,$!=T[C-&4H8# + + + +   + + + '[}V-&$7FM]illjfV>$-AJlr~sqpyoW8-RVfUFIRPPT`~qikz^+2Nnds~@"(*-1?r=,)(+%%/5LN-*++.}|}--Asp]iu}~xte1)M{j5''(-?h`b^wgWVX\clsifkuzz{W$$)5LZT`qD`T-% DVfquwa:'0<@Yv|z=,0438=BCECBGKRv@5--.8FJJMPC/).1`^frsk_[Nċ?EGIFJM[^dXinnhT*2XZJHIOUTYaimpc?GONLMRQMi<JbPD;40/-,,+-03655433446574+&Jqrwl}6'EIHKMRb~YPSMJLKJJF &678:@E<9LT?::85&)GnV$(**6;.5KTC,"! '5 &#9MWF4!&>K:!   
+ + + + + + + + $5dkV.(%5GN[`adjkT?%0BJldWmgsmnP7 Speh\OLRNQX]{~jgpvyU,5Qrc}{;$ '+-1DwA,(),(&/5KJ/,3-,}..Gupcfs|{xxg3)V~f3&)(+ApZ[X{eWWXYbkrj_iwzxuP &*4@_n{WHg:'"!DYjqtsfA)7K>Koh=21473,,/9FKJMJ>0+/3b|^f|jqh^\S?DECCHEPWVARSL\>/86479>CLVY[cjs~xoqw~¿CEDBCEEFEEDDDEEDEFGHJHGHHHFHGGGGHGEFFEBGFFDEEDCADDCDDEEDEEDDCDABA@ABABCDBDBEEEADEDDBGDFHGJJGJHLMKLMMOONNPNMQNKKIKKJKKJIHHIIKKKJJJIJHHHIIHIHHIJIHJJKKIJLKMKJLMMMKNKMONMMMOPNRSRQSQSSSTSTRRURSRROSTUVTVVXZZ\ZZY_dedehnopsux{{~~uqt{|~~~~}|}|~}}~}|~zzzxyyywvvvutsttssrrtrqrrqrtvvwwvutvtttwwz~vvw{vmhjnmkloaWSnUQnYIKLPX\d{b^iHfOC,4U~|JTggRNJPMMYg͓םyv*OKdvrJ/''(+>WecWNCAxX|rmMT:VoZm}v[tͫԆ`FrΌDvfIWVRORV]cqnWQMJINOR[SILKPRkrRweQIIF;*4WVJKMRWWYbilm^U|L>9865'0RvZ$*++=cj_A1,%" (9 !(#(@Q\Q9$&DJ$/CIpe{_bzzwU5%'rrgidQH?1+/3dv]hys}f^[K>?<86;BOMKHUZST536325765136?HQd|~xkryFEDECBDDEDDDCEDCEEDEFEFGEFIJHGGFGFEEFICFEEFGGEDDDCDECDEDEEBDEEDFC@BEDCCEDEEEFGACDDEDBDDECDDDDDCDGGGIIIHIMLMOQNMMONNONMMLNLMNNMNNLKLKKKKJIIHGIHHIIJIIIJJJMLJKMJKJJJKKKKKKMMKOQPNOPOMOOQQQRRPPPROQSSSTSUWTSTQSRPWWTXY[]_acegkkpsttwz~~~~skjltux{|~}|}{yyyyyz{xz{{yxxxwxwvyzxzzvxrutuuuroqoqpqpqrqppqsrqqqnprsrsurtttrstwvx}|wxxmhmna]_twncbgpF_hc[a^Z[ZdwdffFIID07Y{GC[jLKILLMY_ˑ՞vy&OM`stI3(&'-BdndVNCH{Q}rfIU3[l]l|u_vάҀ^BwцHu_FXUPNSW[cteVPMKJMPV\RP^VQQkjXvdRIIF9,7VVLSWWXY[ckmm\?GNMNPSOHr2 MbMB84.,,++),/1445444456675*+Kti++GIIKLSuSrjgjQJMOA +9789?A;?YzoH=:885*$7Z_')*,`zI=6.%"$-=#,.=KX]M;' -FN8" + +   +  + + +  + + +  + + + 'hh0(& 5HHKPZox\Q>'0DHfuPAsJ8'&Dsia^YTL83366;BL]nc[kN-3Xq}lyr6$ "*,+0GfZxw7*'(*')/7_C0..;/1q*/Hzf^fs~zvw^)+Gv{6+,(&,DrrX[]}dTXWZepoi^qyzxvM 
&)2SmlgaIQE+#(JYcqx~U3!#B<@3tQ5FDBHY`YN7442196356@Yy~wkqv}GGFFEFGGCEECCEEDDEEEFCDDDCBCEEFEEFFFGGDFGDFEDCEBACDCBEECDEFDDDFEDBDFEEFDFFFFFEEHGEFFDDGFBBBCCCBBCDBDDDDEHHIIJKKKLMMNMLOOOPOPPOOPRONMNNMNOKKKMLJJIJJJIHIIIIHILIJIHKJHJKLLKLJKMLLMMMJKKLNMLMNOPOQQQRTRRTQQQSRTQNRQRRSQOSTWWVVW[_aadcdilorpnqvyzy{xka`_dfnru{wtturwyzwvvxxytwxvwtvxxuwxwxxuvrtsttqtqrrqpqppqpppstrrprqsssrstqsssvwvvvz}{}zvuvwyjklgdYjwuhggc`\airwzzuag\PJJF);]x5&npGMKJNQX`ˎӠwz,QM`vvT5)%&+<]dYPLCFRsiLY/]jXm|sZzαz[@|υJycKYUSU^[YcxdVNKJJMOW^SfxXOQjh]xcQJIF:+9WWT`^YXY^emnoW>GQRSTUPIs/!TbMB93/.++*+-13544334444675)-G^ya).HIHJMRgn_yWKPL= +,789<<<;:QlQ?;9996+)7aZ#(*.fhd{R%$$,D#.;KX__N?-#!0GM6$  +  + + + + + + + + + + + + %gc+&$3GGNbujPJM?#1DF\fBK|vHmwS91Irt_YMLRK@;458?JWWpUIjN/2Wtnzt7-)&0.-0DZb2+())&)16^:1-$-7/1{L'/M{a_hv|wtt}{Y*,DyoTO+((-GpoYXY|_WYY^ksoecrx{zsI (,5Qemn_Yj>)#(M[`q}vE-&6BAFwyn@6FVRLJQWnaI9<~e5.-,.4AF[bRH94327Ok`YTOH=47Nu~wmpuGHFEGHHFEFECDFEFEDEDEDCDCDBADEDECCCEEDDBDCDEDECDBCBCCDFGFFGFFGFFEFDEFEFEFEGHHFFHGFHEEDGHEEFFFEDDCDFCAABCEDADEGDEEGGHMIKLLLMMNNOOQQRQPQRQPNNPNPNNNMNLMLLJIHHGGIHHHIIJKMJKKJJKKLLJLLLKJJKJJLMNNLOOPORQPOMONPPRRRPMROMMNOOPPNPQTTVURQY\[[]^]eghidf`TPOKIOZdlnooquw|~~~~}{}}~||~{}|||}|zwyyzyw{yxxwwxwxvuttsrrtttuuwxwwvstsvwwzz}|{~|yu{|opqmhdlpjjhcaVdrp`[URI)?^u50vLOORSUVZɍ՛y}s&NJ_ut[5'$$+?Xc`RMCET}qgEb.`hWoy|~}~nW}ɭycC{Ny]MVTTbi^]bzaTOKLKMNV[NjsVOQmb`t^NIJE8(:[XV^_[YW[fmnoX>HRSWWVMMr)!V]MB81.++**-122444354432473),S{tv$.GKIKMQlir~VNMK; +-99:;FLIC<<;<989:6.3PQ%(+.^{T$#%1J#1AO_b`PC6(!&2EK7"  + +   + + +  + +  + + %s]+)$ 6CJ[udSRSN:%3DDsKTc{qP98[hJ@B?GLLOI87;>IYNRD9i{I/6Xt}yj~n81 
+4-./B}V2*'(($)27W51.!*F51pM,3M}_bluvsqs{uT-0h`++*.KxoWXYcXVZ`nrslks}|xoF(+8MQQJ\cF5)"&]d\pi@*,6IAFTkM28PVUTTWf`;8Ee20-,-/8DII>.*+,/ig\dottka\ZAuD=515:=A@FJY]MJ5543BuponrmB21Oy~}}tkpvIFIHGIGFFFEEFFFFEFFFFFGFEECBCEBBDEEEBDCBCACEDEACABABCFFCEEEFEGEEFGEGFGIGEGHHKJIGFEEGGECGGGGGGGFFEFEFDBCBBABADDCEEFDDEFGFGHIJJJKLMMMLKMOOPQPSRRPSRQURQQPMMMMLKKJIGIHIJLIKKJKLKJIKLMKKJJJIHJJKJKLKNMMMMONOOONNRRQOOOMMNONPPQRSTRRSTSSSQSTUUVVWPGGA8500.-9BP\dknsv|~|~~||{zz}{{z|~~||~|}|~~ztuumie`^Yerlb_]XJ7Q[_pup6InPSUVV[[`ƍ՛wyxs.OK]sf0%#%-FmxlQPBEʏQz~rfAh0dh]nwvwxxj^ȵte>wQz^KWRSej]_b~_RKJLKMNV[MfkWPPr|diu[PLLE7'@^VUac^VSYdkolRAIOSZ[WLSuy''Y\LA92.++)(+/11533454433472'1Swlsz~m1MHIKPTyj|NNNP: +/:7;JvzuM7<=;89;81:cĪK()*.PxM "&4H$4@P_a\PD9-'+5@=* +  + + + + + +  + + + !v_+)% 8EL_ed`b\P=#4DGzNiwwxI4%0'#348>COUG:768?HCA?<^tC,7[t{kk7' +21/0Kc2+&(&#(14Jo420>S3:j,5Qciorqomq}rN&1nU++'-GymXX`tVVYV]krtmmv|zxsC )+7KUR\qVA6*"+g^Vvd@*,2<955]s020BRUYcjo~X87C`..,,*.;DGC3+*-,.kd]_fkne]ZZGHUNSVLH8575Fzshvy}k924Qy~tmpvLNMMLKLKIGJIJIIGHHFGGEDEGCCCEDDDEBDCABCEDCDGCBBDDBABDEEBDCCECCDEFHFGGHIHEHHHIIIHHGGGHGHHKHIIHFFGHGGHGFFEDCDDCCAEFGCFFEDDFEIHFFHHGGHGFIJKKMNOOQQOQQRRRUSSRRRQOOMMLLKLJIJKKIJLKLKKIIKNKJJKKMLKJKMKMJJKLMLMMLLKNNPMMNMLMONOOPPPRTSRUSTSQSRRSPPKKD@;2130-,/6:EHQU]fimosuvy|~|wqnklld_c`]RP\WLVVbhoe?Wugb`[^abiÎҘwsj*MQ`|])%%&,C_aULQAFDžTxsi@b4dgbjsstsre\ȳoiBqV|[JWTQfj]`a{aSMKKKNLUYKcmUQQt[gv_MVSG8)A^XU`bZTUZbikiK?JPTZ]WMQxw"(Z[LC92/,*)*,.02456653334681%3Mtyt3IIHLQRwfPOMI: +-87=iPCE??;:;:1?gA&*+4v~m|V!$.B'5AR]_`PC;1,>=1)# + + + + + + +  +   + + + + + + + 
%uW,*&6HJWiong_O9#3DIz~xiq~}J0'84357AI?65:<;;9992/VvmfY,())'#*47q5-)09/;Q+3Pa^ccfijmv@"5qqz-((*2NdYZ_szZZXZ[gpsbjtyyul?(*6?I`YDT?("2[L^}b<,!'07416ZR2.-:DSqodvpV89NS22/,,068:4+),-)0je[[]bc`]]X:qHD704;DBBMV\ZTL-563IoY=R]aj:68T}~}qhnxMSRRQRUSQRSSRQNNNMLKJHIGEDFHKFGGFDCDACBBDCCDBCDECBBCCCBBDCEDDDGFEGFGFEFDEEFHFFHIHFHHGGIIJIGHLJIIIIHJIIHIJIJIJKIIJJIHGHJIIGGIHIIEFGGEEFEFHIHGFCCFGFIHJLMLKKMMPONNQRPRPRUSQQOOQQRQPPPMRQONNNOOOPMJKMKKJJJJKIFHIKKJLNKJKIKKLMLNNKMQPNRQSSSUSVURSSOPNNPIJJKKLKHJKLLHKPORSQXXWY]`bceiiioprrtxwyzy~}~|{z}~}~|wuqjlolVD+)#  +  + + +  +  + + + + MooQ+'#9KR_UQQ]YL2#4DN{iRnoI/OX;24348848>>==9114Uj6+;btowa+ !8S=-/cfq[/((((&05:o}71, 0B259+7OX^cdgkjqqB"6tT'()*3P`[]epVYYY[juobgszyvk;"&)8O\viR`V9("5SIh{y`:-!&25325_G/-/;69=?@@;126HU/->_yvq|`& !?c3.8avjR+()((*367gd132QW,8xM-7U^giijiitpB%6c|C'**)4Q~eZZc{vt^VVYaqtr`ds{{vl;!(+7EQos\PS:)"7U]t}\;-.4663?t|G,,.;EMgxdwZ77MN4212DXX\[H:3-*)0i]]^jsjd^YS4hB@9667JJOTOAW|Zuog@ns2wfanx{|{{ceĹϽ^u|E`aOQYRNY\[_e[QMHHKNMWWQfeQNR}ExrUOe^E3+H]WU\VURW`mvrgKBJNPXXRC^`2[ZIB910.,.-144577345434225. 
8a{[tM7NJKMNX{ZwIPN\2 #3:9*.3664WF3/1BHNmvmM;;QH614AabZ[dVE<4,+0hV^_sxj_[UF_@>655>CFKU\]QI@1428HcneefF0335Zpnq{¿OQPNNNPOPQOPRQPNPQSSPOONMPQQPPPNMVPORQOOQQOPPNPRQPPSMMOOPONNMMMIIHGFFFFFDCCACBCDCACCCA@BDDABDCDBBBB@@?@>ACBBCEGHKJHJJJHHKKMJJLKKJKLIHIJHFGHIHIIHGIIJHJIIIJLJHJKFJHFFFHHGGFIKJIKLLLNOQQQRRRRTQTUSUQTRRRQQSSQQPOPOQNKMJNNJIJKIGIIFJKJJLIHKLIIIJJJIJGKJKJJMKMLMOQQPOPPNOPPRRRQRQQPRRRQOPPQRQTTVVUXXXZW[^]_aaeghlmlptyyxz{|zz|~~|{vx{|}}~~}{}~|{kX_kmkighr{sf`^hlmnsqd4LBHFKQC(%$&-=HJJOMGYsWxwrqg?ts4{dbo~}hg־еX{tHоZ_OUWPP__Z^dXRKHHLOPVTR\VQQUC|mSUhYF3(J[RSVUSSWbnwqeLFMQPTUQB_\4_\K?60/..12>E9-8zxF)',/7Q]WXdqYWXYbmqqkqx{{wg2#)-7[q`ZcP4( =bcqt^A(,5<5AB/15EJPgmutN?>\K629]_IHQ^RF@7/+7k^\gk\YSAXC@427ADDJW]\RL@6514\unnbFC@45a}qlq|¿KNPOMNOOOQOPQQPPQPOPMQRNTRQQSQORQRPQSQQRUSSTTSRTSRRTQQUTUSSTURQPPQPONNMKHGJKJEFEBBCADB@BAA>===><=;<;99::8:;;<>?BDEDFEHGFHIHHJIKHIIIJHHKIGGIIJKIJIHIJIJHHHIIHHKJIJKJGHHHGGDIGIHGHIJKLKMKLNOQQOPSRTSTRQTPRTSQURTUQRSPQSRPNLMLKLKJIKLIJLJJIKJGHJKJJJIIIIHHIKIHIKJJJLKNMNNNONOQRPPQQRPPOQONNORPQSSUTTUUWWVWWWYY[[^_`aeflmnooorstvxxwz}{}~}~}}}{}|}}|}}ywut}}yxy|}yqhlqpjd]^lpfbYYadfjkdYM_NH036.%$&%,9BEGHGGgl^kmnnhAzm5bar_dԽҪXqR͹Rh~KPTORc^Z]iQOKHGJMOUSLKJMNSE}kVViYD0(OaVPVSVVXdovqeMHNNQSSQF]W5^ZL?510147Ku}VZd@:74312365+:zM3o[LLORRvsGOJM( 7=8@c{i{9:<7+>nś&&+,:t~{o<$$*M5 + +.=@I\YJ8*GZYA/" + + + + + + +  +  + + .yJ-($!i042)jZ/II/:ecttlqwr9.=p}?((135U[RXgsYXWZdmqspswyxxf2$),:WN)GV@H5( 
A]`kxva>*)4:/Gt=0,6FHRr~uvtZ?>XJ72AS>13GPA@@:3*8k`]fh^[[U\F@76;@CHLY__TRL474:TaTWcZX]M89b~|qgq|OOQSLOPOMOQPOPQQPQQPPOQPOPQOLNNMPQPOQPQPRSTTVUTSUTSVTTTTTTRTUUURTSTVSRSQQQQPRLLKLNKGIEDCDC>:::;<966555442357::<=>ABC@DEDGFGIIHIHKIHIIGHIIGIJJJIHHHKJIJJFIIIHIJIHIJIIIJIGMKJHJIGIKIIHIJLIJLMMNOLPPPPLNPPOPQMOTRSWWRURRRRSQRRRQPPPOPNNMKJJFHGHJHHJKIIHIKIJIGGFHKIHJJIJKJJGJLMOONNNQONMOQPQQPMPRSTQPPPSUSTSSSTVVVWVXXWZY^_`aceejljlotwvwvwz}~{|}~}{{zzy}|~~}}|}|uumt{}{yuuxxzy{~~ysjefhcca[JQ[U]YQK|t4.,%"$#$+479:;CKnt_fnnk`>zi8abv[gפUlPβJhJPVPVbWY]eUNIHHKMNWRHEGKPUHz~{jWYeXB/.O^RNYUVXZhtwwhCJOPQUTT@eS6]VJ=6123@be864333552)Am{MVqMKLNR`OcoHPKL' +3=;>Z{fz;:8709e$)*+L^YK<>{g?9# +  + + + +    + + + (tN-'$#>IRVd^W[M0$9BHFEIIGJIIJIHG@3?//64DVRfqqtt~~R1.1,(/Aj~|tz~U" ++..1551332+((+2S[5:}c,5/fj*MD,Il{Ys}wnu|~j7,8p=*+358U]U[gi[UV[dlsrnqt|zyc.$),:OUKUYRY/%!;XZn`;)+165Kwq=2/7CJUnneo{NA;aF64AF714EI52;>4*5pb`hf\\XX`ND:7;@??@CCDFIGFIHKIHFHGFHGHJIGHJIFFIJIIJJJIIJJLHHHJJJJLJHJJFJKIHIIJJIJKLKJKJJJKKJKKMMLLKLNMKKPOPQOMSSQPPSQSTSRQPQRSSSRQPNLMMOMLKIIHHGIKHGEIIGIIHIKIHHGHHHEFGHGHHJIIMNMOOQQRSPRRSQRRQRSTQQSTSUSSVUURRVVTXWZZZZY[]`afcfiiilmnstswywvvwwxzzzz|||zzzyxxxywvv~}yuuvtvutww{{}uihpqrnommlkg]ZELzq;=0/,,*,1647:;^jZcic]NAuU=cgx]kסTmV̱OjKTVNOTSX]fRNJHGJMMZTHFGMQYzF|{kOX_VD,-U`RT^YXW\ktwpdJMSTSUVSBgQ9]WL>5128dxèʵ^CB7335421'CnhF'FOJJMPUjangJMNI# 5<9:Sxpp>:85.Bc())/@9j-##(7 "0>L_ZMCMXF@D?% + + + + +  +  +   +  + +hI)($$AFNYegJT\L.%:CHJIJIIIIJIIHGC:@06MV>769DWjmlszvox>-/1)(3=hz}uv}T" 
*0159;@DG>4,)-4Ub6Cf+6/~n.9^ľ~,/Q`wXkv~zry|c6/9~r<),4/:TTS]h]YVWYakqihjt{zt[+%(+<`mfdUZ9%$?l[swZ6'+3899Tyr>316ERPPSLvpRC?e;54>==;;;==@?=?@BAADCCBDFDFFFGGGFGFHIHFGGGFHIGIGGLGGIIGIKKNKMJJKKKJLJIIJIKIKIHJKKMLIIJKJKJKJKMJKJJKLKJKLNMMMNNMMOQPPTTQRPOQQRTSTTRSPRROOPPNNONPLHJIHLJIJHFDHIIIHFGGEEFGFHFFGJHHKJKJLNMRPNPRPSPPPSQSVUTSSUTTTUSSSUUVUTVVUWXWYZYZ]^^`^bedbfijjjlnpqtuwxwxvvxwvwwy|}|xuturtstwxttux{}z|~~nmtvyzyyzxyvnlcgbgsi_^^[QNLNRX\^blƼÝdh_YNNGObjr>LwjZ_{^lԠYd\ЯMiGSTLILQZ`iyRMHHIMPO[UFHJPQZnA~~nOS\VE10Y^SZ^ZXV[lvxpfIMSQWXUTFfL?`WI=5004għˤvA8544762(!Clk}y?"*DKJNONy~l]JOMK! +:;6DuQtu~c<;85+Ed"),1Lt)%&)8 "1@Q_]PHKP?88D@D  + + + +   + +  +  + +@klC+(#%CFDPXRQXVI,$9HKQJIJFKMHSILIJ0CA[zr^=54=arnss`NGF1,01,3<@ht~~ypzR# +1204>GS]XJ<2.16XZ8Bfz±Z04.1a-Cò{B0CaqLgw|ytwva9/@||s=(,4.=Z[X]`|tbYVVYckkfejv{yqY+&'+8chVG7#$%!PnXs|sR6' .6;12@}o81/8KPLH@Fs|vWBDl6439><98:836EK7&@]]lyc^`aYXKIC:;C_][ghcZtL,66?Ninvk?EQA4Blykkr}¿`bba`]]\]ZVWXVROSRRLPQOOPPMQNNOMLNMLJMMMMMNPPPQPQQQPNMPPRSNNPQPONNORSQTUWUSQPQSURWSRUUURUVSSSQTRPPSQNNMMMKJGEDGEEDHCDECCACCABEDEDFFFDFGEEEFFHGFFIHGHIGFIHDHHJGLJIHKIJKLLLJJJMLKKLKLKJIKLJIIIKHKGILKLJLLJJLKIKKJLNKKLLMKMMMMNMOOMOQPQRSQQQQPRQQSSPRTRQSQPSQQNMMLJIHGHIGGHJIIHHGGGIIGHIHIHJGIHKHKKJKMMONNOQQOQQQRRQPTSRRTSTTRSTSRQSRSSSSTVVYW[\[YX\\\abafklqtttwwvwyy{}{ywvsrqqvwxvtvwyzzz|}|vx}|~~{wqquvuuqpnpusqnkoty||dabOROIMY_ac^[[G\bi^Ziy[m؝V\`ΩIn}KVTLIMT^_iuVOIJLNMQ^RFEGOS\~mI}}iQU]YC00V^SY`[XV_ny|xjBLSSXXTQGfD?YUE;63/09wa/3534575($Hn~y=/IKMNQNtuqn[]HPOJ +><:Cny}mzf9<82.Anxґ$**.Qzrp$"$)6 !-DW`[NGCC[[LZYP  + +  +  +  + -jur?-(#$CGBHLMNLPF,&;IRyW\zcWSm]/H_k|{lX=7CrZ;95-*-/1DBBcdm{ur|~J  !7/-06EQWTJ:4336L<5:WK55.2b*P9,9djMitxxrt{f4,C_Zy0'-0,7SYXYc}zhXWUZ_jmgakw{ztS)&),7aU2Y^@*%%! 
NaSozwN:&!07945Gh80-:MKHJABkUC=l:306?CC@;99>LH5*A]_ld_aX_JGHD=8>Uywo<=:3/?kԍ *)/W~panj&$!'4.EV_[QHMtcRajI   + +  + + + 7yE*($)CGCINONMNF+%>IwrXdY:Zdhq~zk[DEms|vX<85-+*./A87EJUieiw}9   ++,,089;A=40/2/?I6HX/5,/_.<{*-8gdKWfptsrza0,8`c/**+-;VZ[^skWWVW_lqh[kxywtU(%(,?]8R_C*)&!OYZyxV9) &28<43Ke51/:HKJMEGkjSCEr;2-.;MONKIEEG>1+@\_mz\]`L^KDEFDN^ticbe`wX/34F]moifdkJ02An~zmjq{~}~zz|{wsutnnighb^^][[WUTOSTRVQSLSOMOQOLPPONNMJMMONNLRMNNOLKJLJKMLKKLIGFFHKGLMMNPSSTTSSQURUUTUUSTTTSTTSUTRRTQSSRRSSQQROOOMKKIGIJHHIFFGDDDEEHHFFFEGEGHHFGHHHGHGIIJMLJJIKJKIIJJIJJJKKKKLLMLMNMJKJKKKJKJIGIJJKJJIGGHHHHJJIIHJJIJIIIJJHHGJIGLLKLMNLLMOQPOPRPPQQSRSQPQNNNMNOONLLLJKJIHHHGKJKHHIGIIGIHHHIJIJJKJJLMJKJLLLOLONOQQRSQPNNMQQPPNRRSVVVVWVVWWVX[[[\`bdgklruy~}|}|zxvxvwvxxxxy|~y{|}}{vy||ww{~~~{ywxxywutqolomggfda]\juuchvvkoce[ozgX}DeΤNsuHWUJGKS[`knSMMckTKM^PHGGORebO~ycOZfY>.5[^OX_Y[Ybq|qGMQNNOROFl< C\XF<61...)PԡR/565454661$(Gxysm-"CSKLOPS~pcHMLF <;;DW@rj:<93-De|!)+.Yxwq&#$),-AW_YMKRzoeXcdD"     + + + + + FxF,(#0CFIPSSPONE)$|F48'-[*bx9-9kW(?Sbigq~y\618g4*,/48YVYYidUVWY^jpg\lz|wv`%%(-@PNt\@*&&$O`fzuW:)"%2<94:Tc415AJNOODHlkYGBGxy72,/ATVUTRKB<8-+B]`mw^[T?WPDGJDIcvofdcac{P.69FvqphYPB96Gp}ymkqz~}z}ysqqlmihjcc_\^[Z[WXWWTTSRSQPMNOPPNPNOQMLQPONOOOMNLMJJIKKHGGHHJJFKJMORQOSQQRSUTRTVSUVURQTVTSUSTTTTUVUTQQSRRRRQNONNLMLJKKKJKFIIFFEDGFGFEDDEFEIGHIHIKIIJIHIJIHHIJHIKKIIJMNKKKJJKOLJKJIJJLKIJLHIJHIIGGHHHGHIJHIFEHGGGHHEDEEEFFGGIHHIIJKJLJIJLLMNNNQQPOPPNMORQPQQOONLGIIIHKJHIGIIHJGGEIHIJIJJFDJIGGGIJKJJKLPLNMJIKLIIIKNMNPPPQPSQRSQRQQRQSVWWXWXZ\_beiklsvssvwwzz{|}~}}~ywwvtuvuvvwy}~{wz|~|zvvy|}yxz{|{{wutqqsvtqrt}dfg`ceu|~{tja\dkwsrwn@nʊKruHYVJJMU]akmSNMdpRMOYRHGIOLhx[N~ycL]i[<+5X[P]\YYSbp||lGPRKJKVOHr6F[TH:4/,-./7qT2866547454.#+T{dWel[%0HLKOOPz~iXOPQ= 8;BnWmJ#Cgzx{scidhf_SLH6,.==457BSCJkJ.  ,+).31,.:?==:1<\84?914(3Q(r?,55543344-$+Lx|Zwo,-LLJMPKumt~RRQO> =:=J_W9<=32Iv~o@'++/c}z`$%$,=  ,JVbaTKXk_ubF  + +  + + +D=((".BIPbjrzWNI,(?Aoi^gE!:cUf|q}zxwmD0>W<797FU?A^C. 
+  /,,/12-2C[gkK,.237SN26%3E(e4-=rF".9CTsY3(,86A{SX\z~dWVVT\npibqwyyu\ '''.KuU<>'&%$ "e~q{Z9)#+8<=5b{lD=DPNOQQLIED@?@Eu82./6@C>7=A4.-)%MzZ\sqc[Z=oM@Wps{umabcgA143OZQ@7\POU=7Nxsgit|}{vuqrmifbed_ZZ^Z[ZYWUTRUUTTQNOPRRRSRQOPPRONOONOPPOONPQNLLMKOPOOKPPPQQRTTQRTTUWXUTWVUVXUTTTRSPRSTTTTVUSTSSQRTTTRONLKKJMLJJHFGGIHBEGHGHFFGGHIHHIHHGHIIJIHIIIHHHJIIIJKKOLIIGHJHJKIIIHGIJIGGGIHJIIIGFGHHHHJIEGGIGEEDDBCDDEEFEDFFDDFFGFIIHHJKJIKKOOMQRKNNONLLKIHIIHFICIJJKKIJIFFFGIJFGGGGGGHGHHIHGGGHIFFHHFFHGFILKJIJKLLMMOOOOQPSSSSOOSSY\^acdegjjlnpsvuwvvvurmlnnkoqssrux}~z{|z~{yzzxvvtqjilmjfhjjibcbT\ahnjaTUBReqtsrRSgddcZZ_`rfSPNqiPMUaPGHINNoTZv\NWcN>'9ZYRXXWXWf|iDQSLIHTMOv3#HdUE:41,,/HoA54346565-".Jq(3OMLNPUxyTQNM= + ;;;Fngb~\=>;46KsN)*+,\ruc&#'3N& )@QbaTJT^eeYE  +  + + + + Nq:)' 2GK^r{~sONG+*?FqQmI,@MK\rozw|I1IN8C@7HTVewC/ ",+./22.1U|wwG+&)39_U.3!7F*W\'0EtC%3>C;:Eo~t]I;QnaY+*1;6BOXWaqoaWVWX_kpmmuzyyrR ())+11,*(&&&"$gjj{P6* *9@IWSJJRTRPJGGINn63,,8INKIMH600+'Lwbauu_\Y3NA?fyuxj`a[\=77:Q\_dsq8he53Lyqikr~~z|vtrooihaaddZVXZSSSVYVWVURTTQRROQRQQSTSQRQQOPMNMLNLOLNNMMNLLNPQRPSTTRRUUTVUTTUWWUVUSTVWVVTSUTTSUQQTTUUSRQRQRONQRNJMLIIGGGHGFHGGFCGGGIIHIIHIEHIHHIHHGHFGEIKJIJJKLJJLJHIGHJIHJIGIIHHJIGHGGHGGGGEFHHHFGDGEBDDECAA??ABA@AB@ADBAEGFGGGIIHKKLJLMONOONNOMLKOPMLIFKJIEGIFGHGGGGGGFBEFFEFDEEGHDDCEEFDFEFDEBDFFEGGHIMKIJIJJKMNLNPRQRQRQTVSX\]`]a`ehklkhfggghgklnonpsx||wz~}xxwuutnjkgfdbdcd_\^[[^bdbh`]Rju}yxibceo]WTQgZPPUaUJJINSn{E^p_OTZJ@*<[VOZZXWTh{}zf>RRJHIQLGw-(PaRD:42/,/=|{l514445554."0Nu}]}#7ILMOQYlXÒKQPN< + !<=?J}_- + (-+./450/Po{yD.&(29Q?,/A|F,^Z-3Ht='5>B=:WqaGO[^]\XVRVSh;3--:TZ]]Z?385)(PqZ_vu\XX1jy;<:;;<><>@@ADCBDGFFFFHGHIHJLNKJMMMNQNOPRMLOPMKJKHIKHHHHGIIDDFEDCCCDCCDDDECEDDDDCBCDDC@BCFCBD@BDEGJLLKONNMNRSPMRTOTTVVXX\^_]^_``abcdghikmsvz|~~}zwz{|yvwtrongfgfghfebd[RYr{qodc`j|}yurm[Z^a`\WT\gVOMKPWtFap^JIIG<-?`XQ\XWVWenmqsYCPQJGHUIS|+#Q_QC:41/,/Ctrqwve302443664.#2Okz_:ONKORNeerLONL; %>;;K}`9>=83XK&)*4mpUI&%'4C+ 8Fjwr>- + 
'.-58:AFC]u=,$(13Pt-0.!>{B)|r50Dw?)5@@>>Cgcm|zrXHK[kN0)398DcWWQb|eXWXZbmrngnx~ytD'))))('$%%%%#+S\ejF9(*8F>Y{d3;hjSYcmongaadg]=7/1@W_c`TD==7+*Lq^`wtc[Z0t?Ratqa}OYZVX@789BHvxbMT=755Rzpgkt~~|xronjfida___]UVXYTRSRPSTRRTQRRQQNOMJLNLKLLMLKIJHKIQQUTSTUTSSSSUWWVVUUVWVWYVVVVTUVTVWUTUTSUUSVVWTSSSUURRRQPOOPMJIKIJIFHHIGGEGGFEGFDEGGHGEGIGHHEGGGFGIIHHFFGIEDDFGIIIGKIIDGGHJHHJGFFFEECBB@>>;:8899:9<=?@BCCCEDABEDDEEEEFGFDFHGJHHIHKLMQPPPRRMLLLKNLJLLFEIGFEEEIEDDFCCBABA@>>>=;:;:;<>>@?BCEFGFIJJJILOLNSQPQRSTURUVSNOVWWYZ\]_aacgnmrsuy{||~{zwsz~~~}{vwyvsomlgeecjdac^_^qoka^_[_ilhaXTcf]\WQTXtFel\KHHE9,A_XNKKVUVdlmmmZELLIGHUHWz|'%Q[QB92.,,4ie5./2345780$6WxbmnAW@ >KLKMOWqc~x_MNML5 (;9:Hg}R9>;54\ò:)*,/hxmr1&&)3B& +6Uke[O]|WZ6,'###"% %*++0.)*-&>MT[n{eXPE',CKnTtB!$DmhYTcotp|udND@3.,+()3C99Ikwn:) %/7NUV`eS[m<)#)36Ro830%;}E-ɣ31Fx1&,79:=Fq_[ctsnRURUzjH+(091CkRY^{\TWW[boqcXpyzyr> '))**()(%&$#",VXa~}eD8()8D9Wo|O4IyeZar||njqrjTB?/2FWTWVSH>>5)/LkX_to`]X6qYXd^pmUXVWg?856LhliifEHQ84V~nekuxtrwrnhghba[\ZVZUUQWTSQQSRQPSRPPPONLLMMNNPPRRQSRQRRSRSTWVWUUVVUUWWVVVUUWUVVUTUVXVVVVTUTRTRSURTWUTTTTPNPSQPPLMLKJJJHJJJGGHFEEBBEFIFGIGFEEGHGGGHGIGEDCCCBFGFGHFGGIHHJHJKIFIIGHHGDCAA@>=<<;>?@@AABCDGFCCDEDEFEBCEDBBBCBDCCEEFFHGIGHIJKJIKNPMMMLLNLLJHJGHGGFFECDBBAA?<8944532259:>=>?@CDCCDGGHGHGIJKLKLLMNPPRQRSQPSTPSSRSUVWY\]_cefmpnqstx{|{~~zxvtz~~ytpifg`dceb]d^a]Z^]fec_SQYdjie\XUXgos}yfLe}i]HGJH:.DbWKJMVTUiyyxy_DOMHHKWMWy%&TZP@82/++2fy}^2011234870"5GEJWNYYOM<@MKKMO]|lZzyynROON2 6>::E^~v~V8<;48W,))*.ocEOJ-'%&7A%   -Omi]Umw`YB6+%&(+)&&('&5F>3382)#$"#()*)/-(,,#?ORgr[SE*+DOtSn64Pqna]_nsmnufS?30+&(27:?Um{k9' '2KyWel<+"*49l=07'>C-e{v%.H{)(0469CUVle_hf\CaNCkRJ3'(075F}}OXZx^UXXYconYZqyzwm?!(')+))*,)'%$#+P\gc>6( 
,:<9RhyL4J}b_nuy~n^WA.1=FHIKKA9:2(,If]a|wj_YV;{m^`yfdw~RQRan>979Umpro_HmL38Z~~mfms}zzuroglhfbaYYXTYRRTUTSQQQPQTSQPRPOPRPSRQRRRRTSTTQUUUTUUTUWUUUUVUVVVTXVTWXVURTTSSSRTUWUUTVRRSSRSURSTPPOMNNPKKJKKIKJGFHIHGHGDHFFFFFHHGGGGFFDBEEGCCBFGGFHGIGHIIIHGHHIIGIIGEFECCBDEGFGFFFGGHFGFEFGGGFGHHDBA??@A@CDCEGDFEDEFGFEDBEHIHIKKILKILKJHHHFFCAA@BCA?><866333578<=<==>ABBEECDEFDFGGGHHHJKMNOPRPMQQPNQRPRPPPRRSRVVXXY\abbhjllnprpruv{{|~|~}}}|}~~|zvuuu{~}{wusqnomljffhhebgeb_WZZ`e[[[SX[lnlg]]Uz|hWLKKH<0JfXMOMWUXp}^HPMGHNUMT~p ,WZNA91/,,,9>IP[\B./0024574- 5FKMPMOOOJ;=KLMPPWaLNOS/ +0:=;;Q{dv}O6<:4;V+*))9~}oyI$#(>D)# #Kki^WpxyTM:3, %/6:65:<8;K`NDCD?4&+155.$!#%&'',& %'(**/,())%ESTns^VI%-EHkVy[4&cvablvqogxsT:2,'+37BQ]oza8* *9[Vh~yg>,%-55tê@48&?D1?x\e@'1L|&*1344>RmW_sPB7eC0528.+*268GsNY_~|]UVYXdoraasyyxj=#()*)(+?<<5*-S`^`m}~i]\S7zpaanjm|}VNNa[570;HMWY^^[[@35Y}}kfms{{ywolmlefa`Z]\\VVXYWWUSSRORQPOQRQQSRQUQPSSSSQRTUQUSRSSTUURUTTUWWVTSSRRRSUUTTUTUVVVRTSVPSQRSRQRSTTQOOPRQONNNMLLKJKJHGGIGGFGIGHEEFEFFIFFEFIHGHHIHGHHGFGIGHIKKJIGHGGHIIGGIHGHIJGHFLHJIIIHFIGHFDDBDDBBCEEDFFEEDAGECEDFFFFCDEEEGHHKJJHIJHEDAB@A@??>>;;<<<::;>@>??@?@CBCABCCBBDEFHGGGGGHJKLLLILLJKLJJLMKLNLQPQQPOLNTTTVWUX\\b_dgijkkporuuxvx{yz{|xvuqptw|~|yyxwuttssqppponnjc`abb^cbboibb]_fx]TUSPK<<\fca^XZVZm}YJRNKIOVJYl"2Y\M@91.--,,+-/382/--/245670 7FMNMOPPNI< ?NMNOQ_b|HNPL- /2=;;L~Q\H6:848`k)*)+3yI&%(GC($ + Cond\ty}vtJKB5$ ,:BHDDFGEHUlTGLLH<+7;;=;3+*-374770 &'&+,/-'())GURlw}}n\UC%.GP]f~<!H{|jcsqomeyV;0+-4Bcaaltv\>- 
6AJZeqQjvoV9,'158c~03='F8,2?O:GJ+$/M~~&#-69229NuRtoA6;Z>3>EE:2.355HsQUb}\TUVYfprahu{xtj=")''(*HpgS<'%$1jiv[C2(1>=9FfF9Jjsqw}~b_N8/2>HJIDHB9;4'/^Y\dy}g\\O4|vbXnuaSQ`U694=UbYVW_TFA69\~}kemt}~tqrnie`a`[[ZUWUTTTTSQTSRQMPMOQOMQTRQRRRRQRSRPPRRUURTUTSTUUUVVVVTTSTUVTVTUUTSTTTUSSVVRPSRSUTRQSTSPPPNONLLLKIKLJIJFGHHGGHGGGGHHHHFGHHHIIHGFGHIJKHHHHGHIHHGHJJIIHIKMHHHHFGGIIHHIHGGEFFEFFEEFGFDGFFEFDDFFFCBBBCDCCDCDEDEFFGEECCCA?<<<:999:::<=ACCCEEDDCCECCCDDBCBCEDDEEEDFIKHFEEEFEFGEDGIIJHHEHKJIJLIKOQRQSSTSSUSX[]^__``dgiklnnpqonnllrx{|~~~~~~|zwwwvwwwxutrruqoomjfi^dcedyfX[[VRLDRhovsm`]Y]iv{vgNLXSSPQUHae!6\]N?6/,-----02130---.03677/@>:57;@@??=;#')*.+/-'*,-FXYhlqobVUB(/DKwc<5etk]dhpth_s_<.+5Trmfiem[>/ ::3?gpPlYJK81*145DrykV-6EV5,3G[XM7,(/N|"'3<<89CsyY_}rD7=N66DWK=40425GrVUeu\XWU[foncjuzyth<$)())>hVG)%"7kek|zZC2*2=<5?@AAACEFEEFBGEEEEEFGD@BDCABEDCEDFEEADDCDBCDDCDDABABDDEEFFHJLMLNNNPSQRSRSVUUTRVXY[\\^`cedddeiorsuuxx|}~|{zyxwvyvx{yyxxxwwvwvpirwmgneb`\TQ^nuong]_Z`kif^XJXheeaWWLga 7]^P@50,,+++-123541.-/02578/<:Qz_TcbP9;963?Y|])'(*4cr\#&$%03*%  +Avunjqidaa-*?QTTPQONPO^{ZQNMNC:AAAEFA>?CECA@>: %'+0:74.'**/IXXlssoeXQB*3GQ}v0-MijZPMp~kiT3.8Rzxch{YQJ/ 231ezi~{tnj>(9<7:@MTU]704;Tn2-3VvqX6)'1Lv$)7AA??JppieqouH986*.4<3///208GpVWfyVYXSYhpmgju|zwi7$(**+Ne:B`M)$ 5TVa|aH5'"6@>8ZsB::2)1Z~m\Zcywh^YVXoS\rj~ve`PMA463<>50C]5C:89b|kalv~zvspljge_^ZXTTURSPOROQPOPQQNPPPPPLMPRPQRRTQQQQRTUUUTSRTTTQUVTUSSSUTUSVTTVTTTTVURSSQTSRUTRSTUQSRQOORPNJLMKJJIJGJIHIHHFIHFIIHGHJIGFEGHIJGFGFIFDEHIIEFJFFHIHGJIHHHGHJHFGGGEGHFBDDFEEDDDDDCCBACDBCDCCBABBABA?BCBBA@AAB@CDCCDDFFFGGIHCEEECEDEFFEFFEFDECCADCCDBB@BCBBCBACCCDDCBBCEDBCFDEGFEHJKKKKLMNNQQPMNPQQPNNTRTWZZY\]bcejklortvwwyz{~|{|z{xywuwxzy{{}}|~~}|yvtpmljfcghedd_bb][TUUmusg]YUk]5WWK=61.,*)+.013311/.//348:09 (*/BkkS1)+'+PZ`|v_RC#3JPxc&$@dm\MYwhxpE18cyleqgyK 
062bjvy80X@6:;Sd]O;01.Jb1,2Zmp\5**2Ltn'4?EEDFUowsffeG>82&')*)(')--8HjSVkpWYXYXlrjdfuzzvm9%()*1NN<;^:&%".IQk_F3=>Fosq@?Jr~~~n}VK5-8O[WYVOD=;2&2\{tb^g|i_\[cfUVu~[x{h^OLA651DVF9>e|icnv}|zzvnhfifd]YYUWUSTQQQNOOROPQPQRRQPQQRQSRQQRONPOPLPTRRSTUSUUTTRSUTTTSUUQRRQUUQRRTTSVUUSRUSSRQORTSQOOQLNNNMLKJLJIJHHGHIIGHFFHHJIIGHIGGFHGGEEEGFEEFEGFDGGFHHHGHHFDFFGFGGEBEDEEEEGFGDCDBEEBBBAA=@ABABAAAACBACDECDCDFFEGGFHJIIHIFIIHHHGGHHHGGGDCEBBCCDACAA@BCBECAEFECDCDCGEEBAFFFEDFCEEGGHHIIJGJJJKLLMNNNQOOPQSUUVYY]`bdiiilnqqrxxz}|~|zzx{zzx{{{}|~~}|tolllnrl_YSU[X]c}lg^[XRw[9WVF<60--*(*,134422//1033680>72;dGZS&(')8ouH-*(%&'*7%"  Ewwtvsnjia,3IUa_STQUUVfw^TSRO@!9@EJJIEA?CCA>?@9!*,4pf1++&)QXg}~s_]E 9SO}3"@@??ABCBBCBCDC?CCCDDCEEGEHFGHFHHIHGHEEEEDFEEDBBCDEEDEEBDDEDAACDDGGGFDCCFEFDFEFEEEEEDFCEFGEFEGIIKLLLLKMOMQRMQNVUWTY[\``bddhklpsuvx}}}|~~|{}|{|{||}}}{{zyxwwyzxsrqnkljcchb_X\XcU:8:==@FF9)+2\-*)%0RZkyw}{i^_D"BX^xy-%$#2YndPP[Zfkjv]Xjecds{iSS A26shgxuxm8HK:7:P}g>13.Qf11:NZR70-*4SqNFKLFDIYym]R}ykXD4)%(()-/-'(&6SbVV`dmfZUVZbjqoapx|zub.#(()):cpc9('%!!9Ra}{_?4%9:FAFjmBFc~swmrP?3/6A@<=BOG;9/)6\{k\Zgia[O4`KRvrq[TKJ:264;LSek^PVW:3Ek~zdgoz}}ogkic]]YYWSXRRQLMMMNNPMNPPPOPLNLJNNONOLONOONOONNPQRRUSRRTSVUSSTTTRTRRSTTSUURUUUUSSSRQSRTQOMNPONMNONIMNKJKIHIHKHHGIGGEEFEEFGECDECDBCCDCDECCCDFEFFFEFFEFDEEFFEDEFEDBEEEDBBAB@AA?A@ABACDCBCDBBDCD@BCECDDDABCDCCDEEFEFEFFGHGHHEFGHGFGGGIGGFEECGFGFEDDDDEEFFFFFGFFHEDGFEDGEEDEGFFGHIJJKLKNPNOOQQPPSUVVSSWVVZ^``bcfjmonrwsx}|~|}}|{}zzyy{|~~~{xrqmklghxQB[UE;40//+(*-142334321.0135-%>HIIGIJLNI/=SNJSSQRQMNPOPQOMNPQMI /<9;Chor{Z5;;70@e|_dJ*(((**'((''&&&$%'  K~zx{onla08M^o^VVTUX\lx^WRTSA5@DABD?63168@EF;+.1\0**&3RZiraX= JS\o01721OgcZZ`agejwm|Tcg ,-7ay~h^H31557:U}{^:/0.KV/;`dZMH>.,3TqAEIGFFJ]i`YS|~kQ@1)''(-7=4(&'8]^UX[nrUUVY`jrmasw|zt`-%)*+*Iyvf4'&$ 
!=[i}aG5&<@XEdvi;D]zm[XoL@5/5>:<>Z]@84,&7Zqf[^nmaZKAUFQvtZPNJ=566;2>imhFM[D3Eo}{een|vqqki`]YYYRONLPIHKMNKONNMMNMNMMMNLMMMMNNOONPPNQSSSSTQTTRSUSTTTQSTSSRSTTSSTSSRUSPQPRRQPRRQQQPPOLNNLIMLKJKJIHGHHHHGGEFFEDDEECEBDDECBBCEBABACABCDDDDDCCCFFEEFDCFHFCBCCDDCCCECDCBBCCBDEEGEDDEEDDDCBBBCDDABDCCDDEFFFGGHGHIGHKKJKKJJJJHIJIGHGFEHGGGHGEFFEFDDDEFDDFDEDCGFEEGGGGHIIJJKJLLJJLKMPPQPQQPQUSSSWWY[^^^`bdijioqruwwz}~~~~yzy||~~}zzvvrxL I]J=6/-..,)*,.2233422103457.%BJIHFFHLMH-#?UOLPMPQPQRRRPOMMNNOMG 1?:::<71=gfQPD%()+)*()*'''%'&), +Lyyy{}rona, 6L^pbVUWU[[f|\ZVSSC3<@@BFB92259zqjn2,'147;YnukN:20'DT1]yyyR2+5[eRRHLLKO`id]]vuk_M;--)&)6LR2(&(8]\SYkrVTVX`jrnjqy{xp]+#))*-V^k`:)&# &Khm{gM5*':P^>kj>BV~vuNHLvMD3,/16EcjI234.&9^i^[[jzoc\TcLBMy~rsTRS`>865:;wj^ZeU?8Fsygemy~zrqjkf\^WXSPSRNMJIKLOKLPNNMNONNOMMNNKMOPPRRPPOMOPQRQRPSSQUTSSRSUTTSTVSQRSSSRSSSSUTQQRRQPRQSPONNONOKKLLJIIHHGGFEEHFDGGFEEGEEFEB@@@A@>AABCBCCDDDDFFECDEEEEGEDFEBCFFDDCCDCCDDEEBDDDFEFFDDDCCCDAACB?AADFCBDEFBDFEFJHIIKJKKNNJLKLNKLLKJJJHGFHGFHEGGGFDFFEEECGFFDEFFFFGFFHGEIIGFIHGLKKJKKKMQPQPQQQSTSQUTTS\Z_`a_cgikmputwvwz{|~~|~|}~}}|}~~{|yvnlN$A@4.+)))*&')+.0233413245579/'DLJHGHILNH-"DWQNNOOPPPWZ[TQNOOMPNE 2?:;@MNQBBD8<=<>=5/?iI#BE')))))+())'&&&%)2 Mwxz|t|zrrme*5M^maZUVX[Zo}\ZWVTG29=?BDB=:89;;?>5',+5O/*'$;e[mwnjxs[V9!0,.8MjhF0296,&:]gb^^rqd_\nSFSwzsx\[]gB9769ZzT]^WVJ;5Guw_cnz|vvrkeda]ZSVWVULNMNQONLOPOMLLILKHGLKJJHFGGJIHNNONNPRRTTTTQUSOOQRSSRTTTTTSQVRTSRTRRTQQQRPPQPNPPNLKMLMJHHIHGKIHFFGFEEEEBDDEBADBFBCBBCCCDDADDCDCCDEFCDEEEEFFGFFDDECECCCCBBCDEDDCCGGCFCBCCCDEFDDDBC@@ACBBCCFDFDEEFIIJIIJLMNMLJJJJIJHIJJIIHHGFGFEHHGCGFEEFGDEECCDCCDDDDEDCGFGFGIGILLLLLMNMONRSPOUQUVURWXWT[^^bbefgknqquvyzz{{}}zy|}|zyyysjH",##"$" ! 
!#%),-01101243588-)DJJKJILMLF,(BQONPPONPWhmj_QOOMMOM> 4=99;979<=<;;:<=84/BlDJH((()).1.0-)('&&*/ Qyyxzztyvrqoe,!6N]nd[[XX[_}^YVVTB179;;===:;A@8+/,8P/+'$EwVk~sUT7!>KbnUs)%,&3EISYPT_iyVsZ~S2#40Aeb[ZU6,(.77:MhWYSL:3/!][)a|H,,2[wb>G]aZ[fxs]tjnS>016*'+:QJ.'%)8b\SZrcUUWW`nrmci{zxrW$#&'),Hsk\8)%%!'WdS\mRF6+.4.-1Fb`@84991()>Zng^^s~jci\dUETipzt|j`[ef?265==???AEEGGILONNPQQQSRPPNQSSSSQSSSTRRRSTUVRQSTUTTQSVRQSTSQQQQQPONPNMMMKJKIHHGGFEEFGEACFE@GCCDECDECDDBCDCCCFDDDDFFEEEGGHGEFEEFFDCCCDDCCDBEDBFDDDDEFEDFEDBDBAAA?@@@DBB@CCDDDEFFFDHJIIIHKKIIJNMLJLMLKJKKJIGJIGHGGGIHFEDEECCECCCDB@DCDEDEFEFGGGFHJIJKLLLMMONNOQQSTRTTSUSVVVXZ_]acfiiknrtvxyy{}}~}~}}{z|tmP?HE@81*'&! !!!$%')*+/0388,/HNNMMLNNNH-)DUNNOOPNP_u~udVOONQON@ +;>88;==;:;;<;===73.Ey=(cR+(&+2?:=<51,'$%,3 Syx{|yvutqqd*":N`me\\\Y\\^ZYWVB1666788;>?>99;=71..;Q.)(&>bUflYS7!?J`\`W) .?@=N^][_hnKJntwP* #9.FyXeqphF0(+44@wytzmR730!J[0uN,-5]F%Fbb_\dyiWrsN<-0-*(+:PG,(%'9oZU[ggXXVWamvlcjyzwlO$'')-H^dc<*&&"(N[OXpWKD8%.?QWXl^~hFAblRTQUS}x53./8KXMDEC;2/*+>]p`^qogehM\TEWi|l|wib[_V;363BTXRP]\UU66Ot~scgq{}{xrkjje[YSQNGDA=95:6766<;<=?@DFIJJLNLNOMNLMNMMPQSQPQPQPRRSUQQTUTRRRSSTPQRRSRSTQQRQRRPQPONNQNNLKMKJIJIIHHHHIGEEFFDDFDEEDCFDCDGEEEEEDBDDECBDCDFCFDDFDCCCDCEDDBABAAACCEFEECCDCCCB@@??>@?>AECBCCEFCCBDDFFFFGEFEFHIJJJKKLKMOMJJJJJIKIIIGGHGFGDEFCCFECBCDCBCCDCCCDBBCDEDFCFEHIJHHJLLOOONMMMOPKNOPRORSUVWVV[]bddgjmqquww{}|}}}{|vu`k{wplf`VLE@:1.)!! 
"%(-12*4JNMMOOQPQG++EUQOOMQRUeqxqbVNQQQPN< 9=8;<==<=AJJ>6-&#%.7 +Q|zz{{wvutqob'"?:87935/.@M/)(&A]Whzp`T5$?Js\^j++C=77F[_ZX|nd_]`{~}S% + )?,Ek[xo8.$)45KvP61.#T_0sm=,+8aA'G`f_Xask]i|K5--,*)-=H=,'$&>rWY]xeWWVXalti[mz|tjL$')*.Tkte4&&& (NWLU^RIE5(-@LOLH:kwB=DegYWWWSk43.-5IWVWXI91/**Csg__l~efc[DsVG_x|mv}sjaXRO8451@dkjgd[_A44Mxregpz¿||ytlheaWRIDB<@8>>CACECFGIJIJKIKMMNMLMMNMLNNNOSQRSQRPPRQQPOOPPPPPRRSRRSSRSSRSSSQORRQRRQPONNMLJKJKJJHHHHFEFEDCCCCBEEFECDDDCCEDCEEBCEEABABDCBABEEDBCCDCDCCCDBFCCDDECDDDBA@?AAA@DDB@CDDCBABACDCBCCCCABCEEDFFGIJIKJHJJJLJNJKKMKJHGGHIJGHHFEEDDDCCDACCBBCCCDCABDEDEEGEFGFEIHHIIIGGHHHJKJKMLMNOPQQQSTTWZX]^bdgkmmnrtwwy|}}}~xzh{vqf[UMF?60*$#$#(,/)ANOPOPQQRPD(/MUNOQQRST\bgg\SLOOQPJ9 4?::;<=;CCC@::9::6/NcB(*(+8@OUJ=9,&#%07 V~{x{wwvsro_)"@Qetd^\Z^^`wxX][XW?,1033777:<=9558171.CI/*'&@dUp{u_R2%@Hw`Rsd%'?>367<[a[`TY{}zsQ% /J3IkottiaC/,&,27LvnB30.'e?0t[B3+-7e<,I]g\Sa{{n^p]A1/26)(,9=3)("%CvRX^xz`WWVXantj^nzzuoJ $'')1XihS-&&$ .OSMPVKHF7(0?JPFECh_@@Ggg_\\XWg63./:PQORNA863)+En}i`alod`[@ZP^}ovlYSIL9444?ABDDECEFFFFHFHKJKIMPNOSRPTTUY\_afdhloprsuwy|{{~vybxsoh[ULF@9@E?1Ag_WSNRQQRPG(99:>?JSSKA:;::;62G?()*,9H\ZP?8(%$%,, Xz{zzvvvtrqa)"ASgrc[\\]_`{za\[XV>*./01578:<>9777-6..By:-*)&A`Mt}\O2(@JzYd[*)278=;9Tegepv|Catpyy{yH(=L.@muzfeSDZF+%*16J}L=404*306nD02--7h;4IZgXK^lruhQ_WL233C:)))4=/30#)?mOR\u]VVVW`pskgqyyvmG%((+9`f_E*''$!2RTMNQLMG5(2BV_>EE^iQ=AKfrnkb\Zh72-08AAED?<895./Irg^`trbaX@~YUW``sbMKBM:5779YcCGYL1//0R{q_iq{~zxvttzzyvqnkid_\VSQONOPQMJJKOMHLMKKNNLLLNLJJKMMNMMMMLMMNNMMOJHIMKKJKJIJKKJKIJHIIHEDC><<<;;:;:899::;;;>>BDEEEEEEFFDDBBCCCCDDBBDBBCDCBCCCCABBCCBAACBABA@CBACBD@CCC?CB@CAA??@?BA?AACCBCDDEDACCCCBCCBACCDDDBBDECDDGIHJJIIHIHMIJIIJKIFEEDFCDDDEFEEDCDDBCDBBBBA@??<<8766798;:=@><=>@BEEEGIIKLJKOMLNNPSTOOVXY\_bdghloqpuxvxxy{~y~b}vursvuyqkd^]\WVH3Urth\YYZUTUTRUTRPORSQN5  8<8:==I`mhTC=<=<;62N|;()*../0./3569?@8686*2,-5fxH.+,)CVS}s[T3'@FyRZa&$6HURCE`yysv{z}@]oglsp}~E-BG.@]]fD*"'38SpJ@12>',5:s7-3-*8j~,2DWhVGXcpxnB;84244E<104CCCL:*1CPU^v\TWVYbssprxzzumF()++6\`[R2''$ 
3RXMRWPNI7$6Ch_?D@IYI>CReytfa40,0;HKH:49893//Ll`_xue^XC}[UPet{ZNGBI7887BmK,mvS<301Vmahr||ttpkiighm{ssnjca^YUWWPMKNPNPKPJNNLMNLNLMKKJHLKKIJJIKHEHIDEDGEEGGFGHEGDCEDB@?<7886565532222454468@@A@AAB@A@@BBCB@@BBAAA@?>>?@@@?>AA@BACCEBCDDCCDDECDBCCBBBDEBB@DEDEDEEDEEGHHGHIKIHHKIJGGHFGGFFEFDCBDDBCBCA@??<97201-02569:::;:==??ABCDEGHHKHJKLLKNMMONMOORVVVX[\^bgefllntsuxvwzzz}~v~^~|wsj^znafdY[YWVWXUTTUVTR7 BA:<>>TdjgVF?;==:5/Lm{p7''*-CS`d]S>*''&*! `~|{{}yyxuuvvvtpe*#DVjrgaa__`bz~^\]ZQ? /-///1257>>8654(/-,GE/--'GUYwzt]R/%>HyWeZ'+LdmcRc|GTh~pdW5//2FTVI97=:94.2N~e]buf]O1y]Vb{|_UF?K1888IdFNlR;139Xicis}|vpje`^^cz|xrrrhab^ZVXMTTOOROMNNLNOONNONLKKLKHIJIHGGGGGFFGEFFECCCCC@@??>>;=;9:87767655569>BCGFIIJIIHIJGGFEEFDEEDCCBCCDCCBBCDDDBABAA@BCB?A?A@A@A@@BDAABB?=?AABB@?>@@AB@ABAACCBBCDCEECBCDEDCECCCCCBBBCAABBDEDCDFHGHHIHIHLLLKLHLHKIHGHIHGGFECCC@==;98588889;==<<<<>=?=>??@BABCFEGKFIKKMLMPPNLOQQPRTTVXVZ\Z^^egfklqorsx{zz|}zkZ~}smjigeeda]___[YG + #FA:;=AS_b^P@><<<931Qami4((),EVhihR8*(''' c}zz{zvvuuxuuvtr`''@Zorf`^_`a`~{_]\]X5!,/1/13678:::843'1-,MB,-0%H[^zu`S0(BJx»]cU,9f}qmqxrx=kkk?*#Hm>4./FYyGtkB+")68ZqHB511&;46tq=14.,9n69Zmpd\[[Zo[7267710P^`\NQd_<<+,InRW]xcVYY[ipvv{}{ywl@$((,=ICV;(&%" 8a^JXcLKH:(8Je_EEBSYE@FQs~seW;3.1BPJ?;C?8<6).Lye_cja^Q3UQelu^YLAM9;61DUW}xT]523;[jbiq~xogb^][^tzyzrkgdee]WTSVMKKNHFIMOONNLMMLMMLLIIHIHGGHIFDDCEDCDCDCBCB?A@??AB?>??>?BDDGFHHGIEKILKIHGHGGGHEDDDDCDCCABBCBABAA@@AABDBAAAA@@A??BDA@AA@@BAAACAABD@ABBCABBBBA?BBACDDDCBDBBEDDABDEDCCADC@ACCB@CDDEDECCDGHGHHGJIIIHIIKJIJIFEEEECCAAA@@?>??=@@>==><>>==@?>===>@A@ACDDFGHKLJJJJKKMMMKKQRPQQROUUVWY[`acginmortuwz{{||~~~~~||zv[}Z}y{xrllllluc!5UG:@?>A?>CB=>=<<930SPac6*().?PX\ZA7,'$')$ + d~|}{ywuutvvwvvrb''B]npf`]_`_^{a[YX\8 -02345:;<<96665*5.+O>-++$I\`{r^S*)EGkXcK/Qy~}vdSTlw}~>cqdbYh64XF,-->n_g|>*!7A9]gJD736'2/;vy27:26??>>=>;=<<<;<=>?@CBDDDEFFFGGGIJJOMMLOMMNNPTSRRUVYZXZ_bdfglmoqrtvwxz{{|||}|{zt_|ai2+$6iv_MQICC><@B>?=:=:3/QmAeP,(+-:ELNG=6+'&(3&  
f}||zxvtqtxwxxvsa&+G\lqc`_^_bg|b__[Z9"015::?ACC>95:;:,"40*P;-*('J^^ueWR$*DKz`]qA2_z~z~pYLVfjbfw{{:ivgbipy69B ,./UpTchp7-5cN:Rz_LC64?&/78sg7MXTI?t|ETbkqkgf_TP:356510/QMSfHCRF:)&,M_RW]k~oVVSWYjsx|{vj8&'*+*;TI8*'&$$B]\W_YKRO;*;Px[HEB\ZDCISvp\_F2-3<:5RgS>=;3-2_rW]l~{lc[R+yEEfg`^SWN29623Qve>>=<;<;<====?@?>@?@BABBDEGDCGIGIIJIIKMKLNNLPQQTUXYYZ_^acceikmprsutuxz{yrm­{\rigwskg`XPMMTRMLG=FA7)8U5Cj3,-/579;870*)(*/!  #f~|}{yuortvvwyur]%(G]lqb`_]`ahu]_b^U5"048<>B@><759A?C-#7/.O5-*((H[d{`UL*,DNbMCkiN[_M7Wkvzz~sWIYccfppl}9p{~msy=7; 110Mqsyylu[:0Ir@8SXLC87:!=5:ziIu{M?wWUZendad\SH=9:74650;@VNANYC-''/R_UVZdp_XUVX_ou|}~{u^1'&(*8kgNA-'(%&AjbW]RKTN;.;S|[GIKfXDEHP`oyiY<0+19?SuW:>?93.4amWczseZO6tBLrzn}d_SPJ8:5306=6272133=c~~kbhq|}wqmha]YTU[q{qnljcaa^XYTUWVQURQPPPQPNLNLKKJJKIIJKGEHJIGHHGGFDGIHLJKLPMMNNOMMOMMLLLNLLMNNLLKMKIGIJKIECCFDBC>@B?A@>??=??A?>=?@??>???@==>>???@>ACBBBBCDCBBCDCBCC@@AA?????@?>@@?@??@@DA?AA@BA??@AAA@ABACCDEBFEEDDCDEFHGHHGGGGHFDECCBDABA@A?====<===<<;>@>>>??@?ABCCCDBADFDEFFHHKGIKLNPQPQTUTXXZ\`bdfhgjhknsyz®tY}|qnfhrke_[_d`L #8|wT=BB621300,*)(-.%  %i||zywssrtvyxvsa%+G^oof`_^a`gy_a`]Y5#49=??><;:46<=>>. 
6-/S7,*)%L]bxvwhXL,+DMKGLMENOyC28euzx~]IH`dhsrr|7-'6\`VTVg]QTVXWapv}~}zua."(()*BY[aG+)'$)Dn`ZcUQVM8->\z]LLXiRDFCDSdnQB1,2=J]R32==;4/5_f`lqd\U@oAQ{v{c`TUM794///13?=7332>d}}|idjs}{tpjgd^\WRT[rwtsojecba[XQVQUSTSQOPOMLLNOOLKKLLJJIHFE>DGEGFFFHIIJKLLLOMMLLMLPOOQOLMNMMMPOPNLMLJKLIKJJGFFDDC?>?=>==<><;<<>?>><<=<:;=>>??@ACBABBBABBCCCBCBBDBAACA@?>@??>>>=?>>=>?>@??>=????@BBA@@CAC@BCDBBBCBBBBBDDFFDCCCDDDCBBBB@@<>>=>><=><=<=;<<>?=>??@?@=<71,*,.:6 't{|{ywqnltwxyww^ *B`nne``_`bex\]^\Z4*<=?>=?==<:=;9;aQ>>Tz|x~k-Y-'733cskr6.,26=gg`hY98&%KB?esrC|e(I~rAJRiuWOHHGGCE=7*(%#(Km\Z\[TSJ9,AbxTLQZ\TIFCBNvpiRB-*09FB4,47783/4hk_i{ibZMEjAWm{wm~daZ[O5820010Sn[F721Ah~{gbjrlleef^[[TTRPT]u~|zynmgc]X\YWRRPTTLLQPPONNNMKGGHGGEABCB@@?BDFGIIKMKJLMMMNOQQOMNMOMONOPMNPNNNRPJLMKJIHGECBA?>>>??<>@A@>>>>><;;=<=@?@@@A@@BA?BCBADCBABDCCCB@?ABA@AA;@=:;<<<>?>>;>@@>?@@?AACAACBAAA@B@?A?@?@@@AA@B@@AACAAB@@B@BDABA??BA>=>=;<=><;=<<====@==?@=>AAACDCFEEHHIKJLMIGFCTacjmuľz]oy~n^dnw|tqpl~yk`abbZPHB?BeL )r}|}{xtqsxyyzvr\$,J_psbac`acji\_]^X7/FC@@@?>>@@?:99;/%2,*;WrF0-**/S^^qudvaQH)-NMKJKKJLNNCQMAD=,G_ro[bn{t[GLpwuu|a?E@68[zqd2U&$2-4_]R[jj|o1+).3?az\?@&,lE9fdIQ/Sf:DQruPKEHHGEF@8=A;8:HYW>FQX:'&5ROUUYdmeVUX[ant||zq[*#)*+,==>=???BA@@@BBAADDBCCAAAAB@@A>?><>?><==<<<=<>??>=<>@B?>@?>>?AB@>?>>>??=>>??>:=:==A?>?ADDBBDBA@@@A@@=@><;:;;<;;999:::<<=>?AB?=>?A@?AB@:5Cr|x|wbpsw{{nmkgfajknkpswpjggfqA +)o{|}yxvsxzzvxvs\$+Karsc_acbbgrca__^[:8DC@>><@ 2yA:|uIXB0Q`3AVuiJFCEEHKIF:8::9?B`L5IVY9)'4RZSYczucUVXYYitxwpV*$()*.>Tcd?*'%#)Jfd_k\VVI94A^kSKIWaWKEFKdvlA91.//*)((**+.,0<=>>>@???A@@CCBBBBBB@??@@@@@AA@?A@@@?=??@??>>==>>;=>><<<==>>>=>=>>>=<>A=;=<::;999:;;<=@?>?ACA?@?ABACAA@>;==<>>;:::;;::9:;>;9789<=;;@A@AXtR_ehnqt{~||ywrigfdggknourtl& 
/r~{~|{{yyy{{zzwsY/Fbuuf_`ccbac_`b`^\72CD@>>:8=AEB?=<<,*?,*+.-0/*,+,P\_a^_`WQRI$2HJKMEP]__]WULJG;.+\kcVTbpr|}l\JDmsqw}xqi[2065<]s]/D%023248=.Q[I3+)-6;m~s;:;4uA3Tht]apf26Z\;GVoZC;?@CLQMB87=<:AQ]FHQYX6*%9VYVW`yt\TWVW^lv~{wtS($())+-/HL2+)$#,UxhandYVN87F`qYQMXdZJFFFdnnB;0.//+))(,,-.-+A~m_nnfcZ^`Ogn|zkj`_<78424=n_h428How]dlsyuql^XTWOLJLRZt}}~|ztnkic`\[[VSSRRSPNNNKLMMMLLMJIKJIIIIIHHJHILJHHMLJNMNNMNOPNMPOOOOPPNLKKIJHGIHIIIGGGEEDC@?@@=@@?=?>>>>@@@@@@@A?@@???@@A??@A@>??@?@AA???@?@>??><>;<=<:<:;;::;;;;:;:;==:<=;<:9788;;99:::;::;;=>>>=??@AB???@A>=?<=<<<;9=??:;<=<=@=<;:=>]mFRRQT[acgiktw}}zsrpmlinWA?>6)$!1y}|{{z{||{|{ywv[,Ifyqebbaaa_`bbba^`68FD@@<;<=><:9==<)3?,(+--0.+,'(Q]aa^_]VSTH!1GIJKKimksm^]LFH;#/Vel_akedqid]LGZlrvxsngG1169=[x[,C{$390.1764chhW0+(.5?v|e484<|C4epgS7,;Y}S4HSdI9<<87>88?UOJ>?QR5%$4ZRTYfys`VXVQaqw}|zvjS'!&((+./461.'$#,XvbdscVWP:8F_jYRKZjZHFEBbZnD=1062----8=<3-+8sZllhhYcZRnxxhjgV777436Szs=.36Frv`clv}wun_WNMIHJPU\y}}~~{vphjihikljjs{}swurjib`\XZZWVTPRPPQRPNNNMMKKJIIJKKGGIHIIJJJMNNPNMPPMNNNNNPPONNNMMLLMLNLJJJKKIKKGGFFECBAAB@?@?>?@@?<<>=>@???@A??@?@@A=??@@?@>?>ABA>?@?=>?=:;;;;9::9899989:78:;9:;788:989898:9:989::;;;:<;>=<>??==>@==;=<::7582]pLQPPRSTSTXZ\Z`beinsuz}}|yog]UOHP~|yzyvz{zyz{zvt["*Ncvtifccddbdedba\`5;DDBA=:@EB=7:?@@+=?.(,/-//,+)-R_cknkjm^UE%3JIJJRramt]a`IGD84Lcwaajq\UQTUJFN^mlkif`;.168;[~yW6G&L=/.245:mvob2-*/6Dnwyx~Z43)Fz6K{iV+*4NR5HRV<688:AQUA969@86?WQF=L]O4'#Te|zCF68NE6/.?[ZQ7/,@¬}crykhcWxYPm{xhggS8551023Xvy\1336JwscfmuwoniaVBCA@DRZ_z~v~|rkhdb\\^\X[[WVax~~zuz|ywvwuyvz|zxztiddbc^_ZXUWSPOPONPOPOMMLLMIKHIGHIJJKJJKKLKKKKJMPONQPRRQNOMLOMLJLOLNNKJILKJHIHGGECCABA?>>=?>?@=<>?@?=>=>?<>??>=?@;>=?A?AC@>>?A@;?>=<9;;9888:97866656675576667787998897788998989:9876::;?>=>@AADDB@@@?=;:99965;hmPORQRRNOQPSQTTSMRXY\[adkkmsxz}}{yyxxuxvuvspU",Mewzhedddcbcdcc``V.4?@BC??NXQD=;?CE,C>/+-.,.-*+')Xbl~|gUD&3NKJJPbWj`Nb[EKE6+;O[XUqb7<8DIAABKdd`a`Z>14779Xm}Y.Lƃ$G1.0121:{X.-'.4=YmpnmnE03(@`/`kHkg+*/PL7GHQG978=BOH;56?F:4>97=FYYG-&'C}QRXiw[VTUXdpv~zxjN$(-*)+O~~d0$%&0Vddikc``O>;Khx\UOblZMIC?dcy
qVR7MaX@23UvrV5//K¬}czphd^IvRTrwrnhfbP:66312=clbB2234Mw~~sUfk|ÿe^a^[TLNOPSY^avxju|qjghgeb_XZZWSMNer}xvtstqnkmkjghfejkoruwz}zsnpkebb\VWUTQPRPQOOLKMMHMKLKJKKKIFIIKJJLLLMNNNNOOPNNOMMMNNMMMMKNMKIKLKJKJJLJECEEEDCCCBB@?AB?<<==;<;<<=<<<==>@?>>=>@=<=<=<;<<:89866547634455445547868789988888775576:==;<=;:<::6<9=<;?CGHVunQVUSSSSRUVTSSTUUTSRUUUSXZXY[[_hmlqv{}~}{qwvulQ*@Smrfhfgfdddddcb\22?DGE@HacVI@>AA<)>7-+,.-.,++&,[`nfZC&4JKILS^fskjiZIIG5"(0:A>?\E(04?B=<90467<>>==<::;:;:;<;;<<:;>?>>;<=<<;<<:;;;::;989:7554343424476565655455665456:<>@;:898865698998?FEMNSQTOMSthLNPPQSTTUWVUVWVUSTURSSTSTQRVVZVU\Z]`efflpuy}{k``h|vnonlighjkkkruF1FKJFENa]TICBB?:):5.+,.-/.,-'1Uds}~jZD&8KKJJSdvt{lfYGJE8&!!-55>N3-18HOG>68Ob^U^c>12552B*Ct>FstX-,+3VF69NVB98;;AG?47AGK:31+),]US]euvYUUVXbmw~zwoN!,**+1[^0('$5Wdbfsnf^L7>J]WQcjVMJA:c^x^=9]sV5,4IZQ;1/)F‘agvrh_YC]NXnvynjbV:520028cqsU6227V|~|s`dlusm^XYYVY]_____ntbfj{ukic\XUMGMGJFJN@?===<<<;=99;;;<<<<;;:9;89::99:;9877768747677987655432234777778:<88:86532222:@CFPXYZVTOLGHIGJsbHLKLOOOOPPPPSUTTZZWVVUTTVUWUTVUUWYXXY[ZZ]\`_gjprvw|}{|{wupnqrqqvL&AQMKGIOUKHAABC@>)=40/02233.*(8Zcp{}}yhVB$=JIIJQjjacdkXIKH:-848@.09QddVE89R|hT_\<02369Qwz[M:24 
Nf(Va+-++2[D39?G?899=X\B/*4IXM80-+MYcwi^X@rMXippf^Q330.22FnokO524><<><:;;9::89::998976699877776668776777635524688878777564432221/35?SRPUVRSNIHHEABDDHxõYHMNMLNKKLMKLLMLSRSTTUU[YVWWZVXVVXWTTWXXYXYYZ[[Y[`[affjps{|~|uuuw~zB9^\UNHIG?@>=@BA?;)D:0=6/*(:_dip|lXTB#R_^W?2:WoWRYN/-277;a]VbI8/I^%0+.-00/03-(%&..@U5JunQJ93/!939BHF;$0,)(+AWQC0&)@kSV[pq^WWRVbowxl@&(()*>_eeF'%%)7Wbgmtmd]P8>S{sRXYcjXMI=<[muor\rQAA_M<0,2CXR6.+,J]btrh_W?TVdw~rd_L/3312/?\jnS314>_laeozwwkWVRRX`deba^Uq½wh`|ytqkfdb_Z]\^^`XD@p}|tmlgeaYZTNMHGE>NIRYYOjz|xnjffgb[\ZSVWQNRNLMNMOMKJIIHGJGIIFGGJJJHIIGFGGECBCEDHIHHNLMMLLMMNKKJJJHIIGHHGDCAA@B@>>>>;:;8665786365665565554444436555769;99:9875555412458;>AFXVRNLHGCAABDEDDFEP|ĵ^GKMLKLLLKILMKLMMKNNPQPRRTTTVYYWXWX\Z\YWYXWXZYZWZXWXXV[^\Y^`ehltyxxk|wjdaWTNOIGHGDCA,%GBEmcejfG/,+8ebduw_RU?%H[IMLUdU\[^iXLIE81614445;KTUM:/@[Q@S^O4157;Oi[ZeJ51RN'2+//03101+)&)/4j]7KsZQG945'7C2XqxY+*(*8`05=FC76468765?CEA3).-('/APQF1%*FfOT]jvlWUUVVerw~}rmB&')+2hzkQ3(%(-;\hjsxgd_N=>VtYWXgoYMI<9[tvytvdZnT?:DHP91>`kS1,**O_d{th_ZIYRj{{|pg`G03312-GpkhM/14=]|h`fo|ptohaZ\bghgdb[Lt¿fZĘyvsnjda^\W\XXY]XFAnzxxtqqpopormjfeedcbVZ][Jd}}xpopkaa`_WSTQLORPNNONKKKKIJJIGHIKHFGFDA@=CCBCDDGGJMOLMMNNKMJJKHJKKIKIGFFHHECBBB@?=;9889944454323211210333477:;878976897757bpK))))6`08DE?;543448>DEA6,*44*(/HZTF-$+DiPR^gvcWTTTYhsy}|sg=%))+9hidY1'%(+Blukopjg`M>BqpZSVhhUPM=:swckU<:GaU65B[kQ4-*1Op]dvh_[<{OLgyh|wngYC.5631-C_igC124;\~fcem~jhijd__eijihb[RybVŚxwrligb^^YZXRW^YB@tzwtsonmkigghehedhhhkgop\R\vtsplecaXYWUQQSMKOOOONLNPLLJHFFHGGECCD?CGIHKJJLNMKLKMMKLFGLKKIIJIHHEGFEDCBCB??>=;:6664222246646745666668758689=@EMPY[NKHB@?<878:999<===BCGNQPLJO´REGHGFGFGHHHJKKKKIJJKMMLLMLNNNORNQPPRTRTTUTMKQQTRONJNIIIJJLLKILMMRPSTUTWWZ_bdknsux{zuvoTfvf=6@JMkcs}tf^A+~{KKMXjihkgtOKJG2(0//--/7KRUJ3153.7Lp[506?Nsz|yS5/]B*/,+-2A?9<4)).6>gR;P}^MH;2,VG1b~B*))+5].8IFG=53524=BDA80,-C;)).CNO7)#+CoXS\jui\XWS_ltz}|sh=%)(,9[ejM,'&((HwoirpjgaP=CoztUT[gfXRL9>~u]dUE6Jh[6=Eb]E3412Ttairf_^@OJplkpjcW;/65121FP_jG346>\~fbfn~a^YVONQakligbZS{c[ęuvqkhgdc_[XYWZb[JIvzyurpnmjihfecddba
`]]`io_Pazzxrmidb\YUTURSSPSOKLMKJKIHIJHGHJIIGDJKIJKKMOJKKKMKJKKJIFHHIGGEFFDEAA?@?;:8685567:886766554555799?AHOSSW[RQMGB?;9:985786:;:>>>@@A?>?@APóUGEFGGGFDFFGGFGGHIFHHHLJJLKMLONNOOOMPPMNQQQQMSKIGIKIJEFIGFEEGEFFIJIJHGHHIMIHLLGLQV\cejntuvs{}{lgTVo_^vsslnoj_Y=1yJLLOZ__df^HIHH1)002/*,9FIMC2+--,1WOIX@-)/62.*0O8$)-=IJ9*#0GkSVaykVTVU^nr}}|td;&)*,6LYiO-'''-Jwqmstkf^R@N^vWX[lfYSJ4@zpbcbR7VwU=GCMVH7=63Vu`ekd\Z7LIbnkaoif`S1,41143CXqx?024;:78:<::99898887532664;BHTRUZXYTOOGDA@>=<<<<:>@DFBB?><<;9:;;:;:IñMJHGGFFFDEFFGGDEDGDEFHIGGJKJIJHKMMLMMMMMOPOOOPORQPQQNMLOKJIJJEGCFFEGEDDDCCFFCBEGD@BFJGJQNV[bffhmpuwy|~m|ukfdha`PDwDLKLILOOQKKKJH/%6C50.+,9EBG=.+,..1445879=Wkw{yI2<;04,-2JaV`gB/(.7;UC8Zys\RK;0*87020:?-)+**2ex.@BNX712258<:50...3H5)(.KXL9+&0Km}OWdt}mZWVVant}~zse8&*)+1XulL-&'&/QvsosthjeRARarYU^ndWSJ3JvelyI4bzNGL8E_MGS6:Y`blc\R8JDZ]_]t|dbYK4/83234<==?>=><9884466333248?EKSUYTSPKIEB@<;=><>??AFEHGJIDA@@=::=<<::<;;Qð}HHFEFFFFFGFEEGEDFFDDDGGEGHHGFGEFIIIFIJJJLMLLLMONNOONPQQRQQQOPKLKLJJJHFFFDCECDABA?@AABCDEBGEKOLTT[_`fhllpsv{~{wsw^PpPPMJOQGMLMKMJC0 
$(Oc3--+-//58/)*...001:?66:H_x|b:36;(01*-5JYTW^C.'+23NJ=Wy|dVQL90,6?2--35-*.,,9hq3QQLE3-/237<84/,.,2@.()/?<73)&.EQW`vhWWWXgpsz|zq]7!()*-DouF-&'&,QvpmuthhaR?SdpZV_hcVRJ5D~ofqxHBoo?BAAdsGJK19Z|Zg|jb^M=~FBPW[\rwcbTG0/61025@W[:21/1Egzeaiuypf^eiklkgc\Sie}vqljgbc_]\Z[Za[GL||zwsrqonlkhfddeaa`]^_bnt]Qg}{srqomfcb^ZYYTRSRNPSRSONMMLLKJJJGCB@<===;:AQXJGC>?=;4+-2214;@KMMQTPQMHECDB?>>>>>?AEHGHPGEDCB?@AA@@@A????=<@V|KIGFFEGHGGGGFFFGEFGFFDEDEFEFEGFGGGGFEHIHIIILKKLLKMMNQOPPPPPPQQTRSRPOQNMLIIJIIFFECACCB@?AAADHIIMOSUV[]`_aa_innlvzxxkjia^a_[TQMQOD6$#+}w80.,./-,,+*,/1233036069=]iS7296-30+.7N[UXV?,)+.3^E9HVWWRNO73/GY2--/7/06+->mtaI?1--139AA=4-,)-6)(),3//+&'6k{SY_s`TWWYlqt{xxs^5$(***CltmR,(((.Rxnjurih`S=MxrWW_naVRH8Rrin~EGu^>63=ghFK?-5\nZjrb`[ME}JIMV\cnoh_VK5/42/350ATD7202Dfy`ckv¿}womjhijiihgb\Qgjwtnkige`_^][Z^d\IPy{}ywsqoonlkhfeccaa`__^_mr[Tgyvuqpmkic[__^ZRVUTOMMMMMG<::7;DNU[dwvYTSNMMF4$-;=CGJMLJIIFDBB@>>>?@>BCDGFIDFEB@@A@@AAB@@BA@ABBABAC@\yIHHHHGFFHGFGGCDFDFFEEEECFFFFEFIFEFFGFGHHHGJJIIHHHLKJKKLNMOOOOPPPPPPPTRPPOOPQOOKMLJIFCDECDDFGHGIJLOQTTVYYY[_badghljoqruw}|zwoja[\TPG%(DC84012.--,--377761.0347AoiS;0Dy:441,/2?T]\M6)'(-6_I6?GNYXRS94/ib1,,2GD:/*.;n=B30258?GGB6-+*+.)'((**(&%!IsRT`}{bZYVUjnt~{zs_2"))*-;nrN*&'(0\yqlwmgj`RDUzlXVckcXQC3PfkqAHxV6-/Bf^HA1&0`e\i{zic`ZH@wRIJT\dgurnpe\O3153122EmYI9106Diwadlvkcb`_``c]abcb\Xknxsnjigc``a_ZW\f[MKy~|zwuropmnlihffeb_`a`_bns^RMu||{urmdcc`[ZTSPJ=@ACTkmijnzwd\``_]U3)@PNQNLKE@@?=<:;<<==AGHIFFFCAAAA?@CA?A@@BAAAACABDDDBDC_w??AABBDFFDEDGEDEGFFEEGFDGGFFGGHGGGGHHIIGFIJIHIJHGIHIJIKGJKLKLLNNNONMNPPNONOOQSQRRRQONNMMMMKLKKKKKJLNMPRSRUX[[[[^_`^__aacfjossvz}~zwnsYGLjJDAB>:6311139==;824965:FsrcJ91@x5:64./1;enX@2)(*-NrG6?HUliYO;6+pX,+2Ij`=-).?rƋ:eD79;=BIJ>1-.-.,)()()'&&%$^nMZf~cVXXW]ksyxzq]/ 
***.AzylL.%$'1krkomkkcT@RmcXWeleXRG4U}dnq{>I`I5-0@X]K0+(9aiaduvhjaYF=nTMMMS^egfsvygR814322:]l`U9117Hstcakx¿][TLNMNOJLY_b][ihxqjifc``_ZZY^k[JK|}}zxvpnnlikjjefgb`a_]]ampZO5l{ywupojd\STbwqolpyte_bbc_Y30HZYWRKKHF:30149>BCDCDEABBA@??@@@AAACBBCBBBCCDBBCDEDEFbu;9=;<<=>@AAABBEDFDEEEHFEHGGFHHIGGHHHGJHFGIJIIIJIIKIKLKJIJJJEHIIKKLLLKKMLLNOMOOQPPPPOPQPOOQPPNOMLNMLMPLQOMQQTSRRUUXWVXY[ZYZ`a\bcggkmqrw|slc^\UNF@9>?ACFHG@:GF57;?Ye[QC:/GY6;;;-,/Dnn\F4)(*+W|E7@JemZJ64(!bK+.:^f;0(-BuˆcF;<>>EKC5.)-.,*'&(()&&$#$@>>@A@AA@A???@A?BACCBCDECBEFFCCFDEGHGe¿s<9;;::;:=>=<>==@CBBBBECDFEEDGFHIFFIHGGHIGHHJKJIHIKIILKJIHKJIMKIHHIIHKJJJIKKILKMMLLMLMNONQQPPOQONQPOOQMRPNROOOPRQPRPQSMQTTUWXY[[\[^]]c`eejlosvy}|{yqk`]YZZUVURYi_GDDDT\IE@9/Hsn"=AFA0-1IcbRC2'&*2YqB;BLnjXG5/*Rc13>]x`=/*.LzǃwG=@?ADC82/01-((&&&'*''%#%3NgaXe~{eXTUYgqt{vm^.#'(*-Ffss@(('(8xpp{ngkcPFWot`X^is`WR:5EMFJ_^iw{Otyy>CSV>=>>?>>AAABBA@BCCCEEEFDDFEIJIJlÿn37776787:=<:<<8;<:===?=>ABBCCBDFFEHGHHIFEGHIIIHHIIJJKIHGFIJHHHIHHJHGJKKGHGDDFGGGHIJIHIJJNOMMMOPNPOOQQQRROOMPPONPPONLNPQRRPRSTUUUVYXXXX[Y\_^_cafigmsrw|xxrnr|~obde^d`NSO=.I7HOVO8016?D;7-&%*0YsD;?>623246/()()(((''&%$7Rlfdiyz]UWUYgqu{wqZ+#'(),Eo~mB+('+>v}rt{rmmcREXmxbZ^gk\WN>:GMGKKMJOTVXUR}zx9D_a=/3BYZ9)+,:tjm{wleXBA~jeaXENYX^^Sk}tb:453/3;ZhjQ:45;YxrZcmw[XXROMNMONT]_[^¾eb{wsomkgdb``]\][[VRKOt}|vwuqoolljjihgcfcbbjgYH5y¦m{z|~}ywx}rfcccb_T33Q[^__abcF53202001354358779;;:<>=>???BCCCEEEEGDGGFHIJKmƿk234224358888:;:;99:;;<<;:==>>@?AA@ACDFEBDFGGFHHGIHIHFHHHIHIHGHHFGGHHHIIGHFEFFFEEDEFEEFFEGIJIJIJLJKLNNOOONPNOOPPTSQQQOOOPPPQQPQRQUVUWXXXYYY[[[[\][\]``_efpuw{~syndldLPl,3dbyrR>67??64/)*/2e|A:J[k\A545(.a02Gqs\L4)1X|yi?=<8785456982,,'))'))''$%9Ywkbc}yZWZW[gqw~zvlZ("'(+/HwkA,(()Bxuz|oomdOJY{\WZbc\XO=ūj`nmhcYJ6rzhij^JOTW^\S]su^7231118;RhD546;Zzqbfmxa]XSLACEDHT^^W_bk|xrlihea`_\XXVYWPIMx~{{vvvqspommkkilljjilihgibVH:}ær~}ywzqgeedb_T-4Q^_a``b\CCF@<<96754444666667779988:<>@AACCEFFFGGHFHJImŽf145333333543667998:89::99:<<>=<<>>=?=>>>?@CDBCDEEFFEEEFGGHGGFFGGIHGHGHIJIGGFGECEFBBBBCDCDDEECEEFHGHIJKLKLLNMNPORSQRPNNOQRQRRRRRSTTUV
XXXZYWZXYZX\ZZZYXX[Y\`Z_dhjnptuxz}t~j7F`x{fWLGPK?>96247:;852-))*(**'$!&:]jf\it[UVSYhqv}yumT'"'(+3Muvg@+('*OrzxpnldJKZy[YXhi[WR?;LLKMPW[\ZX\\_y{x.?nk6-3MoZ8+)*D~Y`qifbXN4ulnrgPOTXa]UZdeI/0402366BF6545>[~q_dmznifh^[TNKOYabZ^aqwqkigc`[ZYXXXZVPHN}ywtpqnmjjiggghfffffdba\VC5¤y~yx|uddeeccR+8Xaaa_adb]klb][XNOIBA><;79;::9;::899;<>??ABDDDCCGGHHJKrĽf87976544553123367877889976:<;;<;<==<===<<=>@>==@AACCBCDFFEEGECFEGGJHGFGIIIHIHGHJJDDECABDCCDCBDCFEEFCFHGEFGLKKJKLKMMMMMNPQPOSRQPNPRQSUUUVXYZYYYY\\[[YYZ[[YZY\]^[Z_a_bgjjkquwy~}}x~{qg_`i^XYVSTWY}DATeswt^D3;M;He.19;<=4&)2XrWM>;548S_OF?>>:52+)*+.+(&#(>dW[[lnZVUV[htx~~}xslR$#&)+/H`ok;)'(+\xzvrknbNJ]}q]Z]rnXWU:>LKKML^f^ZZ]_^w}u7PnH1.<\dK5-,'JmYftiibZREkpujZPW]d\QW[R:/363224FMBUB54=^oafp{¿\X\YQPPKLPT]`\_cqztnigda^YYXWWYXTPFN|wwsqnlkhhigffdbbc```_^[T>7{~{y|tfeebebO-:\`bbbdheh{}x}}unskcc^YWONKKHGFDCDA@@A@??AA@@AACBGGHIIxƽdAA@>;:8876444436767766765689::;<;>><>>><?>=?@@AAEBAFDDEGFGGFHIGHIHHHHFEEFEDEFDCCECFFFDDDCCEDDECFHFFFFFDGIIIJKLNLPPONMPOPPPPRSUTWWXYXVXZYZZYXZZYY]^^[]]^^[]\]^[]bcilmppsxz}|zqtrutonmlqnvh{LEOJXX]YG:?aTrm0../,+)()3S~nKO<825Orx^QCED;:2)+/85,)'%*=\[WZj{r[XVXZjsvy{~||tjQ$(+*.LfnW1*)*+fwyxnlndPEZxp^\_og[WP8BIKLLZmmc`_^`Xxp:PC4,,5N^T8.+)L_gw|jh_`ZWbkrn`XX]cYOUPC;013221@bTvpI55===;<<8888777867466979::89<>=>?@@@AB@???>>>==<=???><>=?>AADDGGEEEGIHGHIGFGIHFFGEEFFFGGGECDDDBAADEBADCCDCCCDFGHFGGIKLKIKLMLMKLNPQQRRTUUVTUSTVUUWXY[Z\\]]\\^^]]^]\]]\\\`[^]]_aijlmrvz}yzywxx|u[\\X`__bZWWspo521.+*))+7[jNL=1/7YomeKCI=78+)0EZK.(('.@[]T^nt\XVX\lqvx{|{zuoP&)*+.=NK;,)()4uxwujsrkLH]vr^\bqe]YN5@JKNTcikfb_`bcn}c/3/,*.5QhY6.*-Pcdozng`oTLYeqpgaX[_WQRLG?212223HZyhI58Bfi]hqzjed`XVWZXX]a`^a¾\rxrogea]\YYVUSQQIV~{ytoolkgeffda^]_^]]^[YYXXL<<¨~}y}sedeffdM)<[ddcdgiel{}}}~zvyywttvvrtxtmhpheef^[[TRRMMGJKKHFHHH|Ź`BEDCFEGHFCDBBB@@?=;99:587879976665668<=?@A@BACBA@@@?@@@?@A?>?;=>?AA@BBBADFDEEEHHEEFHHFEGGFGHIGFEDEDFECCDCDBCDBCBACDEEFDEEEFFGGGGHIHGJJJNNNNNNMQQPNPRRSUTUVUVWYZZZ]][Z\]\\]]\]_^[[Z\[]Z^``a_ejimmsvz|~y{wqs}vsq|i?>;851,+.5b\AA90/7MbbZEGF30,),7fsJ/((*/AaUNZniYWVUYl
stvx{zwuoI(**+,+,0/(**,8|sttrurdKOdz][aiaYYM9BKIKO\fib]^``fpzZ/,+++.7\oW3.+1PċadongatF\O`onhhX\\WQONND334323Af{Y725Bji\hp{¿[[[ZQV]fhebd`[h`qytnjgdbcaa]]^TJZ~xvvolkggfa_^^]\\\ZYXXVJ9=Ļ{{qfdefeaL,<_efffhifn{}{|{{xxxwutwvqruuoouwmjnkkfea_]`^VQVTUƹaEGFCFFGFDDDDGFFFDCCB@@=>==;<<:897655789;99:<>?>??@A@@?@@CCBCC=@A@A????@A@@BAAB@BECCFFFEDDDEFGHHGHGFHGFFHFEEDEDFCCDDCCDCECDEDECEEGGFEFGHIJJIJJJMMKLONONNQPQPNPVUTUUWXXY[ZZ][[\\^[[[][[XZ[YWXWZXZ[^]dehintuy}m`WTRPJA<7JrzY9<3/17M\\UGH>0*()/=]iH1(),/>bVV[hlWXWWV[putwy{{ytmG!'*())**)'&()0>ururssfNQf~\ZZca[YK+45221=|^J025Ip|iagp}XWQPEJWhmlhgcYeÿdsw{zumiggdcba`bbUG\}zysnnmgfdbb``aa_\\WM9>ów~|~qfebddhE,?_eghhjjdl}||z|{yvwzwsvwtrqtropqpnnsrpuurnmkglƹZHJIHIHHGFFEFJIIFEEEEEDDDCABB>@?>>=;:9<;:756679999>@@BCA@CEDCCDCDEFGFDCBBBAAAA@AAA@?@?>=>@??BBCCCBDEFFGEHFEFGGGFFGGFFCEDECCDDEFDDEEEEFEFEDDEFFHFGFJHLJIJKJMMMLMNNOPRPPORTRUUTSPQNKHLKGIKJJOUYZZ\]]^__a`aadehdimnpptqvwz|}D83136:BKMI?D8/-704WtcJ;+.//Fq[WUTUUXXVW_suvw|~}{rd: '++,++*)''((.=rwwtuufKQjx_\`gd^WJAFHFGN^cbVILHObliD,./25:<<7/*+-3f|bhvlc[`qqkedjfoymRZTG9/55231GZmh;327Iu{hair|ce\XXYajsspme^f]zzy|wtroljc\[^bXH[zzzyxwuvsnllhgedccdcggjpdR?=><;===???AAADEDCCCBCDGGGEFEDCDDDDBDCCBCCDC???A@@@@@>>>?ABCCDDDGFGFHIHGFGGHHGDGGHFFEEEFEDEFFFFDEFFGFFDEGHFEEGHJJJJKKLLMNOPNQPPOPSRPRPQMMMLJJJLOUY\[Z^^_`abdccdfgdhggdcbdgeikjiqu{y}k9:86:77G926=2;baUJ5*++2HsZTVUVUSUTWcuvvu}~|ypb6 
()**++**)()(,8qvvswseGKmsb[^cb][J:BEBBJ_edVGFHNdae@/6>GPQGCA;3/19f}cnyuhW]_cjd^\PdtdZ\VI8043123Grd:23:Pwyf`jqYVPHGMWmvvqlh^n¿`|vy|}yxvsnlme][abWKb{xxvuw~|rpnknofa``_b_gyhN2>Ô|xrijkcghfrϷ~}}~mgdefe`E)E^ejijkgdw}|~~z}|xwxzwuuvvuwxuwy{x{}}z|µTNPQMLJKKKMLMNMLKKIIIJIHGJJIIHKJJHJJHFIIEFGHHHFHHDEECABCBACCCDEFFECEEDDCCEHFFEEEFDEFEEDEFFGDCBBBBAAB@@>?>?@?>@@A@BCEFDEEGGFGFFHGHHGEHFFGFFFGHGDEFFHGFFDFGCDEFEFIGEEHIIIHIJIMLLMNNMNPPQPQQPOOPQRVVWXXZY[^]^_^becceeeeddefedeccdfdhhjmpqsvz|~e[7BMNHA<88;>DCO;7AF738;97+()'4HoVTVUSVTWWZfxvuy~}vnc7')++-3740*)*,8wvssutgFPom_]\`c^ZD3ADCDJX]ZMACDId`dC=IZmr`ZZVYI734ezZo}yiUPX^fbWQGZlc\_WN2.21123Rv{c:45;Qv~~x`ajpia]RRS^nusojg_o_z|wuusqnliifa\[ddSL_}yxvwx~ypuwmd`__``gwiS49³Ƽw~obababdcffgqŦ~~kecdfe\C+E_eijknigy}|~}{|}xy|{y{xxzyzxuxyxw~yzŷ`b^YYUVVROLQLPPONKJJHJKKIKLLJJNKHJJLJJJGFHIKJKIIHGHGGGIGEEGFFFFFFFEFEFECDDFGFGFFFEFFECCEFEFDFFFFEDDCCBACBCB@>B@A@AABA@BADCBDDEFEEHFEGGDFFEFGGGCEEEFDFFFDEEEGEFEEDBCFDEEEEFCEFEGKJIKMMONMNOOOPRRSSRRRRTWWUWXXX]]^_^__bbbcbcca`bccbdbaeccfeikllopv{}g:6Okuj^]WJDDFGIM=;K]G.---*)'()4JxVWUWUVVWVZkuuv{{voa2 
()+,5LcSF3*).AwyusvrfLTp~e[]_cd]Y>4:>?@DJQL?==>Dk]_CQcshQhhhn\F72`wao{saUO|Y\`]SVZ`pe_bYF-052036T{~]7537Tz~{~y_cju¿ulaZUV^krqmif`m`{~yy{vplkeca^]^aaUOb{yxyxxwpz{qgaa``^eweN4>´xwg`WX\_]]^__bchuƯ~}gedddd]?,Fdgijjkhdz~{|~~{{|{y{}zz{{yvz}zy{~~~ĵzzytsokhec^\WWTSTOPQMIMMLNLKLJIJGHHIILIHIIIIGIHHGIJJJIIHGGFEFHIGGHHIIHGHFGGFEFFFFDDDDDDFEDCDFEFFEEGHGEGFFFCABDDCDCC@@>A@B@AAABBBBDDDDDDDEEGDFDCFDFEDFFDFFFFFEDEDCCCDCDEFDDEGDFCEGFGHJIIJKJKLLMNOOPPPQQRTRRTVSVWVWXXY\\\]]\^__aa``ab_bdbaaaedddbdgknpryvv~w}~xxrb]YUPWTBAYub4,-.-*+-38PUUWWVVVWXZhprtyywpd-"'(,/F}zm3)*.DzzvvyteITtwd][]cbYY>0;>=>>?RP=;:;EnV]Xcq~nBendp}q[>:esbspm`WPzVW\QWeltuidgW>+/54236^|qT844;T{~~vedkvÿi[MNJLYhpolif`may{zsrsmeb__^`_WGb|xy{|{|ztuvqjpnje``aaajthL4A³’{tc[XVVWZ]^ZY]^^_afuռ}y~~hffgdca>+Hbghijiicy~z{z}~|}}|yy{zyy|{{|~óz}}zuvroliefc\WYXTQSOMNMKMJJIHGKHHIHIHGHHFFHHJJIIIIHHGGHGGHJGIGFIIIIHFFGFFEEFEEEEFFDCDGCDEEDFGHFHHHGIHFGGGGFDDDDCBCBBBABA@ABA?@AA@BBCBCBDDBDDCDBADDEFEDDCDEEDDCBDFEFECDHGFDDDGFEDIGGGFGHJKKKMMMIMPONOOOPSQQRRRSTUWXWXYZZ\^]^_^bb`__`bb`````bdcbghgihosvy~}xtlni[X{iM8530/0>E9SOSVVXTWWXYbprrx~}zwn^-#)+,.Y{1,+/T{{wvzv]KZrtg^[]b_[Y70=><A@@@@?AB@ABCDCBECEFFFEDCEFEEFDFGHGIHEFEFHGEEDEGHHGGFHHHFHLJKJLKLMOOOQROPQRTUSSRSSXXX[\\\\]]^^^_``abcbbbcbacdedefggmlpstv|}~fVRMKDGfZ?k`[ZRXYYZZXdvttw||xuk_2#*,--Um3,+1Wz|xx{q`QZrzb[[^__\Z67>?;=JyxH=<>DvV\asa@9Gnubb{~qQ@mmds|ui[TYtQMKM[r{rb_[L=614336>TS_R733<[~m^dlxmlhhefjmnnkjg_m^{|}{wvsmkiig^]_]QKh}zz{wrmmkhieaaajseM2CĽҞuqsx}wve\enku{y~xuspieg`YTUX[gס|{zbbefede6-Sdgjjlnih{}~~~~|{~|zz|{{|}}|~}~±~~{{zwqqqlgfgb\]WVSTPNNKKGFGFCFFFCEEEEDFHEGIIHIIIJIFIKIIIIGKHJIGHHHHIIJIIHHEFJHGFIGHGFEEEHHFAEHFDEHHHFEEGFFCDDDEEAA@??@A@>>??>F@?>B[{_G>@FxTW_W@::AXdNY|znI:sgdxxsj\L]pNMKLShxn[`^KK714347>YahZ803?\~{l]dmw~}yqkhhimmkjf[sf|wrqkfdd`^^\bbWLi|{}}vsvvtme`\`hpaK5Jĸ{vq^GBD7AJLWhrw}|z}~{ccjwv`MNQTaΠz{{\_cebd_5/Paikkkkfe}}~~}~~~|}{{zy{y{|{z|~|y{~{uurnmie`a`][WTRPMJMKGIIFDCGGBEHJJHJJJJHJJJJKHHJIIIIIHHHIIIJIIJHHJKIHJIIDGFFGGGHEGHFFGHGEFFEFGHGFEFGECDDCCACAABA@@B@@BBB??AADCBDEDBDFHEDEHGFFFFFFFGFHGFGHG
GGGGGFGGFIHGEGHGFEFHIIHGFIKJHIKKMLNKNMPOONQRSRORQPSUWXWXY\`]^_^^`abaaeeeddcdddgihklmoqrvz~~~|vyutx|ywutl*%(*-3Leb`B-)*-Kx|yxxrbC^so^]^_c]YWQI>=?HzuKABO~zQQG;89:;DG8YxvjA7~^gwyrg[LZkOMMMPbsw^W_]KI114425BglkS724;^z}l[ckw|}xtnijmlljf^scyurpkhecfbaecWLh~}}zwywrpooomia]^_fo`L6FȽjORAE`ZC6('*02/8DGIFEPd[]lvxupCFGN\vͦ{|y\_cced\53Qcijlllkj~|z}{|~~~}{{{|z}{{{{|}}ò~}~yuxxrpmljeb^[WWVSRPPLMLILLJIIJJIHGIIIHIGIJIIIJHIIIJHHJJJIIGIIIJIJHIHHJHIKHGGGGFFD@GHGGIHHGEGFHFFFEEEHGEFCACCBBCDDB@AACBB@@BABCCCACBCEEDDECCDCFDHGDEEEDDFEGEDFFDFHGEIGEHHFGGFGFEEGHHIGIHIIJJJJMMOPKNTONONORSTTUWXYZYZX\_^ab_``dbcddccceefefggjklpsuvy|~zvvxzxxuxe%(,-+1=EGG9,)*0Jxzwxys^J^no`]\_`^[[T4:;AYqFDFPyVK549<=>DB:0))-3Gtvvwzs[M_ua\YZ^WXP4/8:=VkFCCKqSE235?@??=?@AAB@@@A@??@CBBCCCEEFEGGEEDDEEGEFGGEDFEFDCFEACDACFCEDDFFGFFIIHJKMLMMNKOQRQVUVWW\[\[[\^_^aacbbefadhgggfijgjolqrqtuvy|}}gJFGUcVF?=77/0/,.0:P}trrwztZJaq\XWYa[YM-07:@[hFCEOrQA0/5ALZdaITbK6-?zaftpjdlqƦ^TPNJPYejaZ\_T?*04332>n|oJ225=d~kbeny¿SNMLSNNJMPY`c_w_{|vpghf_]Za_TKk~||zyzzuuqljicbaabeb`abil`J/IƿǿlUTC2/VzA(+7BHGGGA?>=?>?>>???@??=>?@ABCCDFDDFFEFHIHIIHIHEFCDFECDCBCC@@A?>@?@@AABA>@CEFCCFFHGGGIJLMLNOPSSVWYY[[]]_a`a`acadccccdeeecfglmoqstwy|{|~vwsjb`WRNMA7<@EPzvwz|tVHd~q\XWZ\YWG,/69?Zf@?BNjM=1-0;G]k]IH?/-,:ybgxnjaycˠdVKKLW`ltd]frcG124456DhonF432>ehafn{¿¿dYUKOKMHCGOadaxb|}yqhklc_^caTOmyx{|zskhdghhgd___agqbK1HǿɿkTRD8S|W/(1?B;==DJAJUSOH8wϳ\#RP%&',6f͞~}|t]Y^`deT20Xdeilllhe||}~|{}z|||{zz~{z}ÿ}~}zxxwromkhha\[VTXWTPMMPMMLJJMNLKKIMLKIKIJLLKILLIJLJIJKJIIIIKJJHHIIIKJIIJJIIJJKLHIKHHIGHGGFEFFFEFHFEFFGFFDDB@CDDA?BBA@>?=@@@==>?????=?>=>>@@>BB@@BCDFGFGHHJGEFFFFGECDDCDDBA@@A@?A@>>;;<><>>:;AAA>>?@C?AAEEGEGILMNLRTVTVY\]^_ab`abbabdcbde``gdcfijhkorrvx}uohhhfin~|{ymRFf|oYXVXXVVA$/57;\a>?AWhM90,,4ITKBHA.,**Cbev|plbb`Ŕ`QQQYainrmtiN757534=\oj@744@k{h`hq|sng]VZa_ZWZbd_yb{vzxtlikh__^c_RNm{|yyulrsrrpojb_^`jr]J4GɿfWRGWdE5*)*2@?994@==>?>>?======>?@AA@BCAACEDEEGECBBDDDDDC@@A@AA@@BB??A????>>>?>====?==<>;=><>=>AA?A@@ABBC@?=?@A??==<=;9;>>>><;>>>@BDEBB@ABCABDDCDDDDCCECCCDDDBBCCDDCEIMKLNNSRRWVXZZ\]]\_`
`aaa`cbbba__ca`cb[bejlqpv|~po|zmf^b]ZV@+66:>AGHVOC=AD^UFB.)-5MPUYU;/,+0G`jy~rkdjm^ab[Z_biwtnia[I16511366@<5224Hp{cags~ihgeghf_YSZbdazbxtqmjigggcf^TQo|zyxz~zxrolopqrpja_^bmrdG6KžȽdWSFZurjgcU<*1>IMJ7<=;:<<:8:;;<:;<=?>>@>>=<>>==:997889<<=<;;::=@BEFFDABBEFEDHHGGDHIHHGFGFDEDDGHGHIIIHKJKHMMKNOPSPSVWXXXY[]^^^`aa`cca`b`_^__\Ybc`acimrvw{}ztoneVFUIIE;?;;A??AA`^_D,,/8OXXVK<,,*/=]gy{njioichgbZ`_bhdca^XD(5423355643336Iszdafs}¿¿jlhbeeeedbbdcd}{h|wttrlliknlo`SOsvzyvvvqopoljfgjfcdcbb`bjn`G1MſȺ`XUNv{}|I%.;:8:;;8899;;<;;;<===><;9988;==<==<:<;=?@CDEEDCDGGFFGIHGFIJIFFEFFDEEEFGHHGHIJHIIIHJJKKLKJMLILPPQRUWWY\[[\\ab``_]`a`^\_a]a_[_\^_[ddemhmtxtwzxxtuv{{ytngb]SOMH@?CHiljG,,-9DIIKP7,,)/H`hvzsljgnc`jtlbda`bdmpcXK.4423357964126O{xhais}¾YSOSWY]hkigfeb|¿~g}ppsllljhknj]JNtzxzwwvtolljhhed_]^`a__bkm^G-MſǺw\VUKeh8,>YtJ$2AIJOIFCBPZWOF:ǿХChw548-/)"   !!! Ji>#%)(&('"%'(09>FN`jclyy~~tvvw~o+%4\~{|{jdfc_`_J/;^fhjkllgr}}||~||~{{{~}|~|}~|~ƺ~~~}}}}}z|}vvuvljnjfec`\[RPTRPNMHHICGFHIFCEEDEFGIIFEFFGIIIIHIJIFFHGIIJGHHHJJJHHIJIIIIGJHFHIJIIKIFFFGDEGEDDCA?@?@>>=<::88;89:=<=;:?=<:9=><:;:99;<<==>@ACCDGHIIHGGHHHFIHGGFGFFGFEECCDBECDEGHFFGEGIIIHIHJKKMJJOPOOKJPSUXXZ\\]^[\__`ba]_a_]_\\\\WTWSQGTZ[[TPWY`hr~ywtonlechxkjF1-.5DECED.)*)-D^eprnpofl\ŎYhvnkhdeehloiaK.420428HOI=24:Tyc`js~¿^ZUOKJNgmmjggd}{l|zysnlhgfgfddee]TOv}zyvurpnkkihfbcc`__^adkt`H3MŻx`VSIgm@)/FsE'4CEFPLIIDP[XQH"jv268-1(  ! "  IS(#&&&%)(')&  -<(0G1 %2>EOO[lxy~o/ %3Y}{}~kdcba_bH0>=<><>@>==;:8688556869:<=?A?AEFFEGJIGIHGGGFGGGFFEDEEDCCDCCDEEDGFGEFCFHFGFJJIJIFMIKLNLNNNMONRSUUUUXZZXY_\\]^^^`a^]\XTRMKONQMGJJKS[gos|ioE.01037681)()),E_jnplkkflw͇T`monnea_dhkjhB+33131LtumM448Qxcdlt¿¿pmiibX[fnniiec|wextplhgc`[]^X\`cZPOv~{{wvtronmkifeca^____b_kq\G0KºǺz_SRFWuX=?g{7'2=9=HC@DBQ\ZUI;¨8T̤>#kq/7>-/' ! !!!" 
"#FU&"#%(')'*)%" !!""  !#"30%""!'(++.7B~}t1 )Ux|{~}i[bc__^D1?Ydgijmjeq}|||~~}{{zzzzzy{z{{{|}~ɷ~~}{~~z|zyuwsoljjhfc\ZYUQSNGIFEFFDDDBAAABBDEGFEGHHIIHGHIJHJJHGFEHHGFDFFHGHGHIHHIIIIKKHFIHHGEEFGFEEEEAEBCBCCABAABA@@=;:::::7786569::;;;>?@BBEEEEGHFIFFGGFFFEFFGGGEDEEEGFEFEGFEGIFDFGEGHGEFHHGJIHKJKLLKLKKNNNQPPRSTVWXXY\Z\]a_[\]ZWZ[XWYWXXXYWY_^hlswx~wvVAA;8960-/+*+)1L{ajprlkkgq~VY]kmla\`fadcV>,44358nD25:Xsffmucd_^ZSUflnkhfbzbxtpkigcc___ZY_c]OLu~|zyxuponlkidee```____`mt^G3Pȴw`SQD@kpn~M#%-75:G=:@?R^ZUD;´Z>*-& ! !! !HR%%$&%%'&($# !"$$! "##!5/&#$#''*,,4;~~x,)Vr{~|{{f`be`_^G-=]ghlnojdq~}z|}}{~{{~}||{{~|{z|{{}Ʒ~~}}~|||{yx}{||zvvurmphfb_\VVPOKKGIHGECDEBCCDEEDEIFHIGIJJJLHHGJGJJJIHIHFFGHIJIHIHHHHIGHIHGGIHJHHFFFFDCDDCDCCCBCAAB@@A?>==;;<=<<;9:;<<<==>?@AACBDADFEEFGFFEEHGGDFEHHIJIFHGGGFEFGFCEEEGEEFGIGCFHIIGGHIHJIIJLMMLMMOPRRRSUWWWYZ\[Y^^[]^][\[]`aaa^`cdehlkqqx}~ukhc]TOLC?=6727X|djssonll|XUWbhfbdpeT\][A/6616=<:OWUQC:­}ɢ8%ou+A:+/&!!" FD'%%%&$%'&"! !!!$#""$%%#70&$%)))'+-5BǼr=?e;NOUenosyy}o#*Rwax{|}zhbbbb_XA+>\diklnj_k}}|}}{|}|{|{|z|}zyz~{~Ĵ}~}}~~}{~}zx{vqomihiec\XRQRPPLKHGGFEGHEEFEIHIIIJKJJJMNKJKMKIHKKJKKHGGFIIIGFHGGIIFHHGGHHILIFEFFGFFFDDDDCDDCCB@ABBAA??=>=<==<:>=;=;;?=?@@@@@BGECCFDGGFDGHHIJGFIJGEGIHHGFFGGHGEEEHEEGEDDGFFGFFFHGHHHIIJKLKMMNONNPQTVUUUVW[\ZZ[[_a_``a_]a\`]bcb`ehmsvw{|{qpkg_]X]qystuwuto{|\XWSYbgnvbS\\W809736>lpC78=\r_dkwĿmgfb\`fjmligfc~ujtsnlifca^[ZYZ]baOKw~ywuxvsoomjiedbbc`_]^`_hp\F.TƲy]TOFIbe[SFE6',:DIPOIE@NXXQC>õț5#qt095-0(!   
:8%$&%&%$#$# ##$#$''%5-$#9F@4,+-3;Ͻc"P|U$))*'(*25EZ[Yg}}m!,QoYy||}za__ad_W?-?^dhkmnk]n~|~~~{|}|{{}|yy{{zx|||}Ķ~|~}|w{xsqpjfhb`^\WRWROQMIIJKIIJHGGHHJIIHJKIJINKILNLJJIHHJGJJFGIJGHEHIHIIHHIHHGHGFFGFDEDEEDEFDDFFDEBDDC@AAA@@CA<>>=<><==>=>@@?A@ACDCACDEFFGGFFGJHHHHIHJIFGIEEFHIGGFEFGHGFGDDFGFEGGHHIHHHGHHKLJJJMNNNPRTTUUWWYZ[Z[]]]]^`fc`c`abccc`chghnqswz||fa[NQ^gptibgbP20<757BkywV;89?]taenwke_b__fkmkhgferistnmifda_\[[Y]e]NKvyxwuvuronkjhdcbbcb_]\\amnYA+VƿŴ{[TOI^|~|zF%)3;JVPLI@PXWT@@ɛ5$to+;6-5) :6!#"$%$"##" " "$#$%$$%#2,%Dzt[O=1.6?Ѿe-VpO'(++)'&'')9*!4b{w?DMUUZbiqu|f +WkXx|{}za\^cf^Z=1A^egkkki^u}|}~|}~|{|~{{|}}|{|zxy}|}~µ~}yvtsyrkhjf^_[[ZVSPNOKKKKIJIJJKJKMKJJKLKKKKLLJKLKJIIHGFIHHGHGGFGGEHHGHHGEHFEFGGEEEFEFGGGFFEDCEDBFBCAA@>A@??@????>A??AAA?@AACBDCBDFFFHIGGHHFFFHHHGJGGGFGFHHGEGEFIGEEFEFFEEGGHGHHHHHHJKKJLKOMOLNOORSSSUXY[[[\]]^____b`adcabba]edfiljoouy}~wlheY\egqxlmxdJ66?844;MVYH<68BdsafmxURFILOYgmljkibtmyunljfeb^]\]Z\d[PQwwzyvuurpnmjheec`a`]]]^ajn[B+UǶZWPC;=huhyD")1>GQOJH?QYXUA>Ś4-tk)A6&6(  ;=#$%$$#$$$#"&&$$*'#"""$#6+&GI>TWQE44=̗Y,QyA&*2:*&'''.5+'+;il )'%&"!,-2>>?@A@A@CCCECECDEFGFFIHHIHGFKIDFHHHIGFFFGEEEDEEEEFFEFFGGHHHGIHGKJLIKMOLMPRRSQSTVXXZ\`]_\_`^abaecddaca_dcdggejkmoswz}|vvnqsu{{x}eT5:KB65;DMI;767Cfqcgnx¿nfbZWSYemlkljatrwsnjifc`_]\\Y\d[KNwz{zvwvqookeheddd_a`_]_`jkW@+TǷYWPA?]qvw7$-;KHIKK>:QYYRDAĖ3+zo'D5$,'  DA%"#%&%#"""&/2'%&$!!"#$#8,'6F;)B]RC:@˪V-Pb3)6e^(*&((.5+$(4ch$&'$%%$%())+;uimxqssvuz~}zs] &N~}|z}t]^`adfU;,CZfghjje]w~~|{}~}|}}zx~}yw||{z|zy{}}||}zwxurpmidb`_]XVVUORQLLLJIIHJGFIKGGIIJKKKMKHKIIGGHHGGHHGFGFFFFIIGHHHHHEHGIIJHHFFGHGFFEEFFEFECDDEGFEDBBA@@@@>?B?>?>?>AA?@A@B@CDEDFGJGEGIIHHHGGIGFGGHIKHHHFEFEEEECCEEEFEEFEFGGGIJLKJJLMLKJNPOORRQOUXXVXZ[]]\_`^`_`bb`bc`^````bdebhnoltxrh9[pVE>>;<<7565Dgjego{lge_c[[fllmlhcokzsnkjhd`_]\[YYe\PMy{{zwutsopmihgfbdbcda```kp[B.U±ʸ\XQKjnapq=&&1AE<8;625SZYU@Aș2-~n%@2#'# J?!#%'$$$"!#8RA'$%$""""$' 
5*')SV@6KYI>?εU/S`4*B}ug_^I(0<-"%3hd!%"""#%$$'),>|oqxtqrrqkmqgggLld!'R~yx}qa__bcgV7/E[igiiid_y~~~~|~z}~~|z~||{~|{||zxz|}~}z}yutqkmniddaXZXWPOQPKKJHIHJJHKLJMJIJIHIJJKJIHHHIHFFGFFFFFEFGGFGFHIIIJHHHHHIHFFGGFFFFHGGGEEDDGEDEDCCBBA@????>?@?>?@?@@@ABCCEBDFFFGFFFHHHHGIIHJIHGDFFECEEDEEDDFFEGHHFEFFGFHGIIIGGILIIKMKMMNQRPTUSVVUUXY[Z^_]__^^`a__]]_\^`]^b^_gjkptwz|uwzbZXQJCBDC?;Rjgbfp|^WSOQLUfklljfcpowtmkifdc^\\\[\b[PPzyywwwrqsomkjhgfdcb__^_bmoY@,R²ɻ[WQHYG)+/.(&&/;AA?90-9QZYUAAɕ.)~l >0!   I=""%'&%&$"%;U5$#$$!"!!#&5,'2XtYTHKOEBɝ̻R.OX5*5}[$1B*"%&cc &$$$&((&((.@novsprx~|nogEp`*R|yz{n_\[abdX9-EZdhijlg]v}|~}z~~|{}zv{}{xy{|{}}|}~°}||~yurpklfgec_\\YTVQONNLGNHJJJHGHGIKGHHIIHHHFFGDGGGFFGFEFFHJEFIGHIIHGHIGHGIIGHFEHFGFGEFFFEFHECCCA@AB@@A@@>>@@A@?@@?=@?A?@BCCCCEEGDDEGHHHIHFFHHFHGHGGFGHEFGFGDDDFEDFFGGFFFGHGHIJJJJLMLMOLPNONQQPSWWWXZXXZ\[]Z]]^[^^]^]_^][Z`didffjnswz}yvtneb`^TQ[olahr|dca^XW_knmljfdpvxtoihfdb^\]]\_bXMW{|{zwyvtspmjjgddcca`____mmVA/XʸTVPCQa^TPKE1&*1=FJF;6>T[XSCCǔ,.j#'G4 " !! !  N;"!$%&'&#"&7=/$#($!#! "# 4)%&Ek`KWMT>?ү͹N0NW5)9u:%0A*$&f\ %%'*AL8*)).Coswto~mmh=sb!'Rrtoqpsz{xx~zy|n][^aacS7*D_eiijkcXw}~~~~{z~|z}z{{|xw{|zz|z{{ñ~}~}zwuvoklldgd^]ZUQQNQJMIKJHJKJIIHIIIKIIIHHIFGFEFGIHDCFDGIIFGIHIIGJHGFGGIGGEGFDDFFEEFFEDECCDDEBDECACABA@BC@?@@@>ABB@@?@@BAACDDCCFEGGFGHGGGGHHGGFFEFGFFFFGIIGGGGGFEFHEEGHGFGEFHJIIIJLKLKLOPPOQSRSTTWUUYYWYXYW[\\[]_^`_]a^`c^gdbdhmprrswz~wvq~kbiqzkmnmgefmolljfentwupkifda`_]\[_f\LOy||yzyttsokhgecaaca`_^`akkVB,_ɹNYPLs>&')8LWUJ@>SYXT?Gɒ,3g /V6!&+&&!(*$#!!""#D3 "$#%%##&9G8$"'%##! 
$& 4'$!(Qbb^VG6DǍ͹H/R^6*Hh3((26* "dW$%(GwvO-)*+Hmuxts{qmdAo[",XmWby}_[ZXZ[Z_a`gffmmprtyyyљ}|zyzk\[^b`cS40F\ceihjd^}}~}z|}}xy}{z{~{x|zyy{{xxyzz{{~~~²~}zzvrurmnide__YZWVSQLLILKMLKIJIIIJHJGGIHGFFFGGFHHIGFFHHHGGGFGHHHHGGHGGGGGFHFFEFEDEGHEDDECCCDDCDEBBC@A@BBAABBA@?>(fŻɸSXSW~kTxsx>&'1=PZVL>=P[YR?Hʔ,0c!/W2)JZSXPaR,-'/30'%!8.!#$"$$"!'9K;'"&%""""$&!0*%-Tm^Sh]O;DʺżθE5_^5)?g&('.4+ "%jY!%$&Pi]?%))+Hnsyut{tplcCg^ ,VmZamn]YVXZYVYYXZ[ZYTZ]S]^`duԂq{x{|{}~}{wxxk_^`_deR10J]ddghhba~~}|~~}}y{{||{wzwwxz{zzzzzz}|}|{zwwsqohgcb]_\UXWSOOLONNKJIHHIIGHGGIGFFGIGHHIGHGGGGGEGFFEHHGGHIFHIHHIGGGGGEFGGDDDFGHEDEEFEDEECCBCBAAAAB@>@BAABBC@ABABB@CEDCCDEDEIHEGGFGGFEGHGIIIIGFGHHFFFFGIJHGHGGJGGHJLKKIJIIGILKJKLKLLMNNOQSQUTVUVZWZY[^^^]^`^[^_``cddcagimonpqttvzy|~~~{|~hbjq}VUVWWT^lnokiddpsvunkheca__\[Z]aWJZ||xwvvsqoliffcbbbb`_``^khT@*gŻȸTVSTsNFoDQj6&,9CMJC:6=RXXQBIˏ,2],G+0nwzY778FF,gƽȹUTPKT9?P5LT+&.;><:7324-51B4 !$#"! &8=/$$%# "%(3*&=m_8-(,1;Oǚ̲<8YX0)Dv_0)'*,%!'pT$$#EvoD)&'*KxJj] ,[j[bji[WVSUTSTVUSTUVWWWVVTUR>BfdghttׇqwutwtuvpV( 2V72;BIQXVYbpskv|z|wvui^_`_`eP.2GZ`dghfaby~~}~}}}~zy||w|}z|{zxz||{yz|zz{|{}ż}|zyyvsqrlhfec^[[ZWSRNMJJKIIFGHFIFFGGGGIHHHHIIHHIHFGFGGEHHGHFHHGCHGFGGFHIHGFEFFFGFEEEFDEDEDEDDEDEFCBDDDCBB?CBA@?@@@>???BBABBBCEFFDGHIJIIIHILKKJIHKIJJJKIHGHFEGEAEDCDDCCCEEEDCEGFGIIJJKLLLLPOPQTUVUVYYY[[_`]``^`_]]__]]_[]VYacfilkmprsvy{{{~~gejs¾^[[\^`ejmkhda`¿ou{wrmjheb``^[\Z^i\MX|z{yvvtroomkdeeebba_^_`bljX=,jǹǵXUL@DL42AL=%&0<>?@=<8?-##%"!&) "2*#9R]J6+,06Lлα>6`]1-JO`p*'(./& %oI!&&+Ojf>(()-Lümm\-]eS`jhVRURQSRRTTTSTSQUVWUTTR=Bgbhcohum_jhhklpokF-P*&))(&&')((+3f~|{zwute]`]\^`N02HZbcdfd`cw~~}~||~}|z|}}{|{zx}zwwz|yyxyyy{{{|ƻ~}}zxysutolkg_c\TZWWQPIJKGDCFIGGEHIHIJIHJGGGHIHFGHHHFHFEDFDCHGEFGGHFFHJHHHHHGHFFGFFFEDFDDEFEFGECEDCDBCBB@A@A@@A@@?@@@ABAABDDDFHGFGHHHJJIIKIHGIKIHGGFGIHHGFDFEDCBCDCBCCCCCDFFFHIGIGIJMMNNNNQQRTUVWYZ\]]__^__]]\^^^\^^^_\`ab`^ahhjknqsw{}{gfls¿URRRMO]gjjfdb_px}tpmjgeba_[[[Z_cZHR{xyxvusrqmlhgdfbbb``__amiV>+jǽȷXUONva4fuZ)'8ECBJIC>?RYXP>Svdͅ*@a,C1/VyrZ?"$-88326<]t(M0 #&%$" 
&77-##%"!"%*!&5)#&A^h<*,08Rɣί4:f^.,?sfk5-&-1&!&tK$&'0EM6((+0Qos]-]dT`ihWRRPPQRSTTTUSTTTUVSSRR@J[heofrbE6A>AA?GNJ)(D-')(('%%$$#")Pgs~}||}xvutd\`^]_]N06O\adcdc`cx}z|~~z|}{{}}||}yx{|zx|{{yyxxxy{yy|}~Ż~|y|{zwrqmkihb_[WZSQMJLLKJJIKGHIEFGIIJHJJIJJKHGGEEFHGFGHFEFHIHFGHIJHGGGHHHGFEFGGGFEDGFFIFFEEFDFDCCDCCBBCBAA@@@?@A@@??BBBDCCEDFGGHHGFIIHJJIGHIIGGFFFDEDCDCCBDCCABCCBCFGCEFEFGIKIILMLNOORQTUVWYYZ[[\Z]^^^_^a^_`^____a_b]]adedglmpruxy|~xcenv¿hfb``bggfheebc¿nzytqmkigc__]]\[`eZK[yzyxvtsqpolifffcaaaba`bjgT<+mƼȷTQOWtWYole.*;A:8<<87?SXWM@TúɃ%;[6U0>}xN"(6B@@C==^lfnivj8 ('2F8/'& '@,'&(''#!###"$&*MOGWe`ZW[fvtlrw{{}}zzyvrrpd[]^\\^J/4I`bcabc_d~}}}|{}~{|{z{}{wy}ywy~|xzzzxy{zz{}~~ƻ~|{}zwyxrqmkikd]^ZRSRSPKJKJJJGKIIHIJJJHHIJIHHHGHGHGEGHFFFGCFEEFDDFIIHFEGGHHGFHHFGGFEGGEFFEGGFEEDBBBBA?@>@@>>>?@@????BACCCDDEFEDGGHIIHFFFEDEFDDEEDDCCBBBABACBCCDCCFEFDEGGGJKIJKLMLNPPPPRSUWXVX[\]^_`]]^`a`_`aabbccbccdbdghkmonqssuwz{}|}~uainuddecimkfccab`cj|}upnnkhea_^]\\`dXIX|}zxvqtqpnkigfecccc``abjhT9-mźdzPRMLvsi0fg,,8;0.32.0?PWUN9Tſ$CV=_,8YcbR!*6;;;:34YO "j- #%%#"! "*+%"$#  "&.#)=)%-V_G.*,05R˿Ϩ(>eS+*Pj.*,/$ #,tG#&%(SF((-0UnlW$3ffX_edRPQQQSWiofpk\dWYPSQPNL>??BA@BA?@AAAACBCEFECCBDEBCDDDDDDDBCDBBAB?ABCADEDHFFFIHHIJJHHLMJKNMMMNPSSVUVXYVX\[[[]____dbaadeecfegfhghikkiijlklonjkmptwy{~~vdglvWNURNR^`ZRZ``^k{|vsqkgdb`^]]_bbVHZ~{zywvusqomkgfedbaa``aaieR;,lżȲKSO;XlZ20_A&*4:69<744?TWUO:T} @T7]&*25]d((4:;9720C?!#T* %&%#! (/'"$$! !#%-%,<)#&?NB1++.6X˶ϥ.>gS,*TkjP*))-0% 'yD#&%&;QJ4&(+4ZeqR$2gfQ]ebQOOQQUqw|kPNOOOI=Qbfimion3!)DzvC## +@.((())$!!!!" 
N9/-++-///,*,++-/222Bbfilhijortvvwx{}{{yyyxtpqp\X[Z\^^G+2N]]^ba`Ue{|||{y|}|{|}xu|{y{}zyyz{|z{zyyzzyy||{}Ź}{yyvwwrroifg_^aXWVTRPNPLGLHFFBEIGHHHIHIHHIHGEFEEBADDCACBCDEEDEGFFDCEFEFDEFEEFFDDDDDFEECBDDEECCCABDCCBABA@ABBCCA@@@A@>?@>?A@@BCB@?ABAA@A@CBCDDCEFHJGGIIJKJIJLLMNLKMPMOQQRRRUVVXXUWZ[\]]]b]]]`aacccghighhikjjklkijiifhijjjkkruwwy{|}}vfejyhfaXLS\`WJ[_\`jy}vsokjfdcbbbecWL_~{xvttnmljhfecbcbaadjgR:,mĹȯQUO@]T,5UU8'*8ABILFB>ARXWP4R²}d|{$GQ;\*-FDC<- &4?BA@-"## !#&,$&8)$@__`>-*/7^őϡ+DhZ0*EJ)F9.('+4' !({A#%$%7^bH(&)/[ӽixS2mfRZe_LMNPOR|nQPOLMG;F^shtex^/")"" )=+%(&'(%&%$$" I9/..,..//,,--.0//35=X~jonnmmnnjllnknpjwxwxxzsvurrvrqok\[\YZ\]E.4N]]aab`Uby}}|||~~{z~{y{zz{|zvwywz{|yuyxxxz|y{Ÿ~~~|{yzwyspokkfda_\XYWTTPQPJJJHKIHGHKHGGFGFECCCDCBBDCBBCBBBC?>?@AA@ABAAFCCDDDBEFFEDFFEFEDDEFGDEEDFEEEDDDCBA@@A?>>=;<===@?==<>?>??>@BCACDEDEFGHJJJLMNNNLLPNNMPPPPQOQQPRRRTSUWWXWWZ\Z[]_]]^a^^cbddeffiijjjkijmmkkkljkjjkmlklknpqqutx~~~xucdnz{zzk^_cd_]``Y^fszvttpnljfbcdcedcaUIX}{yurpojjjgeffc`fkhU;*jĺįNROUP8vrS,,;D@AFIHBCPWWM9Z¦z|sHPCX'9z{sK!$0869BH-#"!L- "!"$! (CI/!#%! !#(+:+(EZ`b<,-18bϡ&EjY.)/2.*)+*'.<( ")}D"%%(RF&'*.XѾjyR.qdQZf^MKTXTS^`ciqhiozRNNPLKE8/VT[Y[aj*!*C9$" '?*%(&(),?@?=???@BBBEECCBCCABEFEEEDFDFGFHFHGFEDEGEBABAA?>==<=<<<<<;<;<89:;:<>AAB@BDFIKIJMLNPRPORPSSRRRSRRRTTQRSTTUVTVSUX[WXZZZZ\^]\__`aa`b_bdefgfggfhkkjkmlmkjfccbcc`]`adbhkopstypair{¿wtgWT`cddeba[^¿erxuspnnjigd`]]]`]ZXRHVz~{wxvwtqpqrolijkjlnqcT<+iƾƪߑPRP[v`l`ne,+8>500;EDCPYVN:]Ʒt!KS?T$,D2@k: &7@BEFD,!"!P-!$#$%" %:@("&'!  %+ +8*&2JN:%'-28cΛCc^-*>^cZVU7'2:' $'}=%&(D^P/((*/ZѽhzP1zdQXa[P[wmrqd]_WXXPPVXPVTNE1)3.87CDT, )@z4"! 
0H+')(),TtD!#P;.,+,,-000.-..,/105;Z|ipjurmonnnporQ"&%'0.****-2/0029>LWZYLF;9;NWORYc]Udxyrruz{||z||}|~yxzzwy{zvuyywwxvwx{}}~µ}}ytuvzu{~~}~{|zw|xttqkkffda_`XWVUVQLNMLKIHFDGGEEDCCE@BB?@A>>?><===>?>@@A>@AA@@BBCBDCDCDHGHFGGGGGHKJHEGFFCBAA@AA?@?A???>;;;::;:<<=??A@BEFHJJKMNPPSSUVWVSUVWUVVSUUWUVXTWWXWZVWXXZYZ[\]^^__`]]aa``abaacacdcdfegfeeZ[cd^[ZXQZcdgfhllqrdkr{vdIM^dddefc`\d¿fx|xrmliifba^^]^\[WQETx~zywvssnolplkmkiigefghjhe_U;(nĨސNTQOryV2h`(*6=5./:KCBS[XR9_ȼp"LMBX$,@?BL!&088><==>=?=?@?==>>=?A?@DCCDEEDFEIJIGFGGFGGDFDEEECDEFB@@?@@>>><=><>>?@@BDCEFFGHKMPOQRTTRTWYY[[[YZZXY\[]\\\\[ZZZ[Z[YZZ[YZY][\\Z[][]Y\^__]_^^```][\djib^\\Wkmpprrrv}peiq{WOTjqkdefgc`]c¿d}~wtmjfdda`^_][Z\YVFT{yyxutqnmnkjjjfecbdba_a`^S;/lɨ݌OTL;JVD,6W6$(3><?@@@A@>=@?AC@DFEFFFEDFHHILNNPQPTUXYZ^[[Z[\^^`]__ZZYZYWWYZ\\[V[\[[[ZZ\]^]^`abd_cddbijfhjsvtnjegnvvwxxxy{rdho}Tdppf^aghgc__gcxwsqlhdca__\YZYYZVPCQ}zyutrnmkhghhgebcab``_`_\O7+nǨ܊NRJ6/2),.-($%-6@GJJA7=SYUK:eƿʹh"MKMK 5^Zl~A )8;:;6' 5X," "!!#" -CH-""$ !$(.4)%1PVY_[I:5jԩb͑JdV,*0@bl8@*'.," &4!$$+FYA*&)-2cӷb|F4{`OU\UJOYPR`_X]cagps{~P@.%)(+-39F&#+Cn:" +CI)%'),?H7"AI&H6,*++/4?ZfbP6,,-117=_uirĻpoE%%(**,Ki>*,)&&&'&)* %$'&'*.,,,,++*&%&$'5Lxú~~}}|zxvxwxvvyxrsuwu18Poomleeefp~}~}}}zzzxxwuyqoqmidda`^\WVXPRQPMJIKJLIHIGGGIFEFFFFGGEEDDCGEEGGFHHGGJEGHGGGHGFGGGIJHEGGEFFDEBCFDEEDCBBFEGIIKKLIHIJJKKLLJNNPQSSSTVTUXXXXZVY[XYX\\[Z\[^`^\bb`_bcegggjjlopmnpqstvuvvw{|ynopqvyzz{zzz|qcip}u~ymhdfghhf`^hezytrnifc`\]]ZYXXWRLFU|zwstpnmkigedbeba`a_^]^]\ZN:,mȧׇQQJC[cYTUN;%#*18@A922CVVTL;lǿ̹c$VFGH'Ftpx9$-7:85) 6I+"! !""" #.CC*#"# "$49(&;q^KDY[I8qǎһː LgX-.Z?$*+ !(5##$(5LU@('*3fԴa~F0{^NS\SDBCCDBFEFFFEFFQTNJTPG=+$(+/06ed\w[&&H5,+,-.3EchgU6++-/28:>JVevm(;Svnnmijkjt|~}|{~zz{z|zxwvtvpomkkihgd`_\[ZZYXXVSQSSRRQPQPSSTSPRSSTSQLOMKJLJILNMMLLNLJHGDEFEDEFDFFGHGGHMNPOQPQUSSSUUQOUSSTTRRTPPWYYZ\Z_adgjnpprttvwyyzz{}vsuvy~~}~}|~}ncmv`ZSRUNVbghigaag{wssnjgdggd^WPI`xtromjjhfda``\^^]]ZXK7*rĽƤ܈LTJ9AC3ZkfB"%0<@B:3/2@SWVN,;=<=D: !5I.! 
# !# !''#"$$ "(*97%.`eOA,+/57|ǽɉ#PiN)Pt8&*1* !(3&'&&*$"'()-5mϳZhi+B~`GNXO=?===>>?>A?>A>@@@AA@@:'%4cw}tF%!%Sl+  :9('''-]e!+Q4**,,-5Fc`K1,.,,-078mjrsȿ’iomo<"%&*+-07HlvuB%%%&(% $$',;Wd_ZWUQK<+&&$)1?}Ɵ‰{xxxzz{zy}yã^*@Z}ipwmny~~}~{{{yzzvxzxxxxvvuwwstqsqppokjhfggffhihfgjie[PNKIGFIIMPPNOPNOKHFC@?DGEGGFFIKPSVXYZ]_`cdgjjlpqrstvvvvwxz|||xx{{~~~~lhnv~}|z{trnnpokgl¿fzxtqimkklmkaRG`~|zxvsnmmoqttsuteS9/wöĞRRJHvqyySub*%$,@UTPGF7]YRE'&4;?A6'4D-!"!!#%)(*)$&)''! $),9;((V^^Y8-068ƯԯɆ&OeT-p}R(',,!$/2%%'(+//,)(-9kҰVcU+@bILWN?><<;=>>=?>=??>>===??9'&-/MkmB&#$Fd[n5! =7()))/\h:#! ,S4--,*-7G\aL3+,---39=rjqküЏlpqn<#&(*+,/_n?'%&()&$'(+2CNUROJC?2'%&&*2?Ѫν|zxyz|zy~ΒzV'Bbsuloy}~}~}||}|||}}~{|}|{{{}|z|}xyzwy|~{wutsrmZPMGFB>GPVVWZ[]^\SOJEA:GKMRPQLLPSZbceeghmoqsuxy||~~~yz}~~}{lhlv~|wssspniie}ytplkiimllcPI[~~{zutrrqrqprttpdQ:-yŸơ~RPF8IdyndZ)&)4EQRLA6CSVQJ7xƲ\ AS1(**-,-)++.0/247638>?>7>@ [pra1&4<<;4*6G, !$&4JLJGA<<:2%#)0;8%#AisX-./5;ͤϷɀ$SmR,cS"(&.3"$/+%#&,IeR2))*5mҭVk@#?^DLXM@<9;;:;=>>=<<<<===<==6&%,\q|`B#!%Ca) >5"&)(2_v_- /S/+,*(+3FZ\H9/-.-.28@vdqlmĦpflpm8!%%(+/OvM-'((*(#'&+3Qa^aRC/('%&&&*3?Ü̼{yxwyyyw{τsR(Icxngmy~~~~weXYVSIAVaabffjimh]VMG@>NSWWVTROSYbgklqtwx{}~zz}~~~~zignx¿}wvtrpnjml|wvqlkjiomjaQJe~}yvusqrqrqsutrdQ=.yĸŠڀPQD62=VgoY*%&1@GKA6/2DWWRK;yèɲZEU6*)')**(*)*****,)*'(''#E> `m?ax5+;@KSA*>J*!"!#-Ly|zusnnbC& !)073%$)>5&56068~(TnK'[@()(10!$10%%'3Y]A*((*4pөZl8CǦZBIXJ<:99;:;@HDDD@=<>?><<:2&'4q{nUE !%/tS! ;2(&'),c`#! /M/**,,.2DY]WH6,-,028Awcqr{zmo6$$$'*5X\."7RT-%'(('#%(,:PXW`WB*$$&'%&)3@˯м}|ytnoqvuliy{yĹ|vQ%KdwwbclwukiebYaggljikmnrk_ZTI@CRYZZXTSRX`jmuy}y{{}ydgrz‰}{zxtsqmihj{vtpnlklnlj`SH_|wvusrrrqsuuuqhM;,|ķƝzMPD2+..0+&%$'3;5,**+1CWWSK>yȿ˴THS0*)&''&'''()((('('&&$$ QD ai#'0=OE0#AG*!!" #1Rqyv||{xJ%! (+38&$%<<>M>279ûw)TgF(r<()'.* !,,&&&(,3/+&(+4rԪXi2-4Hȡ~U=HWE:87788NM>7*9G+#  $*9CJXn{jezH%! 
"#(&>8(&KcNBG=88?Ըms*ReD.qL,'$** "11%&%-OcP1'),2rӨ^~q~=Ooh_Ý}P9"4O1))*+-2DZW;**++-02:A\pwϴwiind."%%,23fryY(+)##%*$!"$&,=W\YZ[Y]XE+$'),3H¸ѷ}yxuOKQjwWyjzvv͜ycZ^ljsnlkmkK286+48?D4fiMWfouprvtyzolkighiikmmmnssg`ZUOBIZ^b_[WRXalu|~whgn{rpnlmlnjighjdhi~ogdflmlkki_O@brqpromnkihglptssutpeN66òÛtNTJPky~S(*7@CGHEA=HZWPK<αMLR.)()**1212.1..0**''$## )Siu_"" CK)! " "8lsBWtE# !()F:$$8X[e]eX:>üm.]iH,?oxF1%&2.#$0}-&%%,Rsn5*+/:uѥVsvAj}T>DJ@424437?r~s1555664. &-1??;BO"2=+#  C3)&&)0f\#:N-*))),3G^WB.++,,.39Bߵ[mkygoqoe.#"'',:]{zm%%$')%#"$(,8NULW^[XW9'#$'-5LţеzzvbESSrySeqrzvuΣ~lbXnxdpnlkkeE61/4GK:.#36'/.146=;EQ]`bnznlmihihjmpokmvujc^WPFL[\a`]UU\dpw~vgis~PJDJELHDNUbifnh}pifeknnkjj_SAdurnonkkkihfhnstvutpcP75ötKQE4+:\YUzY'->DDMPMMAGTROI@ȺѯMHP/'('-8WaVCYWKFKD3*'%#%!3>(AA FF)" 5qoB\uC"#,*Q?$$Df\]_=9=ּd/XjC)/mfE($,) #1)%$%4om/'&,7wӡX}uo>l}T;@F;211335=t~{pm,312442-#'4]wpwnV!!!&R. =2)''(-dj+=Q+)(&(+5L__B.,+,,.3:C߶[nrľ|lnlb&#%(',;_!$$&)#!#%(-9PG@[VC0-'##$'+4NӸhbαyzvcYY`vuY]ptwtuplb\hvdorljidH611NoTF,".9-+,+-7B77/,00@p½znmljijijkopprurha\WOEK[^ab\WV\cmy~vghrbYSKJQVUS\chel¾f|riggjlkjii^QCdxsrolkljhhhgnsuuvuocR<=·ÔxTRB1+5X\sG$0=>@JJII?HWWQH=ȼѰC!FL0***0Qjawq|{jJ+##$" +hUFQcpJ$!$#',(  JI-" "7ti>[rE""+(!bN)#9rmVK;79Űe/ZiC%2kn(&$-,!#8y+%&&,CVC)('+6wϡVxrAfzP;AF9101335:vQ5d\1222330+ 'AaR!'n+!  7/&%&(3uk<?V.+*((*3IYX7-,,+,.29=cpgѻxipp_(#$&),2SPPlaf;!#$%( %&(/AW`mj[?&##"#%'+8VҴέxz~fPMYpr][qzupoxok`YrneoplkicC208eh^_/"8;,,,,9Xmld8/.15ZĻypnmkjjlklpqpqxukb[XOBQ_`cb\WU]diy|}shitg^VJR^eedgkjdmg|pgfgjiijji`Q?iwqpplklhhihjptsrtsnaM8:úŔsUTA/.1FevT$"-;>EQKKF=HWVTG?ǹ~ϫFLI1)(+7T}jmotyyv}O*'&"!!!@2&hyO'/00BF9)LE,  !! 
$':^Q8bm=" )(%zF&$>TE[bSA;>Ͽ].^kN%5sB!&&.-!$:w&%$&(EWT8*),8yКS{tl;eyI;?A81.11139[Y74[v5121022/+");cU-<:V #$>)  :0'&$&.XA\O BO-))'()0CVR=,*))+/26BZln¾Թwjos\+"$'(,3[}kuV"%&%' &&&->R]UMR6# !"$$'+6Sҽͪ{xrK;PexrY^ozwovwxܿgh^^qoiopnkhbA1/>xbqf-96*,,/Dm4/11:^˄nwwwy{}zy{ypnmklkklnqooszwkc[VMAT`aa`[WU\dp{ngnt¿PSOVdiigffhkhoi|riegjjhhie]MAhuqonljlkjhdhpvvutrmaP;;ºpPQA5?TRY\I5&'4@GKD@92JYVTGBөFLO0-5B@Kbfc^puhuys9-+($"I87V) (48@H66.G?+" "%1=<@@4=ee7$"('$Q4#"%;E9IJ.4=ѬпZ6dzQ).RbpN)')0-"";r &$%10AjT{N*$52)*,/Dwi.,.18c{ptssontmgqursqqommnmlmlqqrswumd\YNEU^`bb]WX^er|ojnt~hlotuxumgfhgami|oiegkjghjg^O;esrponkijjgginuvuuvo`O9=ǾsSTDJ{}t?$$.HN,4XxkV\dVH\WNYhH:?<2'# J;*:&.:@3'-'DB*" !"-Unijhdfgd6 "))?0'$&FY\DGLJ?ѵɮкU9lyK*0r;&&&/, "8t#%$$-DW<%'(,B5-,./0/5FrtpheM./////-($,@p|wL;Y"&\v) =+#%&&2a? LQ*)()*,3=SVC4*')+/4:J]lh_]agcaglthlpqq\)!#').8`XKXlo>#"$&% !$(,>_cc\L4&$"!!"%+5UԺp`΢x|zr[OU]pwV_ntqpvwzܸbg]^s{pqrrrocvǽsvtslllhpplrlnI?CJRW\adhrqquxrroonnoppssqu{wkbbXLEYaced^TS`jt|qioysxwspssqnmjgapj~pfdfimmmlf\Kksqq{sf}n~T'" )wD)ZnO30?<9;7# UA) ""!6kuget{qqh2"!(("O2$*N\T)%O>;CԽμJ;_eE%4ixC$&).*!Ar(%&&>pi3''+3rV>oknnkpmtxv|~KQw|cCe}K4~vh/,,+.-,,'&Xx}gB:]!&[\# >.%#$%$%PU(()()-5G\YF1*+)*04:JblhilmonpmporonopV( "'(,;ui_ewP!$%&&"$&',?Z`caP/"!!!#$&)6\Ҳ}˛wxsZMV^rqQ_mjfdcdorvجfd]cwsug8/07KNB8'+-,,+.M}X-,/3B{ǻ{twssnlnnqqmnjf52525526<=>DHNkxz}vqqnnooqqrtrquzvkc`WIG[cbcg]VZ`kv}qjowjrn][kpopokg^p¾h|phfgjllmkg]K3+++*+,;<(+*+)+,..+%)UmswL>e"(M`cC# ":,%$%%(lwrpnmkkhgghintwwvvqcN9@ʿrOQDRpvsed[7#&3AHXJKB9JWWUDD»ȝ2"QL**.**+44.71:>JQIKDK2##!=,*))0($;o"'&((.AF0'&(2tYDuuvuttuwwsuo@Ou}]AezC5:=1,++),.9=*-.-+*+*+,+&)A_o}>:d !@i* !1)&$'(?6PD%%'()+.1773,)))),1:LXjfkyvolnsO%%&')+-5mȓ1%%#%'&$$$.Jc`_cW7# !#$',8_˳͖z~xpU^VWypMenidbcgotv|ئc{]]gc:11;GJJ9' *,*+,0Xi?0..6@̴Ƶ{vtsomprpppnnp~~~a8344444356568;^Ȗzla]\[`egsǂbgiotw}}}tqonnprsrsurru}ukd[THE^beff^ZZaly|lioyga]^^cgqrnjf]nmwjgcehkmmldZKBpvppnmkhihfdhnsxyyvscK2IoOP@:qg2$'49;F@728JXWSEGη{ɛ.$XJ.**(')()()),*+-*()+'#!":-0Eo{B 0>DMI3S>(%=on6@nZ>jd3#**'C/'*@L_\[LB;NҷF@hvD&'+-1)/.)+-) 
#9l"%$'3ZpX0&&)8oWFxwwwuvtuutulBOu{]C]yC5cșxeLGHKIQUtnV[Xahnqqruu{lcdkpnsxwztponopsssvwsrw}wkd^TJJ]bffe_X[`my}nkp{VLKOKZfpqnje[pÿ~iwlhgggimkifZIii3Cu^@k`5! ()%=.%!RgTYVZUCO͠д@AftB'(.BLHFB0-2)Bl #%'*0HB$&')8qTDxuwwrsstvvvhGUyz[C\xC3;?1'())*,/UC!,-.**)()**)'5=;WbkD?k|#'Quua+ #1&%%&(2WH1A_$CB+'&(+-7Qe]J6*()+.3&"=ec3CnS;je2!!*($;/(.R`bbZ\`?RзӰ6AgmA*.Fn{rlcE73)Cf%%'2WcV/')+8sUQtvvtihjhpwri>Rw~YEeuC0:/(#GUX^]WP6VرЬ8CglA*?l~qhbfVF<-!Ff"'&(?hjO'&&'5qQPzxsob^``osqlCTwzVGbwD0:@.)'('(+4x@((''(((&86Hy|tECny $`[  &5*%$$'.os&KB%'((&+:N]\N2('(+06914>SC%#+(%A.$`mULS^XFY֞aЫ8Cdi?1Yy_=8EQYVL4Da"%$&)7B9$%%)9tQOxutodkrpstuhATxxSL}^vE0:?/)((&'*5qr0)''&&)'!#,4Xuxacut$`zxa) +;&$#$(DR;Q@''('&*6J[V?+)))+/58X~fpdnhk<##$&*+-ArtF(# !%& #%&.K^[^]\\YO8%!!#+9tϭiă|weN\M\xlWgmoroqsrvuՐci3QpuyyY411@|P*"$2*,.-2]qvw>/.05Mѱ|Ĺƥtrswz~zrpnl}}|V3223553147765:nоlULJJKNSָrf]W\hrqmgfb?;;;;GQH>::@M=8675654678Ehd=DAAHOURV^bit}}u{||~|tqpooqsuvusoqy}uhaZRFOacefdZV[gq{|w{wfls|baWLHVdlmlhb\lÿxp{wuplkkkllicYGEo{yvttrpstuuuuobL6GûȻ_ROG^x^koH%%.:4+.4@IBKVVPFP̿¿͒'*[E*'()(***,..-*('(&&$#!"5O&=re`U"!7B<=8- 'Q5!!3]pmikkb`L)#--1aI./Q[XG5X];Zɤϩ4CioA6ng>*',6Unf=( ?]!&%&6fkR/$%)@vPSzxvrkmpprvuj##%&'+8X1#!"%%!&&2U`aabcdh`;""!#-?yґuÁ}whULBf{hSjmnorqsvvtՎdc0RntrtS424>trK*%(0*,+-.A\ZL0.-05QʹŠlrtv}t}|rpnn~S2343355456767;qкmTGJKJPVڶwl`WZdoqmig`=:::Htq]D::>W8546676467:Ce[><<;<;==<<:=QwǏKT[]^_dhnnrw{sppqpprsuvrpr{~thaXN@P]aed`YZZgpz}upyucnu¿{zmTR\gkljga\rvkzuqomlklnlkdYHAlzwtssqrsrtvvvrcO5Kǻ[PRK^O*'4-! 
!,8978:DG;LWSOEXɽϒ(,_A+*++-/0/-*(&%''(''%# !/G"-6)+7'*?;.076% +,W3 =l{vrx}ws[( +(YiRGOM]>*2:8aղά4GomF>mR7,(+0YxyO0"C^'&)BruX*%%&?qLQwwtpfghforrj7\xwSLw^pC49;*$(%%&&)**++XD'&&'''%'LskM>to%SmV 27'$$#$LfT5J<*'&')0@U`R5)%()+.2<<@EFPj~yopppoprsturps{}qg`XNDP]\ad]ZY]hqzyoizvjnws_Xbhnnkhb\psqxsqomljgiikeWEBs}|xutssqptssuvraN/KõƺZNQ@0@]_Q>)!!(4:?EFE<4NTRNEXЕ.%[D),//0/*(($$&%&'(&&#!#!,B.t~{=*89:167 .W0Akn[SX\Y^D#)%Iqfi[ZdR5+157dξͪ+HnrI@bO6-(*1`rL-HZ %$%,=<-%%&&?tLSvvupfdffpurl9YuvNIy[w<19:-'($%&'*1=:DA%'''&&$*@Ldr?)(&(*/@S^K/(())+.6;_vglg```iɎiqpj=!##%(1Olg@F|X48:::99:::7[|wqpproptuttrqv|}nc^UJCQ\`bd^ZW\fszyimtdmw|xpicchmnkic\sslyvrrpjigedgghbTC;o~zwwusrrqpnqsqtsr^M0OƺZROAKq_@&!#)4AOH<59LTSMDWэ*,\C011/*('%((&''&&&#$#""!-B%9bYWnQ!0::./# 3R1 Afd.$**-3'!($0IQZept[E:47:fǭyΦ&JpvG7UM=/*+?o|^<#!MV!&$&*@NO,#$(BwGWuwtrlkjkrwql8[yuMMv]y9/98-(&%')&1`~7#(''&'#)@ti=;wn5BO& 36$%$! 6iod+M9((&&).vf0.-26^Τ^Ɠtwws}smlmjwG3247Deo^C=@AIV68:;;<<996;T}vqoqplmruvustw{|od]WL@Vb`bb\[U]iqz}u{thmw~[\[UQQ[fihhbYp½pg|{wsqnmjedda``^]YQH>m{}zywwywurpmmljjjjhjklmmmh\N.LȽŶWVOKrF5L_O/!!)1><<8;OVSNFZЇ!/`G2,()'(''%(''('('&$#! 1N%>\DE -741.$ 0P1!Ag^* ")$.>:GA>YbRMFA?iԸФ!Oqt@.DOPF7dwnNM}i&VvuX .3$#"#GxX-H5&''')1@^cXG4)))+/6;feinƹmg4""#)*-Ejog`X[. !$&% $%'2MRJZ_H,$" !!#&/EԬӶpyxf\jjst]Tgf^]XX^kuyytqG=Uh~wtG342UxA-&.0+)).;sj8/.07hɩÊrpooytnnngtG2345FT9TW9559EϯnYaYTRS^ԣ{}|zz}tprpjq\=::FysC>>@p157LXN_Y;68JkT<:;Bk]TcI>@@IíM:9;IxoYB77:Ypz|{trpngbeottuuvwzwl_ZUIEP[`ba[VX`it{mgnv}yrdbdffd_[utr~xtqkjhfec`]\[YWP?:j}zxutroonkjiigedefb`aa`^ZL3NĽǺ\QORl\%'+Vu= $1COQC6,8PYSOD]̄'4Y=(''(''&&''&'&''$&&$## 4M"MxNTr: *3D@1".Q,"?o\' !-&0;)$4hjSCB\R6o—ÿʛJiv?'2MhqsyvM66%UR%&&),)('%'*BtG[vvsnaRahpuse2buyAStXx>276*&&%%$&''#$.1'%&$%&$(X~iWB~g"Bk{c  35#""Gzc! 
L7''((*1AX`\H1))+*05;igmmimof2 "%'*1q*!!%'$#$'1@ECWT<*,($ "$0AԾѴmxv_LIKjx]Pljdbb[_nyywlrASZsO84669LmR<:;FgYYhI>?BNĩF;9:o}ywtronmkijhffcbdcbb```_[K/Pöƺ}XQOKiX+(/ak-!%3AGJ<12:RYTPFaDŽ 9U=((*)*)'(()(''))'&'$#!!:O!]{|x,#3AH>4'4M.">jT'!$0$-9)$16@lJ-/;;s˺˛!MlqB&-@bhebXD61#!SP%#+OMGD<=.*IwN[vusqhdeepvug5_swASs\s6196)&'%%$&&*4.8t_&&$$&&%# +Mn\?=c&jS 56##"Vmc'!L6&&()*2AXaU;+()),/4=lakoҾùjore/""$&)1]wxg%!"''"#$&2?GEOJABB=,"!#$1HԶӲoxvfZJSj{_Vold`NIbtwxyǬinA8Unh|tmG422XxA+!55,,,-Bw|i2-.4;pаkhÆrrstvpnoqk~m@1436Emy{D756;JЦ}dcZVXdѡy|~ynibhrW9:>zu/5:X~W568MmO<<.177rȗ͙"KlmE%0SxsntpdJ6$"VS &1sH'ItEZwvuuqkiisxtd4fs|CUp_{6/79(%%#%&&->dELU= $$$%&%"(L~}fGB]"YpX 64%# AK`g!!K2$$'(,2B]aP9.())+/6=p`kpֿ̼Əcqr_." #'*/?sb~o! "%&"!#%3CNTZ]bhkO.!"$'0K־ӯqzwskcesvXZjjcK:Lepwyzزfh;=Usuj|wqE721Wyo?)$@6**+,Dxh1-/3;wƗ{struqoppkp>0146CbxmA7578KФ~\ZeXUVd՞x||{lfdfpT6:=Nxyj;::<>~m169Ru}Y9337OqO<<>LtpG@>@Sȝ?99:\adiJ:71-7P_wh254242174LhAEVQRafdqqm}{qlkuuxysvqlli__ceksyv~~qopkaYkrutuvuwwteWRSHCW]_a^WVZ`ks{njmwVNMHEMaigfb\Ww¾go|vsmifb`a^\YSN=>r~xokkjgefea``^YXYXZZZXRG1SɿƵ]TQFPjige[G+ %-;EMPB@TZVRDeĴ}:\9(*3MW<;=B0;/14++)%'"# BM! :F'#@dj`abYYXD##/"0?'#!3NA).26=?GsvE=?>AUɚ;:9>crrK862..Mczx95798869O{U8;41FD:HGKvB?CLXZ\UdtXELNA;<>@OhTbophedfllnxz~pqolhgoqtsuutturaTPOBDX\\^ZWV[ajr|mhqzqkbTPV`ggea]Uu[s{xsnmjjjjgb\R??{|wuuqmmjihccbde``a[SF.UȵUWQEYeqzdE)!$1=GOF?6:V[XTDe~;X9'0m~tht{aQ]KK " KS 4?($Jozz|xxwrS!%-6=*$.G<3.46<|΍!OnZ&0EY^pq_imH(\R",==>AWȕ?::Bd{~[8772.0_nq2=MGD?7FwJ;8:BFA}" FL7>'&Hwu`]^bjpQ!'2!48)%.RZTC/355͍"OpT&5YvwxpqyjN. 
^I *+lo(%,MtC^xttnijijstrb2jvx9Zq[|;5:7*'%#%&,K,&%($$$"%'$.89I\)sX H4#Kzq%G/'&%)+3BW[K6('').17Bvݼamqo`) #&)/Leb]MPI%!#&$#%(8QY[^``dbJ,#/JԴuѣpxtid^jrvVUjgUKKJZtxvwͻ^hw2DX~yl=537OdY3)"91+**0LX.-/2>лɦ»{vsmi}nnpor~i5/357?]oeH524:Vɜ}^RV^l̐~|~xlkegpK79=Vw}l>:9<=W16=RuwN=75:VlI=<;Ku{kB>=CHSZSG.%<.-.1498WtP@?:?D;(Fp@=6.>A:72.4TU=6/47;@=NdSowktytpuvt|}{~~~}oopponqsqrtqommfVONH:Gy}|zxsqqomnoruwvto_I3_Ⱦʳ{RQN7-1Kg<%&""$.;CFA=6=W\UQDmĩv=U8/;ź΁# C?5;#'Jt[('/3VgO#(/"59)&E_idE.256͍&Ss~B(7[vh^kwn[[4!_G!*=3&+NrC_xruskhfgqspc7nss;[n]{7/<6(%&$$('N'&&'%#""#&',3|ܹ\mmʲp\&"%(2r!"&# $'*=QYX\d_ecE#$2OΨСuxqTH=[rpZXkia\TJ\rvv}ؽĹYmq*Ecv}g=524cX1',T2+++1KM+,/1=зӥøtsqoq||llmpu|d30239Q}L536:Xƞx}u^MY^pʍ{|{igbipI8:@[~_99;=[mI=<<<@Yʉ68NhtN:8&! &,4=AB>@RYTPAjs@Y8.8o^## D=0;  'NvO$MqO" )3!68&"&LR;(,145ˇ(Won>'2KhYTdhVW]-"\E!)?)&,Ps?]uvvsnolottsa6nuo7[q_v41;3(%&%%),N#$&$$%$!#&(,5=NO/lx?#&B+"$JyT"#@-&%$%(3J_^E,&&(,-08<۶^mpҾ}opW# "%+3i~v|E"&#!#%,9MURTSIJL."%3RոϝuxrgMAmyoXWlmohYPbtxvԲ϶Rho5NgĪm}d?416ad4,#1S/+,,0HQ..01>ҿʪ³rrrrrvzqnnnr|b32325L}ud:368;\ƜsUPX[uȋ|{{shffnrH::?Of;:::EK58>gztO657=ZhD;;=NwqL?<=B\ā79GE|]ENLN5SO-+**+,027ob;/2./889Jk\34689656;B@:?BJHIP_nYPYZ^afkonnmnrxrt}~z{}}~zqplonmnonmhiigbSKJH@.2CKNTTSW]djt{xjkr}TNODGTdbVUb`^~p|}xvolkjlpqmdUCDz}zwvrppopouvvwn^H1aǾŰ|QSOLrrtZ* $+05;@@:=OYVQCn¿lBT6-/*//*.1./15]M?VQc*$# OM7@&OuO#*MtK"!+7$7;(%8oT<2./4:~'RnuD'4TldeoujWH'dI")+qe$%+OsB`xuslddbgpsr\4oui:as^u-0=6($%#%',G}Y$&%$%$" '1DBCNMM)XevE*&@- !X}=#A.'&%%(1F][A+$%&)-19?״Ymn]iooT# #%),Gg96ga3#!!#&"!$*;SYY\REI:" %3Xˍϖq~xteFSmtuTUnomk[Wctw{ͲG_xl1PhĨ~b8306A;1,#,9+**+2V~M+-/3E¯mrtqoxjlolr`21217Ue`T9566;^Ǘxxp[TXXwĈ|{{}wifkprC::dpB556<[g@;;>RxkE@>>B`}4:=OWSOBpŦo@U5,/Bty\Y]]EN4BA09(+$!" +`O39'KpX3"#4[xH  +3$9<($7NYT<-.3;ġ|&Qp{A&0CXVjw~^@%"iE")7y1$%PtD`wush`Y`cnsr^7nuj7aqVk'0<5($%#&%(/LQH>J. 
%%%%$$"".UzXK.f> D018##>,'%%%(0EZZA,%%&*.26DܯWkdxfnrrQ !%)2th8X?"!$(" #%)6JQSTPKPA/"&1Y˨ϒo|wmO@I[uqR[iid`QM_vwz~έ®?Ppj0VlǮ`5323E^N1#%-***).LxG,,05Aîprsogz~w{mlljv|}}]21238Zmj`>537;dÊ~~zx~|`SUUy…{{vxbhoqpH:89Zv^D:99FE28>erH866=\cB=>>U{[>>??B]x9:JNdC7;503:755I|cG57=:6649>EHKIELNORONMMB""WiK?9JTYWVS[bksy~wlkt}j`]abcimjjhd`m|~vqljkopoongW?H{|zyupolpoqvwyxwq^I7cȱzOTMIbaM8'%  (17AQSM<'#%=G_>*/4;Ǵrqr&Tsp:'-ColxxtjJ7$g<#*I-$)Tv=bzutmghkkstv[:kxl3dn[l'2:1%$&$$%&')'()R_("#$#"##%/g}r}kLI(Y^/# P+#<,(%%%'.?QW>+''%).17@ݩ[os¾~moqJ"#%)9^FqePN#'"#%%+:UVYZZXWS<$ (3]ѷ̎q|zp^QSgzvU\hbMECFdvxxɭ­APkc)VqǥY5539eU/%&.+(*)-JJ+,/6Hƹ¬prsni|nkjkw~|{{]5234:Occ]5546;gշaSRZ[|uMLOP{ܾ|vmh\fnsoD98:bz\D9:?Ub?==?BYo8;;Fm|_<88:=hɻsI]~kQlUBCM]}=>ai0ea, M~^-+,3DH8qeLEEHGC&=<300223E}u]>:>8797:;BHJKLMUgePKMH;"My9.--1.0;[^SYZ]^_djjqu|y~~y{|~}||}|}~}snonnnnnoomkieZOD<75-#+DSY[\XU[cmt{wknu~VPECFQailljc\j|}tjeeknopoleT@K|uwuookjkhhilquxxxwp]I/aȮyRUNDSnujN6*"%,SSYD1/6=Ѯ{e`_ekzo(Vrw<(7e{unaUQ:%i>$(8L"'(Wt?dvutrqsrqsttV9lui3am^u-37/%$%!#'&),1-F~($#$$##"#-KJ=PhQD M" #=-'%$$'.@VW=+'(&),0:CۦYpkпqnpoJ#!$')8We9vF"%!$$+7MPTUVYYS4#"(1^ؼˊr}wqTBUiusSajeUQYdnuxzιŪMTab(SrɟuV563@`j9<;Gt|^=89;=\ndzr77xp0@SRNvq2K^/TS02ApyVV;=KD80#@800235GPY_{idhmlpqwvyy|{~~}~{|{}}~rqomnnpoookie\H?5012,&5MXY\\OUZblsytgpwge\WWV^gjkjf^e{~rieejlmnmkeV=IxtrqmihhfcdhrtuyzwoaH-aǭ{FUM8-@usfC& '6CD1')8QXTPBv¨Ҿ_DR7.5±ðC##$\E *G9,Oix~~{nP+)2<:(.[ZZ`\M@==ӲhWcefhq|j)Wmw='4^ifhigfX>&#m?#+7QB,(" '',Vp?gxvtsssupqtrV2nxk4eh^u+181'$#"%((+Me_uw!$#$%"#" ,TkzkLF B"?2"=-&%$%(2Ib^H6,(()-4pyI:99;R;77FugE865>a]@::AYz`H>>@Aif7;;Itz_<::;;_Į@4tB5Eu[7HmcPyGEɳx<7fi74pT90#(D924:E7WeNICB?;:^gMB?չspk,Xos:(/J_Y[_^WA6#"k>$)3Fa`]X=*&-OeCdyvtqkmiiosoV6l|i0lkgt3171&""$%%'Lv &%%&##"+6fiT? 
#;8U"%<-&&$'*3Nc_QB-()+05;?۞bnjloprJ#!%&):\|phfkE #% !#*=QY[[`[^U5 )6iLj}wfHXJJgjP_klb^]br{v~žš1@i_;t˟vzpN:62BrT."&+''))-+,00+)-07Q͇bbɾǜsprqnyjec_}|}R03333P^JP>677;vιsvٲsogdwxu__dlul?88cRIFI^_^`gkly{rquw{|~{}rmlllkmmmkhbR5042012/0DU[`_ZVUZ`jry{~~}~~pgnx{|wijosrmlkc[e{}qdcgjlnollcSAL~vwsqolkkfegiqwvyzyo]G-aūvORKD>Eȹh+Wsk:(7_yokqjZC5&$k>#*:ƾ.%-Sh>dwxtjadbcptpY2pl0dfa=390%#$%$#(Gt{o &$'&$$""(-bsQ< +&8$:,&%%')1GTVR;*)(*-4:Fܚ^ruįnopoJ #(/ML"& $(:RYYUO:;Bi{J8;:9\288K{[8766@gX><>@iyU?Uk{Q2E~`ps9Nh_>66:992((000/47632*&H;.5GC87>?>SpbY[]jkkjnllu~{{}~{}qmlkjjkmliidK2370-3:94EW^b_YQSZ`hrx{}}~}{~phow|txmVblqsonld\g|}qdbejnpookcQ?Mywsqoljigefhquvvxzn_F,aǮyPSMQy~N4%# "+9;BGC?AERRROH~ӵZJV/,-+,++))+**())+*)''$" %`A 1/%#$$#0]q<+6#F;($").15;EƼe*\uh7+;hoo|kcY?$&m8$)Xʱș/',SjsL*#(-('(**..*+*,./5Z̡ƽÕrrpnwpiecb{}|yM0146>nxx]8357?xеU2Th٨gfbh{q^Y_byh>9=@d|?;;;>Ar{L><=AD{V8;:JV^`^XSXYbipx|}}~}~okqusqf]gjlopoke`j{ogccimnnplbR9Mzvqrpkjjgdeirzwwxwn_I0eƬzNSMGB?$*57+"!*6=DF@?@CRUTQG¼Գ[RZ2./---++***+,(')*(&'$# &Y<  :6 6KMICAIin= ,=#J;,/*),0028?Gʼc/\um7(=_hI\gRff?" "q<$-Rq*-XlElvtutqnhmrrtW5q~e1lei@3<1%%&%$#&&'')N]!$#$%%$!!-OjZdcV:1=7=352 "1&#!"&(+-4>7,&&').3:Hًej{ҧfprlC"#&-<\jxmN`8!&!(9GDM[R. ):xֶy{vfTSVixkT`e\ZW[ckwvzĘM}SEz̍}~nkF761H{M*!&+*)(*,?K<2,,./5`̙Żrqoquqfcab~{yvF00238=HcU;557@~յU5RiUxۤdhb[rid\W`j{f>:;BirT8;::=e49:Lx~b=547GdV@=>BhxT?=>@BŰS:;:\|vI::;:28|øvhQu]bb7:CûT/C`2RIxtMAWlwwoEbK;9>CEIJO\ksqqlOJC0([I7httsxY3+),3Mgndl`3&&/-2;WaXW_:)R4Ry$*UC)+.0020.-##cj749@EECD@*:}LTcilmqt||okilllnnkmiaN;633>BE>;NZ]^^XTWWaiou{~||~{|~hjpwe`[^``kqomjc_~ÿl~{riechnnnljcS?IwutsolljgfeiqwwxxwpaI.dĭ|JNLIfV*X|nL'&6DELRIGCDQSSOCɿ׶WP\D;68640/-/,++-+++*'()% )T;%$>1+Y{tstsruk;,5H@QhcbachbXHHa1\qf3*:ZW?VM;hg7#"s8$*=m&)1`oBluuul[agmssoP=tx^.lfl73;2''&%%$$$()'=z`"%%##%%"#0OY`q]S7+5?etc  &>&#! 
#(.RflT8359EҲ]8>]aڢjg`_puw^Uco|`;8:FZJ@B=:89j179JoV3658IiW?=>Fk}R>==?IĮO<:<]{rH;:7;CúszfeX>Qa˹W6CSV?`mn{J8=q}_8=Q\\][TUWY`jow{|}{{}|}~~|kgnz[[NMP\psqnhaWk{~shcbillonkaSCFuvuuooolgcdjsvwwxwn^E+iȬzNSP\uKI`')9?SiyfEDתp~^0Yme6(6KUOTGIZG0""z5'*_‘.*2_qBmvvthclfluupMYelnpC99FdvH>=<>CȪD7:<[wwL:89=I»ɿ|VWv_\˸{\?B79rnW58Gam@EIJMNQSTSJLJD)-d@=eotwrP.)),4UeebgO2#*0/1E[abld-3K5d^%,G>.4IOPLJC1(wQ,Vg#!F=.+.0-,//Yymgfcgkjklmjfe^SNLLNLG>961U|pD,!)-)'*.@dsZ)+,16jʢļnomo}jdef_{{y}}l=0238@WVS?4579Hҭwu{ٛ`a]\ajhSYdl{^;;=Vc<:;>t28;KN88965:LlR??>GjpO;QWWZXTTVW_gmsw{{~xxz}}~~}~}}yliqwÇytsnvvpnje_esfdelpmjjjbR>Jqvrppmmnihgjquuwwvn`J0lũzޘKTIKqk14j@!%.+)33/64I~SGա|{|ѼY.`vo0(4HQVbuvoR,! .r3$(@k)(.goEqzuslda`mtvrQBp]0nbbz889/&%#%%&)^5#$&&&$$!&<}{TYh277BO3;&"! !$'+/)%$""#+/3=MӔtrtmkfba^Z^bputrj< #(+Oo[UC3 ##!$" "$ *CԸԹnvt_N:DgrjOciUKKM[ozz{Ã]=Ttymf=95-TysC*!),())-5J(,-19qǥýqpnpxweadfbz{y}|i?3448>`{M537:HͪՓ_a]`rwlY[fo}\>>;IvvB8:8;u}09CXfgkZ569LkU:=AI\SGLC?>CNɠ9:=<`qZ:88;9QѸǡah:p`tQBpwml9Mg_;GJKLKNNMNOMJB);b92Ldjk_5*')+5Q]bgjM/",026J`bedW*@V6r\#3]G,APUWWaM/'K9/`K &G7+,5>>@<6RWYX[]TUYahnrssuyz{}}xxz|}~}}{}~{xilrybXSVT]nvsqmg^l}vffeknlkkicR=Lpqqomljligfhrvuuvxo_G0kťzۏQUJ8AK:299*"!"!"%'%',nyvneC:5/Pvz?('+')+/Gl-*,,16tz¾tqrq|ogafg`zyxzf>1337@w^4456:Jˤϒ_a\k}m]bho{X:=>Nc7:88A}j39CqyE468KpQ<>BblttoG=?@Jʛ:<:;TJ7:;7:=SԻĽ¾¶ouoGq|\5Oy]@IJKKLLJLNNLJD)Ae:3Kcg]I/**))6V^bjeA+"-002IZ`eia'DM2wM7W>+ARVXVZF*+H31hxC.G1*6NRMUM4:ISVPID?DGLPQP\xoefgd_[WSLWa]WR[VLOVZcbhdhlhfjnx}~~~~zwvy{z{}~~~}{|{vhks|oh_UQasvtsof_¾ltegdhmlkjgaR?Otsooklmkhggirvuvvzl]J2pŧTVMLx|pO# $# $,>OQQNGҭH)k(-Y0 +X'2YQ' 4=B;dztxx|rQJԷ۲޾йV2i|g0);^ppvkZQD3##(z+&*Zǃ&.dnDntvrib[alrusK>~^1q]hv644,%&&$%%(+#/hT&'&%%#$!'>}kd2013d~]GC*%$$%)5\^a`H)$(+/5@]½Q"$'*:qŵU!% %0^syyueG)*@Ӵӷhvqjcsku{bTbhZIJQ^rx|ŬĆi80yÿns}z{to_>:21Ov7( 
*+)**/Dsg)*+-1>yƒutqpuhfbfb{{||f:1336HkZNC645:Sʟxnwΐc_\bszqfggoyQ9:;IY<:7:5d18:]yx_346=QqP<=AU]@=A?Mɔ7<=LY`bdB::=Uֻ˞x~{d}J:k^?INQJMPNLLKLJC'Ba=3:FQP5/,)*+4DS\^X9)!,/04GU[`bI"DN4mu15MI0CRWVWVA)(@/5n|rG-A00>Z[^fV2Ly{wtqnonmjjnrtvvvxm\E0sڈJVR[}~tS$ %%!&/_P"&(.cU1"#")G}qv~xhU4+DϳԳltwvttv}w^OdgTKRPatxyxϻ˂m52·jnwvyrnc>724YU3'#+*'**,O{`)**,4?¥{ʿ}trppifhhfhgef}~wc;1357Nep}D5466Tǚx^Z{nqʆ_`UgyiggpR7<:Oyn];969>`479h|G5479QlM>?AM{qH@A@CPʐ69;Qn~e>9872Bu̾þ½Ɛ{tľ^uZ?M[d`^^\SMLKKB'RkdgRI>1.-)((-3BXWVE3+ .122,2iC0C21@RW\iG/>QUTr{ZRG`~`]jjbVWbnvwuc8,2024342334039=7!5GA@?DIHV]ctjmrtusy{ttwyrfox|}~~~{~vjkt|Òyfemtxwtoh^»c}zwonjiknmli_R?O~~{uutvopruuwvvwm^G2xƢ}ޏNTN\sBXk?OG( &(#(1?RUPNBƹ˧F.j*-U/ P*!04)NE]olAGԳβͳF7bxg2)5CIGGNOL@6# *|-(+eh&',bhHouvrc]cckuurGC}^.vaw·x745.%$%&$'(,4/,:L($$$%%##&6kJ`-3+1p]&KJ'#!#%+cһD!$(.L`&%#$ !#%CidgqK$:fo1!,Lï֮jxxwvwtxya]ih_][[ltz{y~q8MóilvtwqmZ=801@N8.&!)**)+-Wh2,,/4BϮde˿|qrsrtslighga`yg82266;`h95457Sōlmx}mr̀^_ZqnfhgrP7<=Sex^8:8=ET489^jG4467ToI=??QgB??>@Tˌ489ewV=8:8=R39:gw\I5348WpH??@Hz]G>?>>UȂ3:;Dud:9:8:O˾I@||[<^}xrxylOPNMJ? 
`ƿb.1,()-Ad\QQ4**'.47<=@LN3(VJ7I8:>91,4;//AQUX^T5"5A,2L]lyxH%>J.0DTVZZA4APPftZT@c~qmiffXW^^VRNH.+2,+,+-/,,,+*-+$-D?@SmhdidJU[HIHFIMJZn`Ub[\lov}ropppd[jwyzzyzy}tdlt|]\VW[gt|ztof[d~xvqkjiklmli_O9R|{|xvutsqrsvxxvn\C5zãގKUK9.#/*'4+ !*,$%)APRRPGļΣ=0h%6c,& $T*(GPHC?9-# 48,ILvKRΪþβ@6h}l-'*/620/.,>?Svt_C>?@@V~598Dt~Y:889:nٷX6FCiŊ_CɿSC\b]DG_kRNLJJ>kQ02.)+6y6.-)!049bjZ^=.'N[n|yT0/*5@4/5CMPN>+!7?/*1DNL?/%?F,/BXY__G7FSLliXSFlvpingVYXVVEG@0.5.--/6F2--/,,+&-KEFYiik{hCLOJIOTUXOPMp`n}{{zbktyuu~{zznmqrmdWlvyzyyxx{t`ov~lfgd^et{xuqf^þd~xrnjiiiknmjaS=U|{ywvtqqqtwvwzmZH3x¤܈LPF1,+'%#($ !'(%%+?ORPKGƻ͜9,f"5S)& %K&-Pb`di[E*!48.MKfkkmuty}i?TmYϩ3:fve+%0DPNMQLFF>!%;~'#$)8TK<("%(2ifEotwsdY_botskK=|~U-}]qk/7;.'&%'')/F[T &%%$&$ ")A_=?__m*5)AjS&\A!!#,68:?xƺ<5Jz|BMe¯VBB-1HUX^Z4/DMGn]WREzwkaiaox{dKNH313../6TYWCFZeijvXINPNLdwspuaHjwo\p{~hivstvvv|zwuxxurr|~|zwwxvzpeou„ykkt{yvqhfþa{vqnjgijkklj^N;P~{xwvssuvuvvvxk[I8ۆIUJ=E?,/;6+"  &'#(-AOQPKLʼÐ64j!3X'"&@!4V_\\[O=&"86/J- &,684\ڿqeofϣ3;`rc4&=_spnnoe]J(+9~v""$)8MN;'#%(4ijHrxwukedflsskID}Q2yfƳe29:/)$%%'(*'UxM!%%$%(' (BfvZa_l&>-6RhZ$"_C$ $(.dwy~fG/'+18Boɒ[S_{`e3$*+KxSD2!$#,EgbaY[`bdR*" !".UӟmvqZGUfus[angihgbjy{zΫugyy:˥`puqnljR974HE7?awnG867=beC>@Aj{aA=?A?bt699J^t[:679>{48QJ7Mʼn-4GŠNsX-m}8pŮMAMdnvOMOLNK:&gh9./-*.Jj.,+('4<3/)%aK..)CN/,.-/2/)'>?,),,+0+)!?G-/8JVVE01=?HwmaXQFyiZZylUUP123..0BfWiwsz>.*#!CMBI]gfgjTGKLH]aFqrtUxel\NQYbferx}}}~}{yy||}pimvfe\\Zfuzywqib¿h}xtqniihilnli]P7T~}|zwsrrqqtquwwl[D2}ǿچERKSpE2_s]C% !'(#**=PTRJNİĶ93e 1S"# +M!:]ZVPIF:&!1)2L-$#$#((6<3[Ϭjcjrlxϥ6;esuE)>Yfelnic[R)$.}r%%$(:LO<*$%(0kcErvvskfegnrsnFH~V5xe¯`39:-('$$&()EG%'&%%(('9r|[kUi%I/7de!"eC"#&>tywbJ;)&*18Coйy{v`ϲ/#)2ku!###(7`ijghjniV) !#"0SХҗk|vmWVakwukqkgccXSh{{~ƽͷÿm{r:ʡaqvrmllP9=15b|b4$#0*(),3_F(,,.4GѲpɹxtpoojgaZPx~Y6358GYolb@5569cҳxyܽaWVTz{rnnjqyJ?AA>;;=<=:@HF8AmrqW3467=ejA=BXx]B@@?Bfo3<NQRLL72c2L"% 
1GBZIGHGF3!#0+3O.$/N5*3:JLB]ЗYagjuѠ.7az7$5HM[kkYSWD#(u'%#)eeE@CXkjdG>BAA=ni4;Hg}pX989:;y685mžR/5{/23Y}988}ʹƼN05|ú^_ze2TĵGAYv~vtvg^feZX@%n˹A0/+.4AD--+'&4=xV2.#)]qn,1.(?G---/:92*'>9,*,Mxp@+PA*-.149.+2>K87751.!#56+;,&LnPVMFOK3^̔ivs̟&9f~n*'?_{qrtha^:!.n'$!&9MUG+""%2m^Evwusl\W_lsrlEIS4~`xc199,&&%%&):qcni"$$%$$$%!)Met# P&;lyY0)m<"'-F]]O.!!#(/7Aqf"26r$"'+2;$"!$(RsdffdkiP&" $2[̳Вr~uj^WcvzqefgeUARhp{|ɹbh[s˓\svqlklM==29gT5$+6+)))4vr?+*,04PͦĿǺqpllpzraaTXuP33468VP\R9546:jӫiClۭ^`iv}{zqmmo|}yrnghbZRg@::9:988888>kgCACB@=>@A@@B?m`7;NorhD7699@܂,47H^91j\-33p}734u͔32%$??&;.(CUQ\XY_Q7`Ѹĸ̣(}rgF#!#).7>\ӼT&10HkK%)-*'! ""!*Tg:5BDCFD1 #6cʗʎr|uo]LfuxqT_icSR_cq|~ǴensiΘktuqnniM@>49mU0$+3*)*.4szF-,,.6Tβ~ȹ vrojwr{oaZPhsL54589T^8435=>>6776?pC.6C|{1Qh2Ln[S841ú65E9\_*2AoS[de35Y<930,,+X\-/-&*57np6/#+Xn@10&'`Q0-6oI*)'D;+-D~2($P;**-.+.,-5BSVY[ksrmbXWLKy]i||JXZN463...:.-)"!>GACHT_bS?@GINh|zT[n_VPgbTbqn|||tiqvsr|jiry~k]mwz~zvrkh[ypniheec`]]ZZWVTK4M|xvvsrrpnkgihgfefdeb`a`[B5ŘyQSH310(*0;8" "#"&,:POPJQ˸˖27^=U",&& .3DNC>?@5*!%<@-?-$3LH=TveQ?`̣+4545>u΢rUUy۱}yy~ohib{vsqlfaYX]qric_YSRQOLKFFFzõY>><=<<<::99ns/,?al_ZVJEA7$ 4i)$#%1FF=.%%'3q[Dsxvrigkjkrul?J{K:~Y{f598-'&%&$'2b~?%%%%%%" (FvoZv#? 
4apZ'1n2$*MmwrwG "(-15:Bː[RF<9:0#-Jp`RIA<40M=!""*Xnejeifoc,#8fʅu{to`Ncxxoj}kc[:Aft|λígjomnʊdvvqmlmKB@5@wX-#/2+*+.8tW0,-/7\ϼ~Źnpjirb[WcmstrnrM2366;brbM8348?}Пz|hZyۮxzy|qgd]Ŷ~yqoiįaMKJIFBC>2JxGJVmmkpndhhgsv5X|5510/4lxR0/.',6WN0/!?f~t71.&'_T/1>E)&)W=+.At1(*M6+,0Lmh5.9JY]]ahkng^YTGMsrtw~}vk]Y]]I583/17Sxy:*,)"?@INQh{[Lh~LVbbTNowPSl^Wmjr{xp]}wwuu|niqzfXVc`fx|ulhdx}tqpkffc^ZZYWTONME9O{zyvrpoojliihgddccbb^__][[ZU=7}“uSSG9FKB?UR""#"%+ARTQIWʳ˚05\AM 0$( +7/Tg_fk\B#%6+/;*$=QLZ[]V4jˀY^X\_gom̝%?ls((3KfccmnZJ8"!9h%##(3DJD2'%(8s[Gsxvpa]`bntuf;IyJ=\e698-&%&&%'7xp,$$%%%&%'.1Y{Az"?M|h-3m6 %(63qmJ[TVI*JL0S6[VgFEP#.2FgXL]w=15?|u>4CxqEJfkf`kuguvy~V/buJJA<3-_I/1/& -7V~{Y1.#Ed~x3/0&+gL+0CB+&3W;+-Ah0(1Q3+0>t2.;NWZ[\^ab`[ZSDTrrv|ypf^_``F472-.03FPKGF3,+)"@C313234flT\^VOH8$68/@k@)Wv};%P}G'-7*T|LE}i}U*24U{|yB4>jaCLbXfu}flvxdG1jeʹg};+eSD57-/6KQ4/#La|~874(/cB,-F8*&1\>+0Mj2'7N3+4Jm./?UYZ]^__^__YSE]r{yph]Zab`^^C572//,39A@<5.-+(!(>CN~}KLKFN\e_QIQjt?KOKHUpw|ylZ^n^Wp`z|{wt{zxvwydgpzĒ}vkib{xuqlhikijkh]M9V~zyvrpoookhggfcabaa^`VC6tKSHBDEJMS^?# """#*BRTQGTǿϕ-BXCJ"+#" -0+:423/-+!'6*/9(#*HMS9,726pɥ¹̖#?oc*,EerillgS?,":b!#!"-:<8(##)6r[Frvsqklopqvui?N{K;Xvd6;8,('$%&()G|X,&%$$#%&(Gks[ 9&FC6-9i. 
#%'$ !!$(+.6rp<**,18e˻ûqmjhdRaojrcmmly|pD5568FQMkU7468D֛l?ohuؤtywzxgXPYƲξqS7111225MbMJNKC9."/1.1516HSnX8Y@&.5Z;}a;@P63>24B}S=b|u{~yZ>s`cyՙkuvqbWR]ƴоuX30102158jm`\QJGA:/"),,+,01s/3CB/b>*12][+Jhe<36 /N46Vv1[}ilzuI491& .1 DSMILO[O&(7(3A)':CN[]YK@3sǰˎ@m~]$+D`d\ZYVG~Zb8=7*''&%&+@n:y/%'%%%%#"#$&/>E!1I`ixS#8r-!!%)E\T?/&""'+069Uкр &*+/Ib];/q6& ",Kbinoqqhh>#9yӱ׿pvtkYVg|ylirkgkpsu|ɪǛeJ8esovtoopk:C91?xqB.&-+)**-@k**,-17m˿jki}phdVTkqjynlju~dxp>3468HymWL8568JΒh>{bYhВgnqxvrn^XM[ȽͺvX}61103104BzS[hYKIHFC3%,(++.6SC.9EI3Xo44;434-L<\c@0..=`31YC:yd[C.49@Rovkkmjn|tR,p{v00KǞB39\;.2WQ/,(8M0*/BbpN,$?J3.2MU,5DW^__^`lpolaTN{sw}}vrniOM617/-0579AP;:=3,$*BCaJGNJvcviXe_XT`ptutstqjafi[UW\ad_YXo{vy}}zyxvitzzvv{txpcjozÜ}vll`{yvsnmnoomnh]L=^|yyvurprttvxzwhYE>nILGPvxoo_T;!!%$#$,GXTQJ`ưʉ+FRAA.$00=TYRY\[D!'4&8E)$ RSLFZ]O5uΧqp|vgp͎Dm}`'-GehjvzpYE/!#9a%"")@RK3'%&(:pULtxtnfhj_ktsl9Mx>B^|h9;8,(''&'+:`8'(OH&%%%#$$""$%+5>H); DWJ.4!9d' " "%/Y}zhX8$#',26<^u").7' 3m&!!+CcllicSDE7&=|Ы־kswo]RfvzkdinkilkqzǴƚoI>ghlwvroqk;?9/KwmA+&2/)+,-A}j-*,-39sʿ~iljsqmf`I\opjqglmzkzl>1679UmZhV8478K͓fIrSWqЊ_jjqy}o]UP^ǿ̻{`{341111456XbM[`J@JJIH5,3',,-4;vZ-3?KH3Qe0A<.,,-=O'+;7-.+XX1/@A1ArcO_6129BGHiyurhlm]QZZ*c}ǯA,77"46Hb_uuDS7-3I`nO.+% >K.+0VS-%IR/.5\V/7HW_ba`amucUO~r{y~xuiO358206GTTTM6EP6+$*CGhsILRU[zunruuussrgTbkWUSVZYUV[q|z|~~}xlsxw|}~{wvu{sxqais~Û|vkh]}xuomkjonng\GP\enfcdU6!#9[$%&*ARF2&#$':rTOuvsrgbbcpuvl7Lp>C_yj3<;,(&%%')3TbC+*)&$$&#$$""!$+4>L )?!f~X8(=b'  %,Kw{zv<"$'-38:_p$.EU:43(8RRA$!!-YmeU>85FN<(>ҿպjttjQRiv{hZemfNHA^xȶǖpCFhĽdhuurqqi6?83Pup<*':4()+/J|vN,*+-3:w|nkionjd[Riqpnpfflhkkt@3878d}ss?7569K˓WDnVZuχ]eaqyeRPR_ļ¾˾ziz.2023025KaopT[bXHJQOKH072*,.014F]/26CDA2Ce8.-*()-.b{)*5>957&GI0.1.2?mKEWD41/8AHtph_SUerc(qЮq+03gy$ 6rQQxvutjjjlpsvk9Mq8,(&&'&(EhƸfmvvtrrb8?83Nj6*/:/()+-5Td7/+*,3<˽{ifimjgdZSkpnlkdcphhkion:367Bq^ZC2677:Q̒zqKPsll̓]e_s{jdTST`S8=GKRZbcjsvrν¾˿{nt,334624Gw{yijfTJLKSQG'7-+,.1355``0229CDH1Lu6'*(((,0<5,2@E>=4'AD.-/.04\l5RRD3//8D^[L\dbc{nJ$$~M}V45ƪ= 
#:7TRo`ĥ!GN6.2aa--&/o^..2jD*#H>,.7ak;.6HZ^`a`cy}kXNt|p\<7724A\G8JSfc8-*% 8OESVMSU~aK}gvvwwttsiTmjWS`wf`~v~ylosqrroortwqvjaksž}yojasmlllopnmh\N;_xvuussrnllnpsuuvwrhZC8ƸhNMA6ipl*%%!%.@NNMHdƻȆ&QKGE) <6(A?.>5/DB%&>/A@*" MQ@/2565ǮȃApY$-Gm{mcks^B0 A^#$*DTS='#&(;oUTuvtoegagnsrd?Ml;H^g8@6+)&'&')4_{bB)$#$"#%###%*4?Q*:$ay^*E]% $2aw^B,# "%+09>ci&<ÿÝ"! ".apcc`]]a\6&=ϯ׷exsfPRsxygZkncity~ɑp?Kgɷdowvsrpa:A94PjD-)'/,(),1Xol\G---2?ʼh^ccjfbdUWpokmjffnvnppop|~h7445FU6346767:R̍npp]``vxzgNRQaM3678874328<:ɚ̼wwm)27AA56KxfIJIO^SB+..,-/3ELDL=4759:GKG1\|/(*)**-/.O<.3)-4Ekg=15IZ]acbf{{hSM}t|}ulk_655338IPGE_\ZK8*%#=TOz[OR\r[]izzxwsiWngXUo`c{wx{tmllljlorxs{|jcmtÝ}xpj~c|mgiknoqomh_M;\{srnnnkhjgghksvvxwriY@8ǹiQOHb~NX$%*DUP=(!#(BqTRuwtnfa`gnvrd:In;LXg5@5)&%'&%(+Ho2##%$%&#!%%%*5?O'3Rx}`<B`&$/QhXVN1#"$+17;af';ys!!1[YY]YVWWU9'?عֳbztbKTmwxg[jomrxtm{ɸw;Kb̰^o{uqtt]ȻTONPTQRPHZh`dj^akvz|ywuru{q;225??5666656;[ʌ{zXWb|[PQLiQ688;><=:99=@[6>CHIT\aehgf~qlu{}ɭ{|j%5Mvw>3HdKHIVXLG020-.3Niolf>MY`S:g]P1k2(**++,-.5cp6899@>EA8<556/../.04QvxA16DE1+/4ZO)f}NW]?I--3mX++%*M?).9uA*"NB+-1Z|>/9LV[bcbf|r_TQzt{xblm64732F^T=6YqaPA0#!;MNtVPUOa\wv|VmeVRn]hyu}xsgdgegmrswu{yjdltě}xnlc|ojekpqomoi^J8]|tqpnllkjgffjtwvwxshX?9ƹz`ONHH;.*,'#  ""!",DQOOJf®ƀ"[JHD ;.)JUIVYIRD'+K35:) .VW2+0578ٺmurrrqohvy?hb &.84-14+0B9 BZ!""'@VS>%!"%FII^TYknorrruswg6/014212421457_ljrSViwp^RTQFgL688>OJM?86=GS05251641436WgE=>>ACDJJIWSY^к}}t{sxh,>U=S|t`DOZZKO* 90,0Blzm{v~S]bA$}w.(+,-/0-.-//>cYUC@F>=E012..1./14@^cM;/6>6/,/4?F[g`_glZVginqS>v[,Iskzór%*;DsgAX'jt~fmu`R@F1-7qU++""K?,->u=)'YE+.7x;-6GOU\_^boslg\PQy|}vu~~_q]44742@TK4>L`cQ?1#=HFod>PUPqTo{yOgbWWvZnyszndabbgmruusz{kdhuƜ~xmkÿ~^~rlhkqspoph[J8azrspnlkjiifgmruvwvrgX@8ŹdPNFNqqqoZP7!"!"*EQOPEdɴz"[HXN E7#DX\O6FW=#,tOWsxxuuttsvtsh8Rr9L~d`7B5(%'&'&(*Zs2#%$#$%%! 
!#$(4@V-3"[qZ-Q_&  #/gxUH=$"%(.4::mңId['3O[E<4/$/A" "#'FfZA-Ub^U4'Hɣի`vrgXXpvxcUpxmN7Gp­ƒ~7GgϤ`uyvtrqVCK43[}`9',2+(++H|W5+),5EШnȺ,/00/+--00/41X8CJKQQSUY_eg?,++-/..-00/235edžv|t|~eHRdh`TQPHDnI89;TlmrK98?DJPUV^UVzpyf,>ZZejz[PX[WJK* 8.,1arIXWO.uc()+016@40119m{sC?@ECD'/1-23215@uyr}|Ĥ2-jyaNLJ2/=wI.+"*S>,-@z=(+_@).9z8.4BLRW^^eloug[L]vvxpn~`fQ15735IWC8ElfYUL4"!;IRyqhPTU`w]~|UkdU[|Zmwvxlcdddgnrvtr|}idkwĝynl¾~Z{rjilpqqrpj\L=[wrspomkjifgglstuwwrgX@>ƹ|eJOJpqik@  &%!#,FQOQFgǀ#_E+oI D2#I[H3K[N9"+848)$E_UL7/476԰s@i~d()1FLDFDV,31_a9GW(!#+[]..2%#$*-39>pҰuԿR%C}}v{ !!#1ceSGash^\B(IͰ֧fwrfR[mwwaYrnYK\q}ĺÆ{x3GbҢbtywtsrWAE23YnX9&+>)'))2m_7)+.4Cϫǹ,<=2303244323[-75250144476.()+--,+*,--014băslqh}tڽbDV^_[WOGFLqD8:@awqN<:9JF578:J?C856;^kG?@@ADDBDA>>@hԞ698967845462X}j{_'Uydz{i_[M,%3,-0u~9hB,"xh++0H`VZJCC?P|CCNL@;!0518C@88J@:va08;2/./30T%1;AnF1jy^WK41;yF,,#,M7*,=w6(.[;*-6*'&%&&*OxO $$%%#$%'&"%$&+3?T+0(roE)JY(!%.Xrro`4#%+/39;y׾M&JƉ" 1PT]ifJ3YtI)FԤfvvhQXnywb\tvzuy}̿o-GgѝauywsqpV@F47Ype2)AAC<7.(+850200.24101dtrKEGHYNfyںjSZU[a\OIKQsB8:@ZziC9;;MF68>XnsmC36BEW`_TC=>Bḯ699:;><8765:\^X']x}{k.".-/8<+/~f(*?}xvrp_|z|ZNL<@99<5egWNM\NO{mMB?50/03ǧY-8CD/id9%UG20 $K0)N\`P4FM2.764)!4]H;+057:׶͡i>o_'1LusblvmfP.%DR"$,CTK/$#%%DrPZ{y{{vuqwxvqa8Uw;O{_];>6+('&&&(.,"$#$$$#%%%&&#$'#$*4>S), a{{SSY$$1nxwo0"&+.3;;{ӹԹE %Fqktk!!#(@SUN1(CbcE *IҮӠdsrbMUtxwaewz~~~Ŷƾ~l/HdӘcvyvsroQBC68bc2+T=)&)*2h_2+,/6MzƴMxdlegkg`g]QBaYxhidbhdcdXF-,@ZQLLLJQGLG;3gӴIe^H?;8<>?]qغi\VWbd`UNKM|@:9Acv\678gcB:=BZz]?>@Amn4989INLG9419^Ra*`o{&%5,-6thTOe=+/y^**T}vyt]RD;,G?`}ttrdykYO8.24=Klwk^SO_WSUPS8`uirF0^ȷP /34rƆ3qƥhz*'NB02C~A-,#1^A+1Ds4(:l:,4Km*0;NTW^`boxgqnXE\{b^vyvrmV/54.3AG>>K^YD9/+"CLYSUX]le}t`vaXZlz{Vz~wv|m_~mabcgkpsvuu~ybgowơ|wpoyUIJzpjilnlonnj\L;\uusqnlihgffgksvuwvqf[?7ǷwbHLFG6=K,3:#!$#$&.AKMPGkçza~C.a<(*I+&H[\OIRD..752'*X`F=2158>Żҷh=m\&1NjW=GJED@+HN "#+BRF/%#%%GpQ]w{yzywwsX3X}=PzW_rsB88:lkU=56@daD<;?X~W@?A?uf387JnnmT:546fzRb*]|Uzo$/7+-2z\=>d]K;)4_(*]uu||xkU.)D>~h}pbH<2>Jgic`e}aQPNLJ9X}C5zoXi16Qī~t.>[^z}C(SE.3E@/* 
?c7,1Fq4(J]8/0Jf/2?QY[_`bu{tnbKCa|~kYp}|qkG082-29B@Gat_A++)!"BHW]elaWHWUVuitigybWSnxZwzyjOymfeegkpsvvvydgnxǟ}zvmm¾xXƲ{pkhkmlnnnh[J:_yvsqnmiihhfglsvvwvqh\;<ƴr`PKDPqoha\V5 #&2CNOPFnʴv"jCRwUd?D7(((&'&$$%$%&$%%$%%&(%" $$&-:@Z-%,tk)`Q %/_qs|N#!$),3&-bxuvi!$ $-O_X]^\[cpI/QИk}urTL[luv^hvpLKQeüʲd-NlӍdx{vqtuQCC58qX0%-0))*,2SA(),03UyŲy8@CBKJIMUPKH3qxE^[[ca\cb\[?*.W_hbppegmfgF:rט'96330431/,:گA:ABEGEC?AJ}~77:AlqI:7;<=NuxA>>@@w_875TypqK9653fpP}]*Vvt|RR8LpzycO).+.-A{ptK1ml6(9]$+`y}|w|s,(SOl|qdLFJfd`sPQQMKI9\ʷB7tug3!#'0COQMIus!g@Qr<&'J/&CTOFE>2#-:<9% 4W^B.159;չh>l}S(0I]SIJC498' JI!$)8>5&"""(GtL[wx{^4ZyAQv[]5A3)('&'&&&()'&&'$$%$%%#!!##'/:D^/( 2H1`P"%*Bgnb:+%&*/5;DΟն9%RͶ:!$ #'6HB?A@ITjA 1WȠӔi~uqULQfst]huvnw||ơ}`0OmЊl{{wtwvKGD4>tV2%,.)((+6b>'++/4WТĶq+.0-.0054793.xr%24A@7@CFA=1((7=@EEFKOZNR/3yՍ+USLLKKHIEAD٥7562057777Dzx<8:@pyI97:>U176D}o]>545Ahc@>?DgY=?>AFyW687QVdvJ7763_jSS/\}GocTuNgyYbZ]yO%2+.0a|v<35^G/&DW&.`rwyxzj.(0oO\jHJJU|zTJRQONK;Unpҙ;9u_4=c})Dv42YB)5Ny6,&;O446NR1&BS1.2T\05ETY\^^`rbTVOur|~qebf|veUZF8:1.7LU<6@ND3-+(%@JJr~mRY[w\qtihsmke_q_W_ynh}wu{uvlgffglrvwutuefq|š~}xops^;}qjhlopqpmg\J=]xvtrpmlhhfggnruvuuqfV;Aȶv[OOFp]:p\@Q8 !"!"&0EOQOBqƱpkBIY1(&T3!0/3441+%.;>4$09(*047BԷ¸пb?pR(0A_ihd^WL9%"ME ""&.85+#"$(KsK^yyzqqa/`xATy^W6?5('&&&''),-(*)(&$$%#%%!#%"&/CF`~6)6[G;,]P$.SomhY2%&*.5=>ڼѮ7#VaS2+tC"!$-Te[SYUR\t?/XŦЏn}urWP[lyt]jy~}zƶ̰ø{X.Ps·lz{ux{uFJB/=juT0#),(*+*5j5(--/5_̞¶k+399<789;8313k(5EaX\YM<5.-'&)-6<9<=>=Ew:99?jt?7576\279Gfs_5434Cp_A>?Cm{P@>@@IzV68:Hvc578:=aqUS:j>OwcXYY&&9,/Ao~lv\TL;23.%LS'+JPglr{eidpuzJ-)EnCs|\RmGI\l`aXNONNH6\в~B9G^U!6:[)SwTj/6_>-3Ql5,"?OE^nmlD;/GV0.1VX.2FWZ]\\]yw^cQNvlfQbsvqg\]G990.4LG398<:1-+)(HJguOVTamY]SigNtnm|vuptXeq^Xarr}sx|wtfgdegksvwuxsciqyġ}|wopybμxpjdjmqqongYNEb~stsrnljhfefmsvvuvqeY;^5&'!].*T`TQRPK71A=5% &(+/46@׬ahhiglmmͽ_Cn~T(,4Qedn|zhE* 
LE##*DVG-#!$&ItG[xxzwtpa0ZyAUtYX8?5)'&%'((-IX7GI3&#%%$%%!"%$'02BfxS0&+*'(),@o1*,.06c̪rµe,?TYVOTXOIA66c*8]wksm_O6/-'&)1ENVe[T>0-18|9\T]g`[dcdJCؘTcZYVVURSKKs77:Fql<7876`.78Go[3335Es\>==Bf~yTA@@>C­l68=SxL555;KrxNV>H8?}nwd+.B/-Bh}ze:51&RS&+.632C>6/5>:J=[h+)'Hb9;CBQuV=bxqp{PANlwfaiiWMQPMH:n`;/GȢQ"675Zv awdÛz/9^A26RC1.$ D_oaORMF7OV3.0[O,4KY[\^Y\s~sgbSMvpmavunnlgaF87/.08=C83Z8)%!S+1VVKLMX^;19?4&%(.48Aԩnz|yptqϼZFpU(-@fqp{{dC- JE $-HSB-"#%)PtG`xxwtvt`1ay@TuYX6>4*'&')(&C{\%'&$&%&##$'.9Bbu3#+WUR`1`M !&-`~wza&$'+17u(56?CGD8?ՐZ`_\ff__YSSp998CsqA6679b067Juy^7336Hp[?>=?fL>??\\H6-+&0LIgQOYXsb|vcfn]Xbgxyvxr_buiefhhjpsuuvqfkp~ơ|xlntaռ{vsqomooomeZKSuXZ9=3*('(((,an"'('&'%! #"&-8Cey7%0d[P[:iQ!  "%9oplnH$#%+08@Mѭgԥ/'[oilpw}a )&! +ELJSUVSR^=/fӱ̄oyuj[Tgv|ubk{phttzıyO.W~uk||wrmHG=/163,*%*+'')0Dd2,*+08jӺz̿Y+AMRJOTKJM:0=U/?DDEJME622-')-6>@OPXP5,-/;n%;GA?@99831I،BIGJMFFSMGHh876Brq<868=j36;Np]5458JqW>>>BjwC?=;?LóZ66:TtqI6539ISD<96?igw~|~JKC93Kkzn_Ah]0,/.1@N\YMMMIKB8@3(*,)@V1.005A>jb^^VQujJ`kkZFIJMPMPPNONNLK1dh]P\^\\RNC>;Objql_:NC$!5=jnc@grOehk[^?X5GpSR_JBMPZQ4!Vctvf^YL=#_O..:i9/7O\\Z[U\~wgTNyfv}x^SQepm_S76;.0#/. 
" )IZ^gnteim1 3gҸ~|mzwn\N_sxr]kxtzԥuuN5X}ri{stn=J?--./-*&0.(().Bm/**-27pćx˾T'07,?;C;:6049J7\hfa[XVRKB-''3CHCCKDE<;;1@g1Xebe^WXLB6Q+<9::95:<;K`769Jy^>;9::r*67Ow|L5658JtX<;=AlqK<>==LƤU98@`{uH7546LzOH?>=@UIMod~eSa5#FF;20=eKs\1,/,M{{q\=002*=T/.012.hQ=J^a_^NB1@`ZAIJKPOPONNNOLE.#qnmxnqhbSK>/:YRWPOJE=(!::>AKTSX3DenZj`caca_MKoNb`PVZn|k8N`a}}id_V?%WK059avO04:NZZXZYpraRD}]oyXP_oso^M86823C_\d~zwuurqrpqtvutqfV?Iǵ}YPNDSrsjboo6"#!#$-IUROK}ʿhxw3;Z-,, L&$CabcohM0,1=/#&+079DжSGtZ%1Ngecjsq[B+SE )CO@+#"#)GW=g{yz|pr\0dzCVtWO<@5*''&()/al#$%&%&&"!!"%,8Bgu0!3qy$!oL%7jzwpS*$(+16>BƸԞ('HusG>@@ASɦC9:>Xvj7644;SsM=CF?9FLMRIO9,?TVW[@2M>*$rXlQK`uw`N968/3B_N79Qb^N;*#"8ORzPV[VooOywz{Xol\Zj_s}vr]}pfbgghlqtwt{jcjuğ~|vigp[˿{vtojkmmnmg[L?bû~zwsrrpprrtvwvrfW>IDz\OPG`uwmlwi6"#!!&0LURMIɿؿe yx3O`+.% $E%,Sf]YVK>'-37.$ &,27:GͱϲN HxL&1AQTct|~fB*"TA *CTI+""*AP??>TĜG88A@=Yb_MG;bgFHMNLMMNPPQQME)"gmY__cfdLHG:EwmWSY^\aS+&7:DSSRUNWmZ$n{qi|uov^#P[SMC?GNfte+(YabhlTUUT=._e|xnlaZY[YRg}skSBc\aMTgvfS96802AT?/15LgK9+$"@QN~\W[Vn^~{svg^qp[Yk_t{s}o`tdcggglrttnx~jbmuà|uklq]Ƨ{wrnklmnnof[J:aŷ}zxvtsrqqpruvutfX;Fų]OM=(($%*AU6 !!"'2HTSNLȿڻc t4]a,"1'"@%0X]TTRGB1./8-$!%*39;IоҲK!JoM&,=Tgw|yn];&"X>!"-AL=(" #)BSAfvvwqus[-i}ATtXS:D4))(%'*.cB#'%%(&%#$##&-:Cgm. 
1uU#iG")EihY6"$)-47>EƮ¿ϖ!(PqposomlfT)" !&($"7sһn}ywjWO`s{o_u}xx}zҮoF>[r||xyweEX:.-//.($/+)'(-I}~B,**,1:̰ɽB..-KJJLI3+.3C@,FRF>;G>DB0*');?DIEOQMQP7/KS556:?Q<764AOxym{5KXlmXE75:;5/,=in_C>:.$LA26@[PMPTX[ZljeF0%'}A-.I=IcpX+,$c\2//2XG=>A>KVVZaa|a#m|w{u[q{d!^pQC?FOOgj`+)X]]j|_QICJ92dxd[[\TOwpcEJad}j^`oreW;88118DO?/07A=8-##CNNLX`dr[~epo]Zw_zzwrfnccefgmtvsrzgeluß~zujm¿oXհyvqmklmnnme\F;cï{wttsspprssttndT9JDZ|TLJ;-**()?W- "#"#&0HPPMJֽb#~v0[\,.'#=%9XYVYLHX: 12=0$!'*15 $%$%'%%#!#$&+9Hij05g~a)kD"7tviK%#)-17@CϲВ )lF !4umusia`hvyobwγʥq=<[μr|~yxvveJY7---,/&$,*'&'(,*4'(++,2>ȷɺ:-/++-.-.---4D;;_j`^\`c\I3)%(*1FB=:?E;;20MйW[f^\Y]``UIJbc5C=BCECA?8aX297T|_9:79>b/58YstS224:VnE<=?LlyN@><=T@687Lz_=7546K|o[~LUU]ZXtR?HY^THBqh[@=>5$CC?>>;61.)-3U[V]fX'/\daltcVLB?+1_s~]RYTLUjqTA@N\encfnvtnh^;6400>eb;3655:6,#FPPzS_abxY|ùQqn]V\|zymgkfdfhhntutt|iglwĠ}zuii¿nW~wtqnljihhgfbVKBe}|vwsqrtnprsttwncT8Mı~VPMD>;858>;40.+)/../,6MNMMHùֿ`&t2N_-1+$7%/V\]T:DK,248,$ (-38;MЫƷ϶HJu\,4QfnmjkodE$$[: !"';C:( #)DLFjzzyyvT2qv?UsZV>eptzyspP5+&+5Tdfhfhnc>00JҸYVTUUZ]cjXOGdgLXUUQMOGG@eU59>jK8979EZ/7;cM3459ZrF<==A`rL:==;=Yǎ379>eu\=5554Etx}=:DXdtkho~ti@B@3-MQE81.0.-/6:ZZB,/4)+Q\YIFE>*530)#"(-17:LГbcjicdnjάA#JoS%1IV]ZbcdM8$!Xz5 "2QTA($ #*GQAgwy|{trT/oyAUp`Q>D2)&$%&%.bva<*$$$$$&%%""#!!%.:Ekj47l{O!-wB!%Douu|J&%(*07>Hͥgτ)PJ',/:;Xf!"#=}̕u׼fztqheiqwqiz|[K[Wvw8>:776735052W33HPY[ZW]WC3+')3Xehholnc;0.Sֶ@0;7;7@F>9?9jcLX]\\^XTRCiQ5;UvT<977BQ/7>dxv<356@`|0774TnaD5333M}48BRuxV[kvwbHI=1,PJ8//.-,..3?\T2.0>ZG$t88>ikL^ay{_--#HL367|wdgimMHottu|pCGcedp{XXTOROLB*4z}prtpdbO6PmVMPTRGPL#+<ct\V^]cy`x}gYzo]\_wt{ndrjfghnqsvwt}zihnyÞ~{ujnĿkM{yvrnkhec`_\][SH:Z~~{{zxvoqrpnmlljgffffeaba_U7LƽǯiQLMLMNOMKJKLMLMIMMJLRTOKKKGٵZ&z,DZ,8'.P),PiegrfP-631,#")/49;KΔtqqдD$Ln|N'2LelhojbO:("-_:!&=MSA)#"")DQ;iyz~xw~xtvsT1rzDWvZH8A2*)%%%&/IlO+&&$%&(($"!#"'1:Bme 8CV/sD#4bxpc."%(,17>Iظp~ 
)M]9(!!&""%;ˣֵdytkTTcqwjh|y\OTVzûv}7Aaմju}{vw}^S^5/.0/,&)1*'()*'&&))+,,3Bʹh]ZQMGLMLFFGA^98879152>?7/.)).6:8BGHE@0/2WѰRPSK?ADE=A>ItW?HFLJKFED=iG6:_X5768DR/:Qk@556CgmB9:?P~\HC>?Ah457EnuiD513:hB>AFMuU@EEO\]6QG:*(N@92.-,,--/5?:0/?lmQ&B<@h\Y_at~[1-!HS8?Ybq|bUYefXHHD?`sttx\ZWQRNLC$=vkoppzlJ:QnTPOQMDOL"0:71-2J^OGONLa=-(""HOYgT^]``d{õv[{m[f}`wykPsihihiosvtu}zgdnxğ~wkjiM{wsnkihga^_ZXXSG4Y|zyvqsqomkjifggghfdb`b_R>J²ŮjRKNNQOONMKMLMMPNQQLKQUUOJINúԳU(n.EV'!3#0B&-Zc^][L?(11 8+"!'-478QϷ¼δ?/Vp~U'9]trjpoj`F+&3\~:!&9MSC%! "(FL?o}xvtswtuQ)o|DUs\O=?4*)'&&&),jP''&&%&&!"$%'.:Cjb 1 P=3r=!%/=,("&,07>Nƿ|!,rQMA0%##&;ͦ׷\wtndioyyme{xl^afʯz|7Cdֳlu}ysv}YN[/-/0/,&+1*()*(((%')**-3E¤ȶxnojgglmidf^_lIDLVODBF@?=>6/2431200133434^ӮlkiXOY[^gge`|K1DC?<=A?>9mF8=SrP3877DK1AtoC676?ekD;:>XtmO??Biċ367Bkqe73344cxemTy|lijbdhaP<1?[jknvo\[WRRPOC!Jjdhkri?(# !)DI>p}vpS/n{HUsgP>B5('&&&&*3oH'%%&$$%!!%&)/:Gwb 2JxaL94s?"! #'+29>Qe}x /sndwlMD3 %>Ϋش]ztiacmxzjedWegý{w2Ek٨\t~vsx{Y^b2-///,$-5()**')*()))*.5HϙǸrlkeefnnijkfisa^v{^W[]\XYcI9>A@:::459AejB8;>X]?==Bqņ177Gv{S75469ls|v@DBFJl|UJWQD>e_CCFE>))<772-,.-,,1/-9?\x{a"Tp,-8q|]WUHanY95%%aI9?j_ggowowsmilk[C9>Ncjmxr\[VPRQN@!Tk^`^vzdG@Yhec`^Z[dG1>Hu}})=}|ovt|A$[WBDG@`hXD/ Eiiea__bO:/NckqrRettqo_MJTVKOgsjbRCFgR[w}|sldP6;3,0QY;8X]ZSC1) "OPRmU^[e{im{jd{o_frjwv}e\}lefbbgputqstchozĝ~ztfgeJwvqnlidaa]ZVVSPON@6^yxwqomljjiffddcbba`^][P8GŬjKLPOPRYv~mUVURPNOMûٱN*l#DP& 6"/C$1N\ck_XX4!<6 2+! &+4:6UսήhkqO&1Th\VYnnX:( $^y3 &+BN='! 
")EIAp|sO-m{GUs]R>A1(''''&-2f$$%$%%%$""#$*2ЦدgysaOK]ovkk÷{q/Eg֠au~wtwzVa_1..-.,%,1))'&'')*()*+/7Lйźmkkfeikmifggiw^iw]_agccjmOAEFG@ABEKLIIDEjԡ*1.989:@ACEJxٴHUd`TSRRVMBt>477cyxI6668P=58Smn=247Djh>::<;Awq/88DtrQ<5469pwx?IGIGv{R^q^KElb@@;BB8+>4641---.--/.8Nv|U!Mj,,;y|\YNHjW;6#$VB:HplllQ_{|tkirnpocI9Dhwy|lY\VOQPMA#i{aLWUpvbF7@K]b]a[^R0"2:Go}"By}vC-`P?AACfaU=+"Nuic\RRTE:-QhgyyMZszxiXCISMBCTmgSE@F_R`t~l_ZL;?3,2EE65?jl]I8)&OQm]guU]^hXleU{~}tgq]bZ`|w|xfezkchfeiqrsqutginzę}{ufffQxsnlfcc_\ZZQPOC8\~{uqommjiffa`__^][\[ZZWN4IȾƪiYRRPPR[voORZy~]ssNSTSQOQMԯR,l"FS%"73A"6PWORNWJ($@;!4+"!'-699WҰǽΫjqsU*1Ibkovua=4& %_w1 "!0KRB$ !)DJHmwyvrvT4nzJVsXSAC2((('''.^t:'$$%$%&$ #"%)0Gۭ?;BFBFHJLB;{<484X{vN855:V@56Czi:657Cpm;9;=[{{Y==;JOO.9,142--,--0//ApxK0"^ihuXmxi\JORIH]{qUI@NST^p||lg]]M;A3,/9=857YxhB7-"3YQmnIDo\Qb[Zdo{t}og}l^e|me}tzyfowmehgehrtussoehr}×|zscddS˞~xtojhieb_`\\UG;a~yurnnllecc^^_`]Z]]\ZWQ7JʽƽëeOONOPMYpdFHFrYapNTUTPOQPɻԴM/j"ET'#03= +0! /50% $@5"7,"!'-49;YŸżϫvnn}P(3Fbe`_O:24" "]w1""#/IS;#  (GKEpyy|oN/r}JVr^OK]%7k2!#Mw|H"%+/38?Uͦl#6ʱW#!'H˧դgvtaHM`yymkxyɢōxn.JrЎ]x{uvxyQ]P,././-%,/*('''(''(()+.6Rpŷkkjggjjfhghib~bqqf__ehegpoMBEFBAFILKFILIBrϡb_ix֧E;=88:8589?978=h|J644;Y:45Ere?767Gqg::==QwZ=>==?}R698Nj]<7746;~zi@QTXzfrploib\y:<=EhI*5//45/.-.-008bp{{HBe).8ukjcLQe@;#*`=1M|vqc_yuQ^qzkP76B[r{vpkZYTRPQO8pwshgefnzU=43g|ztR)2;Jpvuu"Y~~@/OOIKMbreD6,'^daYMMPOB0&_iixl]tvhQJRNI]}jQ:R~FS\r~ls}yg^dJ<;1+/7H73LpcJA<+"3VKd~vmujFSg^~opci~m^h_a}uywddxkbgigkqvwwwqefr|×~zyra`aNŠyslkhcba^^]ad[D9d}ytpoomjggdbbb`]]\]`_]S;NɿóŪhJMMLORZmdLIEei^oRSSSNLLRҪL.g$HR(#0 3@!"!!7.%5*"!'/58=^չx̨nqtxU+5La`ZXSJ?4$ #_u3!!%2IQ<* ")EKGr{x|~stL4t~GVucM:@0(('(((/7:,*.-(%%%$$&%$%&*4@H|[%:h-#5MfqrG$&).4:=Uɤk&>Š~M- 
!'Hɡ՝jwr_T\pyvmlcA\qpŽxl-PwъcwyutxzP[L.../1.#./)''((((())*+-2VΧƸŹnnihgiigi]ehajosnfgjikmogFBEECEKMKGGNLHCt͞bh{ڥ`\SNMGA:?GR759=av@677<]766Gmwi=346Gte99;A_\><<=D®U6979AGGD;55=~vdJRSY{t\b`pznlaUxI=5=_5*7,/641/,.123Lmv}HK\$+9yyfybONc:6.a?1ao[hxjpqw|ymP27Eqz{{yf\]TRSWZD$ccEY[Y\_da0<74Zzv?*!46FkztlY};0d_W^bkiJ94+)feaVJKTUE4&clnrvdUq~ycKMKKOW`i}nY?arHV\w~oxtge]C8;/,1=?05N^OA?5( 7ZKby[XeajKViyxyıWo}h`k}{zwTc~w|v]`wilnmilqwxvr~nbjq}ÒzyxrbdaNļ}vpmkhec`]]\adYE;i~{vstrolijhfcba`_^_bd`S>MªŨlOMKLPQXkbDIPo`nRQQSONLOʸ̪I/hEM""4 7A!!8-%6,!!'-586]Χyzrwvtq˨dlq{_+1=W`]dnkT;$!_|0 !3GSC,#"$*LLDtx{vwsK3y|OYs_O==0)'%&)(*Dvj_re0$$$#$%$#$%*2>JS"+5Z+ %@^QT)"%*.37>^իzc$4nH!(Lȣ՛axoaPYsyxni_Rwcd˵yd/Sx͇fxxrrwsNWE*-../+",.(')(((()*(),-6\̠Ŕ³mmjhikkjfgehf^ouglojkli]ECFFDFLMJGIKNJHz͖Xhw٪yoiebaRFY\_768Dji@7789a685C`wk9467Ixc;:=@[|zN;=<>IV599KjrmY944?}l_NPQTrz^QSmnlna[{E3/47 *3*,4510--/4<`xBN[&);qoidkx[SKİεZ208.'fs]WPKL[[H6/njloqZElcTMJNOR[^hvk\EfkFU_uupx}qf[N90B/-1@RO3j~{xuuqnkkjfied`abbacj`S9LºĦjPGLMNNTjbKNWa[lLRRQNLNOɾˡD4iAR$!9P5#'8D"5*):)!#)0597eϊqxty{ͪ]hsj*0Bdgfr~qc>#$^t3 "3Q^F+#$%,RRDtyz{{wsK0|zOTufN=>0*(%%**2vI"$%$&%$#$&)4>OS,2/Q, $KjbMC)#&+/5:Aeww[$/KI+/-+&$*9"#'Mʟәavp\U_txxdhjj}mkɬa0X|}fxyrrupLXI,-../*#01)()'(()())+-/9^ͩ}knkgimkkiighk[vsnoqhdhnfMFHGFHLNIGKLPKLΕWep٨vtpmkgYSa_^~887Bhg@6688g178Frm856:Rw`:wu9&%zhcql`SG@8)'chSLKOM]ZL16ghlrvJIvdTIHPSbmkbeiV;ekLXerrs}~k[OH74<0.>_pF0H\Q0,+# ;XPieCX`YxtV~\s}cZox^hzu~pY^qlinmlnruxwvleiuÁv|~{xvpa__XΌ}zrokgdb`^Y]bg_H?kÐ}zttomjijefcdbb`bbhpgT:OʿŧhGINOQQWshGNVLdpPSRPLMPT˾ƞ85bFR%+ty*7> $3&(6( "(/694h͎pϧWeui+4Ypjfdc\R9#&^q0 #.E@.#!"$+OL?y{z||tmK9{KUsbP;;0+*&&)+5|J &&%%$$!$&+5=NL.1+Q*"%Kqwqa<&$(-5:=_ҵӿX%;urVTOOMPly #)OěЕ_vm`R]syugjh[tomʠ]1Z|xkxytsumMVG-.0..+(21)()&&))*)),,12cҸwlnkhjmjigfggcPsrmjc`euiIEHKKIJNKFLKJNLΓ[apvw{uبwtsrph][]W[t37:FlmS6769:m136En`2456Uv^:<<@FSWNA<:>LȯO78;VqfG.444Ah_ONMRnsqnlafbW\J4..( 
%/*+07620.0xcJck]ZS\ϮL.(Ie3/Qos~tield\osr9/5O\UgagYX]US^tI,W@V]VZ\\W<1>3;xk;''ATU}raK[^S^[wlXxc[ndozsyp[aqgdijkmrvxuw~kekwÈ{|ru{wuq`a\Xؔ}ysokgfb`^[Zaf_DBk͑|zvtroliheffddaba`muiW?PʾǪmJPOOOQYrqQkTArnQQQRMMPU˿™8:_IO&ZS&8@!%7'(5)!$*.9;4hʢΥWesZ&6QXRE>AJ]C&%dv, !!$$$" "#&+OHAwyzzxxumH5|}JSp_S@=.**('*-5G!%&'&'% #&+5@JM'//V,"?rvywA#$'.3;>b¹DzS %Eg__^bo!#+SÞяayn`MZszugquwt~vϫŹU8_|tkwxqrvjFZF,.//0+(1-)''&())+)),,16mл{kniikkihfefjc{Tgriedcgm`EGHNLJLMMLMMKPLʎ^`nmlwxoأrvtqnh`YXY^q589E]TS;6888n/37Hu~M3547Ty^=<;B\zvZD=<=S¤O98;HlrE4533Ej]MJHKtiimWSGNUz?2-0.&,5*+298673&1]aKMQWUVZR4>hafzp{jMWJFLXap{q]`R6ilZfqvrnthOHLM<:7./RRw}S[^VpnU~xƫbz}dZp`sxwpWgrgdfgkmsvvv{ldlvpr~}xuo^eaRג~wqkifdba^[]efYF>lɏ}yxurpmjiffgdcb``cjrgU?PʼŦkJONMMQZvfclnPRQPNNPSȹŽ58cIP#Cn) 4>%9-#3+$!&+09:5nΤXetQ'6MVQJS]rlG(%el-"   !%%-SOKs|wqvunF:}{KTpdU>>/*'%%)+8I!&'''&& &#&+3=HM(*:Y*'Q{ukN$##'.4;?kښYӺI$7J$$+5hZ$.YҲэazvl\O[qyukqlNlqκK5^{pguvppumF[F.../1,$1+))((*)'**)*+07rɒp|ɾjmjhihiggffhcwgxstliif``YECJKJJLKKJKJKQQLj^fwokksrt՟syvoke]VW]jk8LK$(8U'!*QdP;0)$#(/4;;qˆur}ӺM!")+/Bmڷg#!*]Яχe{vpYN^s{wkrjWqzֿL7btolvwrqtlCaF,./00,%4*((*)(((())*-2:vŜʾmmjijjigfceebyq}zrrsqnjfXBBHJIIIKLJHFINTȂVgipjlosuњswtnldXSYdoj4:Im}n9787:z-68V}~S344:W{V8:;@_tG;;f?&w~|x|N QxSYW]O88GH*/SJ+--.10:R@!?_eztbnnvVLPICO\an~gL:tthhfxh[[OKMK534-/?\K58MOJ4,*$">L@[xYRafe|dRQV^__~|Wv|`]~]qxsy{uu~vjWrtideggnttusz~fdkvä}|wo_Z¾]Pғ~sqkgfdbb^\]clZA;hɍ}{xutromlkkjfccbbb^kteV>S³ȦlOQNNOR\ŴuMSSQHKOUɶ•2;[XT;o( GE*C-(79ULDEOTQRN:oΝ[euI%3LXVX_fnpB&"iu,! !  
!#&+QJHu|nE8wxJSpdK?<-)()('*5bf}T!'&&&&'&(;9'&+3>MF&(8T&%GcdcZ7%$*/59=nХ}dzѳC!)9oڬ\"4# ,_ɧhytrXQdrxukuhMqھ~G>`}iguurrtfFgD*-010,&2*())))(*)))*-1>}Ȫ{ʾokjjllkiifeecspwnowz|zjV@?DEFGIJIECFJOWxReqjnoosutЖrvrolcYRYdqg069EldG?888{,7:Qv{Q5448ZyS9;Vȏ077<[ymK9438MWFCEBF]_Zd^UOW^vdD/-,'/6-,/HGLRY`2@>//-!xC)-IpfN@?B?DVft̳P-'0:14Jlw}qwidgCZfuL-38Xlh_`npyu[VrwA9RAS\adebW/08/Kh3#*=>fsvz~|-+uvEPfbH($ih+" " !!!##$&0NIKu||ww}pD?{wHSnaM?:,*)(('(*,0)!+#%&%&('(')6-'&)4@PF &)>U&!+Ruxzt9$%+16:@oʽҵѱ@!:д{A1Jsd"0dǸ~i}unb^aqwukn_bzؿxF@^{djxsortbL`?,-010*(2+(+)()(()))+-1:~ϸɻnkkgmkkhgdadanolajrpjlk_V??CBDFGHDACFIOXwZ`hlpmpx}}ϒousplcXPWbob,49HyrlF869s*59UtuK323>[|L9;>AXuM=<=>Z‰588=etlD4325R^;=D?Fe_cfb`^Uek4-..("8;034IOnAed400 }9(,Dmo`FC11>ls{kqtfRbXCVjz7.2;[qscfqpvjYWnpm1JZ9MX_edaV./40N~d4!.@>P[eg`jsS'xr0EfjdabW( S`LW]ZA6DH;)?i>+./05F|;D[JLIKJKRHURM$ApLF!+?*'<^e@?Ywke=t˜YixE')++)+2AQJ<#!(fn)!!!!!##$%'0PKLw}|x}pE>yyMUl]M?9-))&''&&'$#&&&&&%%'''%#'('&*6AS>$owo^UadhW!/f˯m}sk`WThtujlho׸}E=^Ÿcmysoqp[Ed>.../0)'/*''&()))()*+.1;Ҹȼ}klikljifedcfeqxygemr_WWSZU??CCBDEE@>BBEJZvT[fpspszϋrutqkcVKM\t_586RZ588:i069H_W:426=]yI8;=GwoL><;<^486>e|j:4436VyX.7DXB-//52Kxj&D[OECEBFHDCMRKMU]avo_`JDs`jpm[[k]PTOF5<:15FVD3@ifD/--!$OT;<<>A<@7-((%&('&%%'('$%&%%'('$ "(&',7AOBCtE$'!:I$ )YofpV'#%*.58CzҪ8"'% "!#3f/  /hعro}to^JOnwugkhp}ֵrC@_ŵbnwqnprZDd<.-///(&/,()))(&())(,.2?ʇȻ|ljjkmnkhfdghdqvm]fjf^a[S^V?@EECCDC>?BACDTͿ{Mgvrst|~ʅuttpjaWLHVxZ57:X_6469?`0776PT@313:_uK8:aŁ058Bd|^44236Vu_0;;@wrqqnotZ=/.(*Q^LJJgy|I20)-;&(,Opruxtz~uw˿A.!:?302Bmytk_X__FIvt,,-2:24>M81AZ]J;5.!)UV>@A><;>CPjfcuUf|KovRRui\|]YWW\`YYdtsv~voeS}xx~|tvwsu}wfgq{ç|yukKNĿM@xtbZTOEIFDGPUNQRH7+VĿ}||{supnpkotvskwq`S:TơlIQRQPR\]NVSRNONZŗ0@ßTRM2?3  RF+B&,<^hAD[ib4{̓TdtN%(--*-11-0."(fh+"!$+>G0$%'0WINvz|wun>A~yOTo_}K<7.)(&$%'(''(''%&(%$&'&$#*+))1=IRBP|='*=F$&8*4NG-%&)/5=Bxѧ8!%$$# 
&1mղox|uk]Q_vyuln^g|״wCA_DZ^nwqoqqVDl:.-/01)(1-*)*)))(**(+.3Bѫȹvjiikmidccaed_{xrgff^`cYV^W@BFFFFHE>AC@BEUкnEztry}{~ɆsstpjcXNHQrT37:Y\<859@[268U~`M:34=fzI9<<]mH<<=>gx.57Dkt\55547Rolw/54JonoyyzxwZD-.+4gZNTXuN30*/5()*/Ght{y{Ǚ@.KI:02?xtyeXVhKEQU,..2=ZsxkqjX]o}h.dg@\XZ^]]L+6:*3T`XZ]_E-$4F8Lv`2|m?wN%'frZ^]THXdXF"HaBi}pw|p>Qb5././/539HVQPRRQrg[XJVpheio`Xnzoc_O9>;0297-/==>AUj`IHGSOQRLjrsUR{z^|w\YW]mtWXluturmeWv{{tvxtuseio|ã~zqpjY>EB;pymaZ^WUSOODDOI5-Q{ok_^YYRIKRHDNHOPTVSVS\_[P3M¶ƠlKSQNOQU\UTWWZZ^go`GPTUVROMY̙1DʧUPL ZL,9 +Cdǩ\sytsssWQs8..020((1+)'***)&(*)*/5CӼɛȷrkihlledccdfgj~|rnngddaehcEFIJJIJF@DC@BF^вTQxpywrcvǁrusqlcXOIJwQ579U}}c>85:GT06=eyuiX:4=kxF92?nY/wk?{V%)ntP`_VNY[Y<'Tgww9#]Z1,+,,-/5?O[SNJHMk|e[XLYpg`eolqphgU<=8/-,++1CVdZHHHHFEIRnziSR|tZ}v`Z\p~x^\lttwrleP}v}|tuxvtsaip|vrnknqp^WPO>?c}f40h|yxwuqlgjeXG4)JxwqldaaZWYPLNLJHFGT_[S5WƝfݞLRPNNNOOLMOLIJMOTQMPOQRMKIYʾ͚+GϦS[M XL,5"(=cgET^mg3|ɍNat{C''+*(-60/1/$!+ke*!!#)=I!'1MAOzzz~wvmAA~wQVjbN;6,(&&&&)2ELF>DC2&%&&''%(23>HPca\<N4(%PN%*V}}nG$"&+19==ѝ0#%)(%""  '(%&(8u˫ټeu{wkTJfuztpo[QwծtCJcȥg~}{xWf}844561')/*'')**)&(()+/4JévƷvlihjkjihjjhglq||tsxtplpqsi?GIKKKHCEHHBEG_ЭHoodh`WJs~susrmdZOGAwN467^|W8749HQ05=i|pB55AnsC;:;1)+-&5c}{rpfeb_\Z[VTURJ@-$Et~{usnkkmgegcfgfifec]YQ5]ĝfٙJQOMNNOLKLKJIIIKMMLNMNPMPKZ̮ȗ-JϘO"ZG _G/>#-@`ogbVoh7ljSfv~G'')+*14--1+!!,h`* !$(1̼l'1PCUwy{~vuj?F}xRSfaH<6,)'&&)0Rq]Opt:(&%%&'(+49LLR`dg9"S.*&QI%-`rh_G,#&-29@Cљ/#!&+1,+-(" 5]QQWaLC=% ;y׸a|{vlNFhzzrsv}կw?If̦zUisGNGFC8*)0*)(((**'(()-.5G̶Ʒsliimnlkgiefdnq~zphfjeegbgT=EHIJIHHIIGBHIcЩVfq~KKQCEAy~xwtrmg^RIG~F35:XwO:759KN17=d|nN446DqpC;BD755Bcq=0014C^]WnltIMCB]C5=PAC~.'+;fmB98/8aiZT}tOA!C;.1119ptyzgROl936?:8CkxxwppqfUSp~]4}NHlgffe`C-K:3e~yqplI,"7?7jJ:a>uB",ssU[[PDIMF2(Ucvqy''ZM.,*+.3PcRKTTPLHOiiNI@bh`foupxyonlX:82.,*()-37#.<&4Duz3ʆM`tI'),-+,.--,'"!-hb& "%*9w (0PGOwz{}{vkBF}zVQc`~@=7,*('&)3Z9O1$%$&''((2=MIP]kl: Ozu0*&MN%!1LYkZ-#&-25=E͙1$#-JfXY``G?6 
$dzbni[)9}׺Z~|wlRRgwwsvשx:Hg͢uwOgx}zo\6,2)(''''('(*+,.4K͸ƶqnliomjfca^ddmt}oiee]\^\WH@DGHIKLLLIEEGIaөqoxLAA?;@CxvtspiaTJGD36;_mC9869LG36>vk?736HvoB;<>X_>=<>Gxj066KqrU9357D??=KX/Cv*)/YvOTPElypTj~ZJ##SA.0012TuyxeY{B:9@C:9FbjonkiircW^szb7vIMhdfee^?-E97mzD+%%'2'1A"7BŽ~8ʄO\sH&(,,*')--,*! ,fa'!"&/dt!%1RAU~{x|xvkA@|y]Rdaz?=8,)(&&(2[D]5%'%'''&%.7FGS`rh5#Rd\i+))YT"#2DDH:#%(.25QlrN<>=>E|]574U{yO7547?EAOn`ecGTSiTlX!Eo.*1\uif~]{^K#'R714524@gvuw~K:GDNE;9Gahlonjir\R^s^AUP>MSOOMHGQh[CA>6dZelsqheZY_dhQ581)*))*)*++*,+&"8YPcmmj^D@I]gYelDLW|tpQRtryv\a|ich~t][dtwzsm[`{txututxhelu||}xuqof`eddab`]e]a[gebaweƑmjfbXQSQFIHFFFELNLJOTUPLKJ\ȓ+K}J*cJ!$!Q;$"CK3,*)& 2A"4@}9˃L^p}B$(+*'&(*..-$"+mg$!#).RP%/SAyw[Sk`yD=8,*((()1^yx.((%''&$!(3HTWhp`11`]m-() ^O" 0]reNA4/(-36@Cľ̐+#(>xjVe\?)agSVԦh]& =ʱY}xjYOcnvvzգwy6Lk̓`qwolomLiD./*'')*)))***,/5KȻĶrlfgjheeegjojxhxnkimia]ZYSF@CEEHHLKJJIEFCpұk>AB~U667QsvV7435=omre*~hxZ_if`mmdU5MhRUpzuM Lp-,2jhǮqZI&+Y-6KG945Dl{T(*=<9q0@wOL{yup:$4fYW^_LIUUF-/\eW`\"4bI1DdWGWZXQRRQRNIECHGC?;4d|WblpolgSRZadP780,+++.,++++,+%!:ZXjrv{_BAL^gbboHJRqmGFVnzxuvrsnbk}gcks\Xc~tv{smX`|wvqtwvwkhmv•~vywh_a[Z^XTRTY^YZafc`YSZZ\ʓ)JI2aG"!!$.4)"$ZB)%ED6:82'#3G%0Fw.ȀMbr{@%&*,(%(,141%!!)g_&%&.Dwݐ*#&+:(Ty{{xwvsuxxvm9F{{[QjbwF@9.**)(*3`v2))&&%&%"(/Ugovol3Jgo$*& ZI"!+[yuqmN3)-06@EƢ͌'&!)DpeX^ϐ[> (ffTiޤfb(!<}q[{uiWQ_rysxҢs{6Okˍ`runmolMp<(.)'))))(())),/7RʹôoljikklkjkicYss|stztmaZ[YUDuo;'&793gyPbnttyvOPZ\`I4:/++021--..+*+& =VEOq{}S?CK_nuglFMXgtuEDOqyxwwvuo_ox`^g{s[Zc}uu|{pjWdzxvtvvuykfmuđ}}~|yy|yqtz|wsmnqijlj)LK!=bC0IK;*)AY]N5+eI.(!"3,010*"%8L%0D~p0ֽŀPapu@'(+,('+-/42%""*h_('2dӏ(#$&$&Xzzxyvwxwwvvk:Cwy[Rfb}D@;.)(''*3c;Sv5('%'('&#.T|2'42$)% `K" %Vzx{aD2),07@Dٽ{ω*%!)HvcYnڍT> *fjQvށ\^#!=eYer[{seSWdwyt|ҟzw3Ooʉ[qtnlmgIz5(/+()(%*)***+,18[¶nkjkmpspha_\Ww{~vlc^[YTF?DFFIKKLKJFFFDuЛHA??AGPSWڭ|yvtrtqj^UR845436545777a:5333102227Owg=9:::99:<;:;8juJ_swx[[fWX@7;0-27;:8686.,+&!Xj{\+(ZS7.$ " ! 
$8A 1Dg4ֺž}QfrsB&+.0*)..-0.%"$2j\"$3nsI!#"$%"'Tzzyyywwvwwun9BzwZQg`}GA7-(&(()0cl1^j7(&%'(&&&0P[_cihh3+86$!*'" gL"!3g{cJ=2((,07?H֩û'$"*Gwf\{oV; *enqǦUd\"#?h_cҤ¥\zsic_jwytyӜqq/SwˁaqtmkleN'..+***)()*)*+,04]nmhinolf_Z]dcvngb\ZYRC@EFILKLKIHJIJE|͚u=>>ADKOPU֫tzwtrsqjaWX976423657777`.5223222127Mwa999;:999:::>KE766L[F54357EݫiwbYv?'SSEAr<hk.-3WntshmVy~v{Q:C54Bw;))VCDXBqPfdRWZqN+DmbXaYIKURA%8dg|JVPHdMVV[]elneFNRQRSPOQOLE@=:prLaz~zWM:7:0/5>EJFDB=1/.&$J[Zrqc[MAA>VklcIM_\nzbEDUrzyxvwwl^uu]ammVWkvtr{{rjVkuzsuv{{{zhfovÈ~yA6jC(17Rstvh61kfF:-&$""## !#"#%$" &55 -:biipzG7zU^qxG'-35++0.,/.(""/bW #$# !#$#&Vzzy{xwvvvvui?Gzx_SdbyC?8-(('&(2YuSKgwX/'$&&&'')BLIMMYXm0 8M\e7+.)" kJ  /TY=58'$(,26=Gԥ}%$#(Ixks˓LV8 /uyw#?a]vxĠ\ytgV[ivvv}Йqp3XwtartnlmcPzlWk|M//))()**))),,,10_mliimia^bfjmhpthic`^YSFADGJOLLJGEFFII~ϝi:>@DGKNRZبy{{wttqmcZ^666544655677f,4423211335Ls`<99:;;:::9:>K@65557665458O⻃_>IRhc_8HgpiffhycciS6;EJFL|6Jz394/1.-,45?=[mbgb^Q/1A1En3*6^?>~nJvI6BBFAWF(Al^W]XKHML>%Adk}OlFO^]ijmgnc^ODUWTUTT\e`ULC@?yqHczUG34>117AXcMFC;541('N\OcsvqT@?øŒ%Da^tyȜUzscPYlxxvЗxj3XtrfutmllaQ}P+6;AGGJMUbףyzyvusrnf]a|06545554457:e15333325525Nv^;;9::;:9999;K857654355547Qo̹^28`|A2;DC=09iT015KXPNLRxymu`2;ˣZ>3/./0/035411.8CF@6]=!^\/vmjDADFGRcjemiXYpnnFW[8al_ccaL*0A+@~wf7(=V7K|RwDPsqkh^1:jPR[VEBKK;&A]i{Qnx?Na]afceKVXNJZ^UUX]kleZRKEDtOlvVA2=@03@P\M8CB>=<4)(XbYs~xP@>9@VRvRGOdw\HASszvwyvulTqk]`l}hWWlyuu|uohS|w|ysuuyz}ydgoy~~~}}}|}}{~~}}|yrutmqqsroijlipkoqa[|wrhca\\YXXVQMNKKMQSLB>@KB" >I@5/./13:AGJkDrzK(*12/,/1285)#%9lY#%#"# "#!#"$$$&Wwzzxyyxxxwue9D{xZ[dly@>8-*))(&'(('&%$&$&'&'''(0Hggkjv|,7QwhG#,'"  %eJ#;ulb='$(-28>JԿw""#1eŷf!Iķԏ&JrȔUȚ]ztmkjvyyq͕xe3\vmaqqmmlcS}M.12/1*"1.''))((')*(+,/3gɼkhiimpvqlg_`[szzxzukb_XUOD@EEGKLOOLGEACEϡo;>BDBJOVdءu{yvuusphady286444544687o/6544335427Oy\;=9:::;9:::=M757745455577JfgƸq;7zacsqZvS33Apf[DOGYmT1>HNSaGn[a@4jn:733:83.19?:35@FKHYa;Em3ujeH?@Q\jjb^jhZUh|2V[?ad]aeeJ+2@/O~t{R0%7Hy{z^`^nĢvC?7/**((&%&%&&&%(&%%&')+*3Un`neyw(?\кsI ,%"!"# .qH !7eF %"%*-05?Mu%%!%1nĹ^!  
@z)KɒZzwvstvwzu΍x\5[zjfrqmlj^T{N-01/1)%1+%%()&(+)')+-08nɼjkjjqha_bb~rux||sibc\VQEACDGJNRROKKFDMˠq@?@EHNRWdӞyzwwustpfaer/66545643366t*3354222348OxW;<;;;;;::::<0=OJ2.899<>=2'-l`ZhvlF>=5:STzwXJPhvUSJ_zuiUtl\^k}t^VXqywuzwphRvy}wtwyw}vdgoz†}||~}}}~~}~~{||zyz{|z{{}}~}}}~z|zvl`di\QNJDDFHPW[_guº7&;DL~VAEE@=>>CKIEFDZuZ$''%##$#$##%'%)\wzyyxxvwxvuf:Gwxxe_ZrƯyBA8.,))'%$%&&'(&('&&$$()+4__khnqww& :ZӽfE.'""!#""-tH $-$ !$*.28ANo##!&.jR"!:s+OӇY}yvuwvxyrˎ~X5\~girpklk`W|I.///1*%1/(()'''(+))+-3KnR59:}|XYpm}vB0.)Uԓf;9mE1HfhTBT]F2@iiJ75IncA2JXXQNVrFJ@FK]|mdffioiWYsrkIccB_caed]I#09+.Qd\XXUL-#0B9\e"h~Gub+ SxXZ_VIT\TD'NdbnwQ;2&7oYCl~OBA>4=S]xoYJJpnp|Wt~zqXzk^[k~}eYZsuwwtohTv~ywvwwv}u`go{‹~~|}{}}{y{{|}~}}}}|{}~~}|rdSIBACDJNY^bcba]aidfooogjghliflkkpnkcTO]kccyufYXYYSRQYda^bX_l\+1.,*)(%'')**),]xzyxzxvwxxsd9@my{a]Yx˿t>A8/-*)(&&&'(&$%'&'(&&((+6Taoboow&/CH6 ,'%#"%%# 0wA"%*/39@Rf#$#%0`ŴL!@h(WЂY}ywuvxyytʇZ5a|ehtqkkkYYC-0101)&2.*)(&(((+-*,/1Rć57755634469;mɻrY9]Y.4Ec}^`lh5314oҞp47~fltOE7r[^3=jnOMgohQGsg37G\_\fja_kfb]S\uyvCk]IchgebbI#4A+Pzzv[/$5D8RR!muC{O+!bx\_^SHX\V= QZC;@2Jn+&U]]ghYGMS\RYj\WUV_pzhYVQI\ydnzwdWJ1:;842.),0..1;81'>n`[~|dUEBB7BT[wvwLKLruV_g]bzv`YWrwuv}uohXy|vuuwwv~ochr~Œ~~~~~~~}|~~~}~ufVID=;IOW_glu}ypfgkfgfb_`_]ZYU]czeRLDFE>AC?:::<5:o}{wuttswtru`1F<1.,+*(%'''%&'(')('(((*9Sqamfws&"6PFC8"*(('%&%#! 
0jB "&+.39AQe($ &/_I!2v,+[̅c|}{yvy{yyɉP4dcivqmlkWX}>-1101'#2.)'))'()(+)+.3:zɻkigmnyekk`veklook|phd\P??A@BGMMHFBNɛszTBQjthdalБvtvwvtsqni|`/7775676668:u3554333344;X~L;<;;;;9::<>>Wń35454533358AryuݼpFZM2Fl}ofZkw98:3\̩yFL}{PAIYh:;c|w~wyR18P{deycehSVVNYgutEpSHZZ]^[W<#>E+BvzU,'7C2[Z%oqGt|qzztY.%gwX[ZSJVbU5$SW3+05Ol,-]_Oik]OZ__LWeZVUZjxxh[VRNgwhnpvynf^cT8;92.,+*-/-,/43.%>l\a{]BMO9CRUwMQTux~L}[~d\cum[Z[tyuvrpgTyzzwwvvxnbku~ˆ~}~wjXI;>Q\ekqxyvwvsqonlljikjkrwomkkqrpinl^LW~lb|if~{XZ?^Z>5100-)'++++*('(()+**-<_fiimv{$!&!%*)'(''%&& 3l> "&*/27@Pҽb!$ %.O{ "*Y~_|z~zsƆJ8_`l}wnsoV\G-1/02&%3-))+*+)**)'+-3@Ⱥ{hiikiqkf_RzhjlmtjgbR=?AA@GAĤIHEBOȕd~ybFizqelΑvxwvutrokc{]07:<99<9989>i/5544435565XL>=;;;98:<KMvͮkWFs|PMKhMp@ADfg74>d\\hkgfaRWSLQigAiL+LLPQPI5$OC+OV-&8I:cr|*%ovM{V'/x~ZZ]QFS\J3)RR1+-3UxtsY+hc\ksW@8*)*+*+-,*.-.+%?oRWxl^RhY8EW^lKST|Ey^g]bhtqcY^{wtztrcUz}yvttwxicmvÁyl`Z`jqtz|}~~|svwqlijhvto}zmoMw{^MILDC>8588563///03224@hgqhsox{ !,)&((&('& 6h4!!%'*/38=TX$ $*,'#  *b|_}yŇG:eĩjVrE-//21&'3-))(')')')(,-2=ȻzjiihqФeg]^exfjprltmjT9>@BBKWըGFDBRŏbmwb|nt̏twwvutsqoc}^4:BUUE=FI:5>h08455556557b}O=;<<=:<==;<;ax96465646669C_^H4Zd[j̵]+aGITewav`L7ak=7:PZQfx~}mXXSP]w|_CmLE^SRRPL:'O=5wK+%HQ9;gW7@ad+)swDvy~Z 3zLaeL?INF/&UQ0+,1G]XbC 2le_fdVIliaCKZTVV]elga[VPGfsmnvxvwj^affX==3+*)**,++(**))%>eYhxnhbcwX9J_gret\{}}H_]Zcuz^Zc|yvz~qlbY~x{usvzzmejvxw|}yy{|yz{}~~wsqsxzy~~}{zomkjrwmjgga^UQRNNNHFC@ACA@BJ_rwuwxzے8-%""##$)'+4)(*+*+*)$4\4" !%##')-/14:?UҼZ&"!%()(% !  
.gwavw†E=iŦ|Uy=///0.%'0,)()*)(*(*+-/4AȺzjkhrxdZV]jpllste̪~tuZ5=FFEGZҚCHDAVƔvhz{}ȇywuvvxvqle\4Adjtw?:D`4:8><;9876:cyJ7;>@AAABA>=:`s67875778757F]ZxQW˹qGa>Oh|wv[:cg>=MwUZ~|_SWTNQt~gIzOX`VWWZV9(G;.\}M+)RH3Z{xA-vr[{{pwP!7qMTM?AIGC/,VP1,+,,)>aP(7o_\ebYSbga@GSORWVYhj^SNIEkokkpmc\LM[cgX@:3,))*,,)+)+,)(#;e[dicf_cqS:Vgf[|~bL]a\dyn\]i~vwz~qo\Zvryuvwyzp]jv}}~~zy{vuwyqtopwwxwyz{y{|~{~~~}}}~}}~{zwtutqjkkdfdec_]`_]]]amxy}{}}}t^JA@ABEIOB?C876345855-''""4aG*(&%)'&.>;888HSRFAIoԻW+$ $)*'% "" 0lphwxD>h£Ru9/2/0.&*4)))))(()*,+.15FǷyjkk{lb\[_cmnmospzխ|ywZ;?CFCN|h>GEBTœ~uyyDŽvxvuwqkdV4L…8E_aMIXWB>;at8:;>::=?>98EԺgɾ{]opYrlSY~Zmvm_Y[U[TNf~SFpES[SUSUQ8#G93nG'%XL:{8-|jNfnm|Q!9{e16536<:6(.\U/+,-.5i^?j_cjeYUty^@GLFLQRclbSJFCEskhkssbOCBOah[:70+*++.+)*+,*)("@c]gfNTK`]B>^bmZ||shFP^bZezp`_gvuzsl]^{x}yttvyymckv}~}}}x{vy|}zuyyovsxruszrsyxz~~{|y{wvnibbckhgeghcdg`_\VSRPSQRQNLGAA=4_~ZAAHFC?LmztogtlgzӴV/('*-,*(#  %#!3oek{ztrryzzx~>BgŠpSt5./02.&(/)(++,*+++-+,06Gŷkedffcdg`YY`glkjkmlrϞuyxZ8>EHMsDEGEB]ҿ˭]]qq}uvsssmaT5H{68MS9Pe68mvG@Fi~{A?@e87?afPBVU;;IԽs{_bdnqxiUXPMWQSDNqKWbYYXXK2'J?6r{8+2cK2r83ulQF!8s]/.-.,,++#+c[,++,.9cj6BkaegiPRhgZDKMEFHIQZQJFC@Cy[[rzvfSGBK[e[85/+*-2DA5*++**)FeJFYZTIFD98RVhyx]upfQICStld[exw^Zc|wx{zrl__yy}ystvyy}gekt{y{ywyx{tvzv~||r{y|{}~}~~{{{xvtrrpppokjimowxlffieeclsqqtomznvѲgNDDG>;:71-))*)'(')/'%2rڿdq~yuvtvyzzy=?rQU\mwiVICFP]Q9:4,+2LWF5))++)( RdM[|n`O?>86MXfY^ci|lHDL]hcZd|l]Yc}xvx|{qj]fvv~vstvxy|fflwÐ~~}|}yx|w}zzzxy~z~}||~}}}}}vrnlkllqlirzy}hda][WURMGFDA?=>>?DNN@72,)%%))$&&%%++#"#(-6y[pzwtpvzzy{:ChƔYqsmjjhIox7///1.$&2,+4DGKNOKJH=35HƸ<9;;=@GDBCGIMbabds~royyT;>BEEC?BKIGFF@hӹєmWHIںyrpmgxjb`I4Tůg67RJ4bV7@ml==KĻuAA@pˑ==ºx;PjL]deedfX/)>3B}Z3*;hF!D}Y*-**++(#>n\.+-.-./$K_YdddQTa`Q.).7JWM4)*,+*(%[cYt|n`U?=69PX`dQMZ{eEESejx\]f|gY[b~|wx}xnlYmxxursvwzzhelvÎ~|~}~|x~|}}|{|yvtlmlfjlomlfihjhhjhfdd^`_^[]^bjpk`ZRMEFGDDE;==@MJ<@BCDB@ABjԷmspdPELٶwtskjĪh`]?6Dv@9:WE0fO6?vmB?PƺuEAFvс6Ao?=IռKh{{v\QVOMOLKCcl:XcfhhfQ*)>5;y~xum<(Am9B|&:^Qztp>#ST*+-*)*'"#?mY,+,---+ 
!R]TSVO6/4SONcP@>BLU\]OE>85iu?GQfmf[TJDEP@9?.,.8GTH0)++++'$ZcblmjeQ><5A^Ziy^MFM^tWGF_sduZ]j|j]ZdyxyyrnZ{vxwruwx|xdfny{~yz{wxyz{vvusvllkkggdeaioa]abbkv}]}|vuxzzӲ4Fl]mnkjidJjl800120'/0,-7BJ[n|~N56Kù5403:JeF20165b8;:<@?6BBDJA1+.13986::<9886iе}Pb|^<8FGBLزrvrflĎbef?761X{sQM795\@2i>9Btl>=HZ@>D}q7Ec:=G~˼^TNkƾzOuzttobTXNGLKI@cb9^hkjecO(+=0Is5*Km;JdD}^eqy~o4#^@,-.,,,%$%@xQ*,./.,)$T[C23/-0E^YKZKA?CGRccOD>82kn@FK^_XYbUGDJ:6:-+-1O^G3+-,,+%%[g^qyqfG<=9SlXWqyvTJPfĢn^AItdfoY^oj\V`yvtxqkUquswtwxywefnw|}{zww{~xkojotnv{qwzxyyuuvvuzr}Ļ~-CdpcjgffhaZG1453.(,1,,.-4Cmxy~b415Uþ{42/2=eC30148g3:8;L\=UD833.*-22JTFPAN:138mѳjD><8;=>@BKݰnppol`dg?8>Ypnonc@85^<8?^iN59Kwi?>BqlA?TE>@EKepWJF@95ojAJTf[YcgZH@@43;.+/7aXA02;;2+%(jjR|m`G@AMooUg}ZJRhġ}[LkggnZ\j{dZ[exvwzriXsu{wuvxy}udgox|~~~|~}}}|tsifgefdgjglpq^ea\bhlrwv)6QosPQTQONYXsZ;7541).2+++-,/07581.19Rǿo-./16]x>10/47i3;7:tset=112--04Hrhlf^m0137tӐQA<;:9899;>Qݩ`ggwe_]^a95?g}~wX=76];:4C]hVPK78Ftc9?<9[wfSSD?=KS;APhnT98;Tн^]\^[FI`njKk|4YiŰkb{|TUy[louuk\VTIJLKJ').-.*&,^_9-/./1:A.=VI@BDM^aYMHB;4seAJPXYVfhZH>>25=0*.sju624/*).2M~f{gmm104>CL:;=Th`SP?9=Wȿ]XU|a:>l_K:s~:Dk~wżwWGMzftO[R@]^^bhhG$3A0KU/%Nj;;^aL8P_*V[ge5#k>...-,,)(-FU?,-+--*'2ZZ7/.-,00--CXJ?ACM]icUMF:0ufDO\\REU`[F?<19=.*-BYVGWe^TJP85K֐7<:=IGIH=>V978=1886766:lğ2Bi|w`C>>G¨G:<_uwuw_>8=]r]drk<7}BAAM}[NKeĝe^J9|HIKKKJMMLLKKG;_PJ]\_eldC&6C+K}|U/-Yi61/,,,.-#VPLpsrt~q6 l};..**+*''-HZ=+*,/.)%3eb5./.,2/,+K_CWMAOORWZK6"9E+TW-+bg810../-*UYj||za,"kz8..,+*)(),Qd9*,..-($003>_+5Byop|k246X׌=;?XliY9;@e}554324443689zɖ18963456666U~\:;<>:;<<;<>>G<9-[|rB,'b\831/--,(]E58;AEY5(hy>/.++)'%%+Vm?,--.,)#>f]0-../2/-*S[FEGFVuv`_WPC@aaivuTKc^L@;6-41)(->_m<8BD?1*'F}dd~scFFMewd\sPJQvysLWUpybX[twbXVmwwvwndTtxtstuu~oehr~Ó{{}~}̶vwy|~{y{{|}~|~~}||{zz{}}}~~~tzyy|c[OLOMNLNMJKNNj<<>C=5<;=@>:4;?952015;6664>Z.2HzWoO/27YԆ599=ebe_XReoʮwF9?X|܎646qfyu\GZn045_wfuxil;?[fADGIJJJJLJJKKHAT@,5=9:A5)$<>,8BAFOPF0+(VX610.//.*  
_y@.3766=;3.:5++0=PO;?cP:1/,_PclszS?AKo|gc}`HIV}}gN`}~wbMtt]W_s]WYl}xx{tncPvuzsvuwznaluÏ}~|}Ͳ|~}{zzywuww|}vywqrtnnjjffj\XYXbJ]\LOQPko;>;ts4677678987:=˅4976477798R׀8998DFA^}wf\fvPOLH{rquD100=:F\||q`cfN>=BhO=DFHKLJJKMLJLGAU5M{wumA*!>>,/:43@8**/C^F<<::;=<;9:`}V;?A>===>=?==W?=:878889984!f\SVTYbW\gkotrmP226YnZXI3jeHHLILG27;BdkC;522?Sdinhns^>D2PutjrjF+I@,7Zd_nk@0(-fT29Rb^n\5(^j2=chjgU8%(jX..464;++rz5.Bbdh[*KgM40//34/-/_`RVW[_a`XOHC@f^VYOLOZdTE<945@6,,0=?97AB>[E@?;:;:<=><4&!mKRW[YN?IIQUYTDB>8:WPQ`J?QB/5#6jD43333Sdeg^in]WUC?@CCA@DGKJKKILKJJJB+<_gbj`A/(6fP3=Yddi[8.jq3EjfehV5&.m_04GRRW4/}x3/HfjmP%IgN1332483/4fcWXWY`gcWGC?>iMIJFILNJFE?942C5)*03464BN>80+9yA@BA@?AEUujc~OGL[^NMNQj~xywvuqfWyv]WWX[\ZZd{|vvvtvxvrm^Rsrtousqsuv|}jfmx–~ʱ}{}|~{xvz{{zvsoqzyjhda^[[]XWRWn}VJGFGHHHEFFE='.s[EPON:39BOWYUWUIBHA5QaNGqaOULE<8:>J?86/2125/!MZD;/3@KOIC?=CEE79AF83-5gG3222E_^QPRPPVWVHCEIG@@DFHIJJJKLKKJE@C9JTp{tI0$#P9+?X`]b\@/*2dM4@V^^dV64w|3EV^mjF/&4ws8DdclZ03yn31D_giJ"LjL.8ADFM902fgWXUT^sbNF><@kzEDFMMFDNNC=824F5)*.3:8/2==60,7}z>BDDFDDHW}cSsrUCFKZ]SXZ^g|yxvuuubZyuWVVU[`ZXfzwvuwz}vsm[Ystwuysstwz}{hcmwĒ}}|~~{|~~}Ű}~yy~{qonjjhihfeilZFU]KTKE<<<:DSZY[]USJ:F^dW^ta[EC=::DJD>741/16-%^RA>4BJHF>987:<97D=873- 8rQ7535N`QHFGILKLSICGIA>AEFHIKKLMLLJKFAB..d|N<.$(L;-=S\diQ5/(5fJ4>NZcjI,6{/8Elz]81%=~;EY[]M.7vl02A_pj>OfK0H]ahf<0:kiZZRR`f_PD>:;irCELSTNM]YF?;29F2+,1;DC66EJ<20@xA@MTVXOJV^HJIIHIP_cdirllywvvuqp^ZzpXXWZa`Z]m|wvvzvrmYeuuwyytstxz|xceoxŒ}}~}{sikllg`\[gotsutstsw|{zxy|}}}~î{|zzbb`[VXXSRQU`cc^\XNRX]ef}og]KRPMHKOQGD?<99=CCyb[NAC@:83331129IF34660#MS:746DB2443/08EMFADA<>CFHIJJJLLLKIHCF@.Tq\B-#,S7+0Ait_@2+%8`C26<]n]6)Bu*1X^@3#={7DPZ\E* 6g*/;pz^? 
NjH7Whktc72ErkWRNJXecH@<75dp@GTkcR``PA==44>2+-4F`I8L\L42.Ks=IafebQJ]TGLRQQNLbhntugvyuqnplk_b|mZWX^b^Y^qyvwv|}wrkZYttyywtutxyyychoyĒrcfebZI2+N_eilkjecqxwvuptusosupqtsonsuqtvtvvxxwyz}~~~{usstqrpqsuttroopkjjhfejps|wsopuwljkkjiea[UTYZgwogR@AB>>8*fc>;87<96643224@JA=<>ADCEGIJJJJKMLKK@DE;m|k_T4+&2S3/4Q~y`L3,(=`?48JvvZ='Fr*D|zaA1#<~AAYi]=$=d.4QqmZ9OgH9Qdhi[60IxhXLIBZq\E?=72ep?I_mVGIGC?;;23C1,08W_;5?C8+/+Jo=N_hf`PH`{LJX``fXMdhklnfvzrljfjjae|oWWY\b_Y\xzwxy~vrlUXxrty{wuwvwyywchqzÔsbdedWF/-Lcdiihgcetwyxuuwutrqrnooiilpnmppnkmqnknmnprrsuxyxwvvx{|}zy~~}{{yz|z|zztw|xteZ^ZSXYWYYX^_NNOWTPGiRKE@?ACD@;=>cC4?dqiS4%Jj0@[YSE2."BXVssZ?%B^,0?JI>) JfL<=:29D,,.6G>;5/,-,//Sg>Q\abZMFawRSfnqpWNdbgqo_wsoejhkice{oTWZ^d^\a~yxyyurlY`wrty|xrtuvyytfhr|uedgaVE13Lefikihehuz{xxxwtrtsqopqpmnmnmolkmonmoomnoompuvwwxwwz}о}x|||~{zvwwxxuyzxuuuz~{wuwsjha_bffd`\]_S[^YUU^`MMMOQOOTLMNQMJAhW<./34123)<;52ji?FO_hOD@>>=:/9@-+,4CTS5.,,,.5cYCOYcfVGLiuNTiookTMh`jpbS{xqkkeji_`zrSXZ_eb]f}}ywyxumTa{uv{~xstuwz{rehrØt_ehaUA01Lahjjiidgu}{wxxsqrtrrqpopopmmqommmomlonjnnmpotwuwxww|ν{w{}~wwzxwrsqfhjijlehbbfe^`gygWIEJDHOSDRYPA@9305;<9-WyH7643/0/%JY.240123.%bmPD6-!!"Kd-.24/+#`aB[vsaH6>cycMEA???<:;:33wd=FSulIFEA><907?/*+5UrC1-,.---axW?IYj]MFOlzPWfmpdPMm|Wsl_^ttnjhliWdpQZ\`d^[cyywwy~wslI_{tv|}vssuww|rdkp}˜ufggdWB0/Scikjjhfhvzwwxxtsutpqqnmnonmmpknnnnmmmllmmnqpsvuwxuy}ν~}~~}}~}}~}}}|~xrnqou|xpy{nfe`]]^][W]`RNIHGEA5(-f]F>:97454(zmKN=/,&&\j-0331+"!jg@TWZN87;cz\H@C?><=<<:41qeAI\hTMIFC>=94=D/,->aW;0.-.-/-XgV;Jbn_RJPpyNX_jkXJNn|grf]]wtlhlpgVapSU[`a[Z`yzwyz{wrhOf}uv{{wtstwy}oekuÔsjgghWA//Uhiikihejv{wzzvsttsqpnmmppmmoolnnklnplkmnnoqqrvwwyz}̼}}}~~{y{z}}~}z|yxrpjiig_ZekeaVSQOPMUqWikP@884+vb77<852'.f<>9>=6AEq`CA@@?>?=;:50n^BIXUNKGIE@?:4AB-.2?H?3,-,..-)Rq^?Rie\ICRxrSThv`JHStv[\TPYuqmnmmfZeqXXZ_]YZ^uyvwzzwsgNl|vvzzutsqtw|mhmtÔwjgigXB,3Tgklljifkx{y{ywsutssrpoopomoqnknmmpoollnmnorstxyxy{|˻|u}{yry|~}zxxvux|{yyfc`^YZ\USSQME?8>YYJJNMOT^knIKFAAFFC@>9;u]CCBFKKLKFCB=4BC/-../.-,,,-/,,Y~f>ILLFBDSwgPZpraQKU|pFEDH[rrsqpqi]pnVWZa`[\]szwz|{triQstvy|xtrsuv|ohkrÓw
fhjhU=.8Vhkmmjghmw{|zxuvvsqtrpqronnqommmmmoooooqnoorstwvxy|}~ʻ}}||{xyxx{}~~~~}{|~}}zzuutqpnjmxudjgjmstt~ka^VVWZTPOOLD~VHGDFKKPIJFG?5?A1.-..0../-02//dwPEBA@?ESw`J[odVMIQ}gKGGMfzyxttshVikWXZa`\[]t{y{{{srgKvxx}zuqrtuv}odmvēuijkfS=-9Wlmnmlhhjw{|xvvwvsqrqnrrnmnpmnommommnpoopppquuvwzyy{~˹~}z~}}{~|~~}}|zz{xy{|}{~~}~ywwuxvurpqtkfyda`abdc_[[XYSLWPC;87997345587Bt[MGDDEHU|vVKKGJHFTgEGHPjyxuvvteOnt^\]_^[[_w{{|}~xurfR|xuzxqrtvwymenwÓtkiiiQ8/8Ujlmnkiikw{zwwvtprqpoprpoommlommlnmnooonnopovuwzzzy|Ƕ}~~}~~~~|{|}|~~}~~}~~~~}}|}y|xvuuwqisk^\Z[TRNQLJLMSlnbXRMPW\hvbUOKOLUhfKIFQgyyxwtp]Nn\Z]^^Yb|{|{|~ywvtjYvvutwtuwvvz~jgoxŖumkniU6/;Wglnplihkw{xuvtsqsrqpqpmorllnnnoomlmomlooopsuuyzyxw|ʷ~~}}}~~|~~~}}~~|}~||~~~}~|}}||~}}}|~~~~wxwvvtooqspliljkonsst|zqigdihnzxaYSYbs}|vtgkulgffe_i{}zyxyxpdxwwvxwtuwz|{jhnzĖsmklfT2,<]imnpmjhmvywvvtrpprrrrmnqqmkmnlnnljnpnmnqnprsxzyy{||ɷ~~|~~~|}|~~}{~~}~|z|}~}|{~~~~~~~~~~}~~~|{~~~~~~||~~|{|}}z|||z~}{}~|~urqqsu|zt|~vtqpropz||{{{yz}zo}y}xv{}z|hhnz“wommjQ/,:_jmopnjglwwwwutqqprsqqqppopmmnmnonmpomoopnpqswyyxz{{ȸ~}~~|~~}~{}}}}}~~}~~}}~~~}|~}~{~|{x}~|}~|||wuvxvxzy{{}{vvyvsttww{wyzqrzww~~~{ggp{ēxonoiQ/,\nqvuqlhmw|{wvyvrqtrllqponpononmkorloqrpqsrqy|{{}y~ι~~~}~~~~}|}~~~|||{|}{{{{|{z{{|}{{x{zw{yz|y{|yzyy}~}}|qoonnoppqnhfnswymYYWVHO_baaYMIOWblswy~~pjpyđ|wtsnfJ2?_rqttqlgkv|yx|{tqvupopooonoooolkmnnnoopqrrssy{z|}~}̸~~~}}~}~~}}|~|z||}}||||}|{|zz||}zyyxz{zzzzz{{|zzz|}}}~oempomooqrnggorxzk[YWSEO^aaa[OINUbmux{~slpyÔyvtpngJ6A_qqtuolils{}|{wsuwqmrpllnnoqqokkpnmppoqssrouyz{|}~̵}~~~~~~~~~}|~~~~}}}}||}|{z|{{yyxx{z{{{{y{z{{|{}}}~~nQhpppppponmgipsx{l^[XSGQaba`YOKMWdpvz}}pks|”ywuqocF1A`orutnld^gsxyxuvxuqopmmonknqommqomnpopssqrquwy{{{|}̴~~~}~|~|}~|{|}}||~}|}}}|}{|{yxyy{{z|zz{z{zz{{|~z{}~jRYhrrqpooppoikptz{k^[YQDTabbcYMKRZcnw{||rhs|”{xuqoeG4@foqusmj`LShowwwwuqqrpmopnmpppnnmlmonmqrqpssuvyzz{~˳~~}}~~}}~~}|{}|{||}}}}|~}y{|{yxz{zzwy{{{{x{z{||z||}|iVU]fqsrqopqrpjipu{xi^\ZRCT_ccbWLLRZepwz~olr|ŔwvupoeI2@epqssmi]C@Rbqtxyrpsqmmppmorpmnpmlonlopooqtsuvzyz~~~ɳ}~}~~}~~~z{~|{||}|~~}zz{z{{x{yyzxz{wyxz{z{{xz|{}
|~fQYTWhqrppoqrspkjsw|vg_][TDWaee`XNJR\fpy{~|lkt}ēuuurofJ5Ddpttsnk]C=DPbovtrqpnmooooppnnmmllpmlpolorsqrxz{}z|Dz}~~~}{~{z|||||}}||||}|{{z{{yzxxxzzyzzxz{yzzz||}}~gYWTLSgrrsqqorsqppuwyug_^[RFZdfgbYNKS^gs{~}nnt}ÓyvtrncE2Heortqpl`E=?DOflnrsnmmpmopponpmmmmommqpoqqsrtyz|}|}~Ʋ~}~~}~}}~}}~{{{|z{|{zxwxyx{yy{yzy{{z{{{{{|~~~~fVVPIHVkrrsoqqsqqppwx{we__\RJYdgheVKMT]ht{}{mnt~ÓwvsqofC5Genqrsok]C=<=AO\hqomnmlklpmlmqnnnmlmmnonoqsuvxz{{y|Ű~}~|~}|~~~~~~~}}}~~}|z|||{yyyxwxz{zyz{{wyzyz{z}|{|~~fZVSJCFVjqrqqqorsrprwy}wb_^^QK[dhhdWKLU^jtz~zmouÒvtsrplD3HenrssnjY?;<CM[bjnmnnollpmmmmmkklmlonnpqqrwxyy{|~¯~~}~~~|}}~~}}}{{||||{}|}|{|{zzzz{yxywxxxwxy|{xyz{yzzyy}||~}~}paXXRFDB@DXorqnkflqsrrqw{uaa`\PLZffccULNT_lv|ulnuÐ}tvsnkb=5JiosuqkiV@77;;=?CKXejknnnjmolllnkjjkkkllopqqrvwy{~|}~~~~~}~~~~}~}}~~}}}}{{|{|}|x}|{|x{zzxwwxxzyz{zyyzzzwyyyz{{{}~}~~}yo^WSQB@B@AGYlrrpkflqspqswy~qb`_^RP]ghecSKOVbmx}umpvď}}strnnc?8MfprrohjX<67:<=>ACMYcknnollmknlljjjjlkmmnnoosuvw{|z}ο|~~~}~}~~}}y|||}z|}|z|zy{{zyyxwxyzzz{zyyvyzyy{xzzz{|}~}}|ymaYVMC??>@CFZlssqh`hrtroou{~rc`_^NK]ghf`SIPWdnyvnpvÍzsrqnja;7OgosrpjgV;579;=<=@CHZdiklllmmljikklllllmllopssuuwxy~̿~}~~~~~~z~}~}||~}y{y}{z}|yz{}{z|zwxvxyywvyzzyyxzzxyxxzyy{}~{xr\ZWNE??>?ABD_mqqmdZjuurpqw|}qaaa[OMbffh`QIPXamzvkpxË~}~xrrqmj_86NinpomidS;5689;<>BDFKXdgllilplhiihhklkjlmkmprpruuwz}ν~~}~~~~}}|~}~~~{}~|z}{z{{}|{}yzyy}x{{zyxvxywvvzxwyyzwuwwwxxxy{~~|}{vi_WVPE>;>ABB?D]qppmdbptvvstx}~o`a`[ONbgih`QMPYco{vjqzÍ~~~|}{~~~ystoml_<4QdmnplidN<677:;;@Zmopqllssutstw}naa_ZMRafie_RKPZfrzulr{č~~~~~~}{z}xynpnlj^:9QemoqmicN:778;<;=@CEFJMW_dikjghhhjjgghkjklmnpqpqsvvu}ͻ~~~~~~~}~~~~~~~|~}||}~~|{}{|}{|~|~}}}{|{zzzxzxx|yywwzxyyzyuvvxxwwywwxwxwxwttvuwvvz|{{vi]ZVNC>==>A@?:7A^mpoqprrtvrsty~}m_aaXMVbihg^QLO[gs{qjq{č{~~~~}~}}|{{|{}|wopoli\;8UgnpomjaL:9:9:;=>=BCFLNOT\ekjjjhehgffghhjllmnnmorssv~λ~}~}}~~~~}}~}~~~}~}~~}|}|~~~~~~}{|~}}{z}|||{|{z{{{y{yxyzzyyvxxyywywwxwwvuwyvvvuvvvvuuuuvvwyz{xgZZYMC@<>?AA?:54@fqnqqpqrtwutwz~~mca`XKUghgd]QKR\gt|mkr}~~~{||{|||z}{z{|{{wmppmj^36Vgmppjf_P:8;:9::>?ADFHJNRX_dfijgfhfeghhgjjkn
mlmooosv|η~}~~}|~}}}~}~|}~{~~~~~~~~}~}~}|}|{}}~}|}}}}~~}|}{{yz{zyzyyyxxyxxwxxxyxvwvxwuvutvuuttvvvustttuttvwxtg[ZTND>?>@>=?:68CUpppqonqstvvvy{|lba_TGWfgfe[QMS]gw|nkt|ō~~~}|~~{{{{{y}xx|zy{umoplm[59UhlmmifaK7799;;;>ACFFJKNPSW\dgfdghfffghgjhinkknqoptu{̵{}~~~~~~~}|}|}||~~}{}~~~~~~}~~}}}}|}~||}}{|~}|}|||{{|{zzzy||{yxxxvxwxxwwyxwxyyvtvwtuuvvwvvttsusrtrsuuvwxyj^ZXQJC>?<=;;:8:G[lnpnqppprtuttvz|ia`^SJXegfc\PMS^iv|{}kit~ŋ~}~}~||~~~}{}}{~~~}|{z{zxvz|wxz~qlnnjiV77SgklkiibJ=988;<=?BCDEJMLMPRW]acdedaegfgggfilkjnomnrpwζ~~}}~~~~~}||}z{|{{~~}|{{}y|~~~}~|~~~~~~}~}}~~~~}~~{}}~}{||||}}z|}{||{zyyzyy|{wwxwuuuvvvuwwuwwwvuvvuusvvuvvwsqssruursuuvtqc\XVSNHB?<;;8777::=>;;87:Fcrolmolonopssrssuy}ymda\QKXgiid[OLTaks}wr}|nmt}ċyXY]colM=78Fgqrnjkmlnpnqstustuz}xka]`OJ[eihg\NNVaju}~sk|hmvÉpRKKHJD<=?=<.(*0?ABDIIILLMONRUZ[^ceabfdegfgikljklmor{ȶ~}~|}~|~~~~{}~~~|~~}~~}~|||z{|zy{|||~{z}}{|{~}~~~|~}~~}~~~|~~~~|{|~|~}~~}}|z}~}}z~}}~|~}~~~~~}~~~}}~}}||~~~~}}}|zz|~}}}z|{zzz{{z{|z{{wz{z{yyzxxxwywwywwvuwvvvtvvswvvvsstsvsrsprsppoprqprprrrvl`[VTQRNKE@=>?@<<<=932/-.-;Qgv{{}}~~~~{~}~~|{~~}~~{}}{|}|~~{y{||{|{z{}{{{}||}{z|}wwwyvu{xtsxwtrwwrrpqqz~}~~}ifjhecM48Vbiiigc`bgd[H>;;=ACEGIJJJKOOPRTV[`bdcecdgffiljijilnpxDz}}|}}~}|z||}}||~~|}}|}||}~~~}|{|}}}{z||z|zvy|yy}}|~}{|~}|}}|~}~{|~}}~~~~~}|}~~~~~~~~~z}~|~~{yz|~~|}~||}}}~|z~|~~~~~~}~~~}}~}~~~}~~~}~z~}{||{~|}~|||||{~}}|{yyyyzy{{y{zz|{xyzzzyzxwyxvuwxvuvstutvvsuuvwsusrtrttqsopsppooqpnpoqqpsj_\WRPNMJB??=??;;Nlwspommmknopqprtssruyxeb_[LJ]gjfbYMOV`ju}xnovĈ|gNFECB@?<:=?AC@>9643014CVdox~}{~~~||{{|~~zx}}{~}zz~~y||z{|zy||{|}~~}}|z{|||}}{}~|xwwvvz{rruvxuvutsriYey~|}~}~|{zgghfd`K6=Vehifec^]ggbXL@=;ACDFJNGMMNPQPQSV[^bdedegdeiiijkklpowǰ~~~}}|~~~}~~}}~|z|~||}~~~~{|}}}~|{}{|}}{{|zz|{y{{y{{z|~{y|}}||{|}~||}}{|}~~~}}~~~~~}~}~}}|wz{|~}~~}|~}}~{{|{|~{{{~|||}}~~~~}~|~}}~~~}}}~~}}~}}}|||}}}{||y{{||{{||}{||yxyyx{yzywyyxzyzxwyxvyxxxvwywwwtrsuxusuutsssrtuttqpqornoqoponopooopka\WSRRNJB>@?===?Nkzurppmmkjnnooqssrpqtw{sfb_ZLJ]eggbWMRWalu~xmoxĆ~eJDDBBA?===@?<;>@Ogyuutppmlkkmlnnnronnpswwoe`_\NJadef`TOSW
`mu~tjmxÄmccjr|}{zaICAA@>>=?@@A@A@=<<:8:;<99997636AMYdpuwy~~{~{}~~~|z~y|~}z}}z}~}||~}|||||}}{zy{zzwyyzzzz{|||{yyzzxwxyxz{yx{~ytwyuqtuuuuutuuspl`OTNhzzxw{wyyxt`dfc_^J1=Ydbggdc\bfeff_]KAA>AGJKKNMMNRQTTTTVYZ``hefeeeghgejjm}Ĭ~~|}}|}~|~~}}~~}}|{~{z}{y|}}|}}}}~}y|{y||z{xy{{yz{zy{yz{|wwyx{{{{}|~}~{|{{{}~x|~}{{||}~~}}z|}}{~||||~|}{z~}|{z~}|~~~z|~|zz{||||~{{|||}}{~~~}}||~~|~~}|}}}~~}{{{}}~}~~~}~~}}{~~}|~||}~{y{}}z|{z~}{~{zzz{||{zz{zxzxwyz{{zyvvxyx|ywwuwvswvtvuvuuvvvvrsuvutssssrnqrrtpmmnrnnopnoopnoomj]\YTPPOOF>=?;;=@Kduvususrnklmonmnqrrqqqrxxod_^ZNM^edd^TPRWbnv|rjnxŅ{\KNOML[cow~xy}z^JCAA?>?=?@A@@>>=<<<;<=><;;:;7766639DLZotx||}~|~~x{}|}~}zy}|y||z|zy{~}zx}}zw|}{z{}xvy{xxzzzwyzxyz|yuxzwywyywy{wvwwyxvwwtuvssrttttrog^TWUVguyvrsxyxysebec`_I0?X`cffda^aceed`c_NC=>CIJKKKOQRTQSTROUX]bbcfdegghfgkkl{ĩ~~||}|}{{|~{}{|~}}~~~}{{|~{}}||zz}}}{{z{}{{}yy|}z{yu{zyz{yxxxuwzyyzzz{zz||z}{|}{{{y{||{}~}|}|{{}|||}~}~~}|~z}}{}|z|}}}}|{}|}|{}|~}|||{{{z{{{{zz{{{z|||{}|}}{}}y||}~{}}}~||}|}~}~|{||}}y}|}{{~{|}{~{|}||}{}~z}|{{|{}}||}{zzz||{zwxyxxwvwvx{ywxuwyxwxwwvuxxtwuvvuvtvvuvtqrsstpqooqmpoosrpnnmnjnppnnmnmnkfZWUPGHNLHB><;;?BH`psvwvsrookjkkkmllmroprqwwl`__[KMaeee_SKPYbmv{~phmxńxZLMLILOOMU\ajv}}wsy|{\B@@@@>@?>>@@@A?>><<;;=<;:89977:;9995149@FIJLMNOOQOPSPPSTV[^bdcdgfffhghk{Ī}~~}|}xy|{y~}z}}z}~~~|}~|}{{|{{|zw{}||{{|~}{||y{zy{{yx{yxy{zzxzyyyyzzxzzyy{xy}|z}|{{{z|{z|}{|~|{||~|||}||~}{}|{|}|||z{|yz{z{|zz|{y{zz{|{{yz{z||zy|{z|y{{zy{{{}|{~|{}|~~|{||{}{{{{}}{|||}}|}||~{||||{z{{|}{|zy|{z~z{}{{|z{{{{zyzyz{{y||zzzyyxwwvyxvwvuwxvvwxvttxxutvttvusvsstsstrqoqqpqqppmoqqqlnmkknopkklkkjc^\YPCBFIHHA;<=ADL[fotwvurrpnlkjkiklmorooqquth`]^ZHNehdg`UOS[dovzqhozĆvnq}}}yz{z~tQKIHGGKMKMNNNOS^_djpvz~|sqqqyzy|wWD??><>??<;>??@==<=;;=<<979998:==:;899;<==<=?IQ`mruv{xy}|xx~~}z~}{{|}|z|}{|~~yy}{w{~{{}~zwz}zw{|{vwzzyz|zvvyywuwwvvxzwtwxvwwwwvvwwuvuuuvwrqsqjnstnplncUWTTTRLcttmdluvuxq^aa`\\D0>Z]add``[]deaaba_aWMBACCGKMONPNNNSQRQPRW]`bceffcffgkn}¥~|}{{||||}}||{}}{}|y|~}||{xz{yz{wy{x{{yz{zy{|{z|xv{xwzywyzyxzwwxwxyxxxxz{|{{}}{~||{zz||y~z{{{{~}}~|z|{y{
}{|}zz~}}|zy{{zzyyz{{||zyyz{{y{{y|{xxxyzyz{zxzywyzy|{xzzyzyz{{|~{{{zx{{|}{{{y}}zz{z|~|z|z||}|yyz{|{xyy{{|w{}yz{xyxy|zzxwyzxyzuwxx|zuvuwvuuvvuuuwrwvsstttrrtstuttrorprpqsonmoonlnmmnpqollkkkillklkihc[YVOA;AHJIF>;=>?><<;>>@@?<==;;;<<;99;9:::;>@?=<<=??>?BA@=?DU`_ehefegoy|yz}|y|{{yz|~}||||{y{yw|{vw{}zvx{xw{{uvxyxwz{wsw{xtuzwuvzwuwxwvyzvtvuvuvtttzwtpvrpmqttrrk]SXUTTSOMfrtk]luvvxn_`c^ZTB4?X^abc`_X^bdabc`^`^WPIEDGHJLMMMPOSPQQQPRWY^cfdcegfhll{~}}~|}~zz~{|}|{z{{}~}z||{{|{}~}|}}zxzxz||z}|yy}|yzzy{zwxzx{{yzzywzxvvvvwzywvvxzzw||yz|||{yzz{|wy}|{}|z{|zy{y{|{z||||z|}{{z{{{yxz{{}||{|{}~x{{yz{yyyz{|yzzzyxwxzy{}zy{{{{{}{z}}{yyx{zy~}zz||}zxzxyzz{{xy{z||z}zw{ywy{yzy{zyyyxz|wz|yywwywxzxxvwwwuuwvwwvuruvvvvuvuuttusrttttqstrrqpspoooopnollmloooqnlmikjhlilllgb\XVL@:=BLKIE@@DIXcfcfluvvsrrrphhiikkmnopqoppvxha]\VKR`ghd\RORZcmu{~qks|È~zeQKKT^mwxxzu\bdku|TCJVXUS_i{w}fcp~tqsnqrwuyjOEDCFFFEFEDFECFFFEC@CFGO[emrx|k9.8HWdmmvqRA<===>;<<=??>>>==;<:9;::99<===>??AB>>A@=?@A?>=@OioM:.+45=_uy|~}{{~{{y{}y{}{yz{yzz||yx{}{xwyyxzzwvxyyvwxwvvxxvtuvtvuvvtuutuwuqssrqqusstxtqpsqopoqpmhXOUTRSSRKIiqreWkruuu~}l^``[WT@3DV^_`cb^U^ca]aa_^`b^YQICDGIIJLMOLQQQQPOLPRY^befgggijk{~~~|~~|{}|{|}yy|zxzzy}{{y{x{|z|}|{}{xwxxyzz{{uxzzyyyvzyvvzyvzxwxyxxywxzxxxwwywwz|zyzzy{|yzzz{zy{|yy{|{|yzz{zzxy}|zz{{}|z|zx{ywyywzzyx|yzzzzzvwzyz{yy{wwzzzyvx{xvvwy}yvxxz{xz}{z|zwzyyzxy{xzzzyzzzxvxxyzzx{zzzxxywy{xyyyzwx{wuxww{xxzwwywwxxxvwwuvuvvtuwxvvutxtuuttutustsppsrssrssqooqnmnnnpklmnljkolmnklkkljjlhljibTVSL>;;?FJKIHEGMYbgedfouutrprrpihijkilmnnoppotvf^][WHPaghc[RMRYeowx}}oks}ć{cQJHHNYfv{x}q`^^]`kr{~tQAENVZ\ZUT_y~kXV[\dqw{|rnkVFCCBCDDDEDDCCBBCD?;;956;>KU`cfmt|mZOF=8;===;;:9;;989:;=??>?AA@??BA?BCDC@<;S9*%'-/0Ywy}}{z}}yzz}zu{}zuxzxx|~wvxyyxy{yvxzyvyzxuvzwuuwwtuvvutuvusstuvvupqtssrqqpruusqrqnlmoomgXOVTRRQSQKNeooe`osrrt||i[^_YTR=1BUZ\^a_]Z_babc^^ba^__[QHEDGIJJKMLQOPQOOOOOSXaejifikki~~z~~|{{zwzzzz{|z|{yz{yz|{{{{y{{yyyvvwxy{zx{{wwzxxzywxwvxwuvzwvyxwwvtwuuwxwwxywzyy{{zz|{zyxy{xy{yy}zz{zzyy{z{xyxy{yzz|wy{wwzyyzxyyzxywxxxwzxyyyywwwxxxzwsvxtvvwzztvwwxxy~ywzyvvvxz
ww{wx{zyzxxyuwwuyyxyyyzxyzwwxwwyy|zwxyuvwuvywxzvuvx|yxwvvusvvuttuvtttsvwsstursusrtroprssrrrsrnopnommnonjlmkjlnkljjlhjjjjijhfaYYVKA>=?DILJJLJMVaddfdgnuwspouuqkiijkfhkmoooonusf^^ZPJTafcbZPLQ[dmvy|}mks|ĄzaOJIDEKR]l~}sp}~qb\]][\`doy|}gVSRWYZ[ZZZUW]ls}{~}n_\VPRWUThu¹pmtz|xUBDGDDCCB@@AAB@A?AAA=435443786;DDFOW]dimttuhTA8:=HRY`lw}{nH;:<<<===>==<;:9:8989999;?@@?@A>?A@CCBDFEE?8;X<+#&017Yx{||||}{uxz{y{||ywzysvzwtuvwvwxzxuyxutvxuuxxtuvwsrvvutuutswusrvxrorsrpoqqrvvqqqqoomnnh]QOSSPQOSRLNNcmllnorrqpz{hWZYYVQ>2?VZ\]]\\XX[aa^]_`_[_^\XTKEBBHJJKJNLOROPMMNJMY`ghfihhm˾|}}|}~~~~~}z~}ww}|zy{yyzxwxzy{|yxzvvz{y|}zz}xy{ytvxxy|{zyyy{{vzywwwvxyuuvxuyyvwvruwvuuuvwyxvyuxzyzzwv{zxywvyyyzztyzxzzyzyyzwvwxuw{zxyzwwyyyyxzzxyyuxxwxzuwwxvvuuvwxyvxvvuwuxyvvvvwyvxzswytvwvxxwz{wyyuwwxxxuyvvxxxyvvxxyvuvvtvwvyuvzutvuvuuwvuxtrvusvuwxuuwuusqutsstrtrqrrrpqsprqppqprrpqopononmnlmmnlklkijkiiiiigghggedhb`ZVUK?<>>GJLLLLONVadccdchpttqqrtsqigihjjikllnonmtobZYXPJVadcaXMJRYclrx|{lks}ąaNJFB@CEJWbp~qZiwzxvsuw{s`]]^YYX\bckqvyrbYY[[\]^[[ZZYZVVV\_`hoqsux{{|~|}~{voj^WQMIFFMZgsYMV`hsyYKRMHHEDC@B@A>>>?A@=822489=CCDCA>>=;8HaQCIMKC637:?FKPTPKZyhF99:8::;=?>;??==<=<9988889;<>?>?>@>@AAACEHFFD;36U4(!'.18]x}{x{|{yy{zwv{|zyy{xx|{vuyzwyzzwvxzwtuwxwwvuruwuorvupqutrsutrrvupqrqmnpmotvqnpoomlmlh^TQPRSSRQOMNKNeomorpooqpxxgW[]XVO;0AVY[]]Z]UX`b_^`__Z]`^[[ZRLGCBFIHHLLNPRONPLLKOV]ehgegnλ|~~|{~~|{|{z{||yxyyyxzyzyyywvwywzzyz{yyzxwyzuvyvwxxyxvuxyxvyxywwvxzwx{vvzxxxwuywsuvtvyxvyyvxzyzzwy|wwxwy{wxyxw{yyyyxxwy|ywvxxyzyxxxyxvyywuwwvwwvywwyvtwttuuuwvvvuuzvvvuuwvutquywvxvswwvxvwvwvwwxxvzyuyywwwvxvvwtuxvuxwwxvvwuvwuuwutuuuvwvtuvrsvuuvuxtttuuuqpssspqrsrrossssqqqnpqqoqpooookmnmoljlkjjijikjgiifihfhefffib^ZWSG=9@GJMLMMLMNZddddccfipssqrssrnjhhigiihjjmnnnqlbYX[QHVbde`VKLRWckt|}{notĈ}^NKHCABB@DJTcjnu|z{~{{vfSY^drtstnnpmkggiklrx}l[ZZ[ZYY[\[]cda[]^[[\[_aa`_[Y[XYVUPRX\agjostw{{z{|}zyslc`SKGHKNSRhSGKJMQZafsz~~|bjeTMKHGGDECB@>>>=<<;;=@CCIKJGC><@EG\9&.,.02222111633-Au{dD889998:9<>=<<=>>?=?><=<;=A>><>@ABBDEGHGC6,0S:'%*.2=]x|ywxxuvxyvtx|zxyxvtvzxtvyyvwzuttwwuvwuruyvrrw
vqqvvspsuusvsqqstsqopnmnomotrnnpnlllkeSSTQPPSSQNQPJGOcnnnnnmnpryycW\[XTM<1ASX[ZZYYSY`^\\`\[\]]][[ZXWNECCBEHJJKOQNMOLILJJTafhgjm;|~}~~~~~{|~}|~{{{|y{}{yzy{xxwwxxuxxyz{yzzyxyzwyxuxyxxxwwxywxxyxwywxwwwxwwvvvxxvwwuvwtswuuvvuvyvwzxzzuvxxvyvwzywzxwywxxxxxvxzyvuvvwxxyyxxxuvxvrtvuwvuvuuyvtutsvutuvsuutuvrtusutstusvuvwutttuvwuywwuvvxwwwutwuuvtuvtvvquvvvvvxusuuuwvtvuutsutuvuuvtsuruvvutrsqqsrorrsqpqrpsqpqqqqospmomklnomllijnmkljjjijjihihfgjhfgfgedeeec]XTPG:6==>@CFGJJFA;66?DDh~7+.0//02211324130ApxbA98999877:>=;<<=?AADDBFBEGIGED@B@?>?AABCDFFCA6(&Z-'&+.3<_uzuyzutwxvtt{ywx{wtsyyuuxxvuxzvsuvrruvtrvyrruunosurquuqqtuoortqpronkmolnopnjnnnmmj^WQTPPQPPQOOSOHHMcknmmmmmppuuaQYXUQM80COW[XYYYTY\^\^_][\[Z[\[Z]^ULFABDEGHKMNPPOLJKJJNU^egki˻~~}|}}}}~|}|{zzxz}zxzwvxxvuyyuwwvwzwy{}z{zxzzwxyywyvvxwuuwxzxvvxwwyuututvxtuwvvvturxttwvtuuvuvuvxxx{ywxvwyyuwwtvywwzvxytuxwvxwwwuuxvuxxtvwtvvtuvuuvsuxqtzttuprtttsrqvtrtqptrrsrrtssttvvtuttvvvvwvvuuvvvvvvvvsrrrvtuwutuuuwvuvusruvvwvtrvsrtttvutsrtsrutqtsttoqsqqpqrpppoonnqpoqpqrnnmmllmolkkjkllkjiggijhhehheeggggfecfddc\XSQH:78BILLKKJJM[dbefdbbbejorttssutniegfc]^ejkmljmni]Y[ZMEX`bb_UIJSYdltw{zlnvÃvWHIGCA@?AA=ADFRj}paSKED?;<;74884;99841022466225GS]`addbdeeehilqw~~j^[YXXYWWZZ[YXZ\_aefeg]ZZ[^_acd[ZXTUUTKMJIN[adm{|{}|}|xnbVO;345JXrUGFFGFGDEEFFGR[gqy|{tq{~|{qkc^SMJHFGLIFC@;<:>BEGGA73+-;AEn{2+-0/./0122233221>q~}}r]>8899889:;<=<?@AACBCDA>3)+]+'%*.18esww|zuwywtsyywxywtswyvswwwvvwutuusqtvuutuuqttrlotsprurooqolpqmknpmijmmhknllmpllmh\OPOPMONMQMQTNKJAI`mklmmmnmotp_RVWSQI42BPVXVXWPQX_][]\\\^]Z\[[X\^[ULE@@ABDGJKONRMILJKIIVbeigɺ~|}}{|}{}~|{||{|{zz{z{{{wxxvvwuuvwtvwxwxxwyzxu{ywxuvxwtwwvxzvsuuvxtrvxttwurusrttrsstuuqsutrtvuvussusuutuwyvxxuvwuwxvwwwuxwrwwuxuutuuvurtuuuvususuvtsustuurtspsrpqonqopsrqrsoqrpqqoqonrssrqqursustvsswtttsuurtsqstssqprutttprsttsssstsstuuvsrttrrsqrsrtqqtssusrrqqppoqqpnqonnpnnmoplnonnmllimmklmjkiikjljijgfhfiffgfecchfddcceab\WVRKB;:BJOMKIIHJU`cbdeefcdfiopqrrquvmfcge[TZcjkmlklkfXVYVMIV`c_]TKJTZbipv{~vhmuĆy}vUHFFDA?>@CAADFRlteXMD=;:87422324311-++++*,,,+*u||qZ=6889879
9::;:<=<=?CDDFIQ[glpz^RJJIEC@@CEDED@:/')`,)'+-38fttxyvsuvtsvyqtxxtswyussxvuvvtruvqpqussuutrtuqnorqonqrpqqooprpkloomkllllljjmmmlmgYNNOOKMNOPMPQOKOH=Hbhjklkkkjouo[PUTPOE43AOUUVUXRRWZXYXXZ[[[\]\Z[^]^^WMFCB?@DGJNWcd`\^^\WYgkqw̻}~~~}}}~}|}~{y~~zzz|zyx{yyyyxzyuvwvvwwuwvvvwuwxxvvzwz{zyzxswzvuwvvyxtvusuvtvwvtwuttvtuuqqvqptrqssnnsrsvtqrustuswxuwwuuuvsuttxwuswusvuvzuuuututrtttstsrsrutsvttssstsrvumjghfkinpqtppossqqqqqqqrqnoortpsrrurpuuqtsttsttrqwrpnprtrtspqrqsrqrsrsqsusqsrqrqrsnoprqpqrqqttrroponopoponolnonnjlonnoonljmjhjijklkjhikihfgfhighgefgdffegegcac__\XTOLD<9=KMKKJIHLVacecbegebbeinqpoqpusmgegeYPXchklkilldWVVXNFV^`_]SLLT[aiqt{~ukpwă{~~sSHEEC@@>A??ADDNl~ve[OE<::96313200/-,*(((()*)+,1=O_t}wu~whZTWUUTVX[ZZZ[YZ\]^_\WbaURJJHA;:7:9:86695.!pǬ}TI?*"')!(0.//20;MhQGHEDCEDCDEEB@>96:?JTahcajib\]`adeiknpuvuld\YSKDB??@BDCEE@:0'!&4tv7&*..-..00010131)At||yU:67997778;:9:;<=??@BAGLV^iptrYYUXTLG@?BCBDC@9-#*j{1)&,.0AhutvvussprsuvtuwvssxwqrssqtwtpqtupmstqpttpqttqmprmnqqnppplnonkjmmkkonkjllfkonmjdSNMNLLLKLKMQQKKMGC;LdgijigihikplXOTRONE52@MRTVTWQNUXVWWWYXYYZ[ZY\]\^]ZVNHC>?AEHNmxwxwwwxw{}˹~~~}{|}yxvx{xyyyzyxyyzyvxvsuvrsvstvvtvwtstvuxxvuwssvttusrvurrvttuusutrrsotvqrtsmpqoruppsoqssqtuprstsutuvuswvssuuttqvwsssuuuttvussttwursrsssrsqprqprsoqrqqsqpmf`LDJMSV\aaecfjkllnkmnooj\Z^abdgkkknnrtqpqqssqrqorsqppqrrssporrppprqpoporrqsqopoosmmrpppoqpmqqopplnnoonooommlnkmlimmlnnnnkjkjiiijgijhggfedegdegfeddecbdcb`aaabaZYTTQKE?9?AACKnzg^ND=:;:53320..,*+*(&'(**)*+15=yŻyiZVWTUTSUXYXXZY[]]^a`ok[PHC@@B?@A@:+$,nu*+,.41Betvwvtuxsuvttstuttuvusqrrrvwontsroqrrsrsrprspoprolopmmqomkjmljmkkiiljgikjflmlm_RJPNMKMMMJINQONQKDEP^cijhighhgfqmWMQOMNE20ANRTTTRPOVYWVW[\WWY[[ZY\\]]\[^YQIA>?BGTpwrtuwwvwuxy|}̷|y{}~~|vy~~|zz{wvsrruxvwvxyx{zyzxvwvwussuvtutttuvqtvuvyvuwtstsquvrrtsrsssttsrsqrutqsruwxrstussrnoplnqmmqpmqsrrutuusuupmloqqqpuuqqqsvtstsssrrtusqtstutqqqqqpqrpqsqprqpdQJ9&$$&(048;AFMT\_^^afhihP1,06?=<=>@@Eryi[ND=:<;54210..-+)''&)*))))*..9¿zhXVVTTQQTWYXVYXY\^afhhYLCCBAA@=;:978881$f͸XG=/!!%-.)),14961//13MgYnuaWOE?@??@AB@>BBEFC>:750//124558:
7*1}/(,1../.123236A@ADJNVanssbZVVUNNJ@=@@@?>>9*%/w`)8BGS?AhvvtrssspturrtvuqrurrrqsqrurnruspstqquuqpprqlnqpllmmmoqnlloljkmkjhhhhgiihgkicYKNLLKMKLLLJPMJMNKHHS_`bfhehghhfgnjVMPONOB21ALPQSRRNNUWUSXYWUVYZZVZZ[]\]_b_ZQIA?BCO`jmliklooknmkq͵hfeggedhdg\Z`^aijhijjlmillmmmnlonqonokmpplhmkjifdfhkhkmptstuvuvvspojmoprrtrstssvvvuwwtuurtvtuwututssusrutttsqstsvts~|tf[X]^[]addeilprnnqrsvkU[\ZaddgihlolpsnqspqqqprrrsuuuusrsrrpppqqnpoommmR21/+%##'''$! #'-.36>Ipyj]PE;896521210.-+(('&(&),++-03:}g[XUUSPSUYXYXXYYZ]agh]XJAEFEE@=<:8:><:4$iԶ}VG=0"#! (/.+),/49GGBCCA??AE@@AA>:60--/7TkVl{l]WPHB?>>?ABDDECBA?<;95-*+--255355, /3&/7:85565697;LR3Ez|~~}~|{|zd@57778768:999889;:?@BBGRYdqtm^XVTOJLE<:=???>=5("0wX-Thi>AitvsprqqrsqmrsusptsoqsspoqsppstqorrporppqrrojonlmplklqpjjnojjoohjjhfiljiihhcTLLMKLLLMLIKMOJKKADDXcbbceefdddhgimeQNSOLJ?22@KOQSSROLSTSTWVRUVVXYWYZ]\]^`_]^ZRIDCCL_hjjjklmmklmqy̷zdb^ab`aa^_[Z]\]`a_____^^^^]^\^\\^a^_`]_a_`\``^`[]a_bbdeghjloppqnje_Ydhjkmqqpsrosstuustvtrxtqttsssssttqttrqqsrsrqtq{nd][QPPNPSRSYaccfflnmTDLKBFJMUPS[^chfeqqnonmppprqqssttsqrqsstrroqqqpniYNJD?;8653-)'#!"! ""!'&(#   "!"!!&)-2=FMW\cfhljjmnptroppsroorqpmnqpmnnnommmmmllmommi[ZZZ[YZS=<@BBGFJNOSWZbQ716;AFMMRSYVSRVZ\^\]aa`baabbd`bca``_a`a^_b__]YTSMIFD<78=DCA>>CSZ\]^`bcddaaaaacflnosqsongb``bacdcegefdee]TQRNEHV[][UNGJPW_fkruy{{~ojqycjolnyxxwqmljkptvz}vib_ir}tgVD@;899:;;9853/132/-.+*)('('++,-/86;}{cYZYVTRQTWVVVUVXZ]bhha[RKIGII@?<=>>@?<7&l˵~VI;/# %& "-1.*)+/4:HI@@@@??@AA?>?;73/-.04ZoUrxdYVSLD><==BEDEFCA??<863--+,-.24455, 3.(4HTURPOOMLJGZX1Qx{~|{~xw{{vcA7667996::976789:;>@CBGT\fqof]WRMNLKB<;<=?@==5& 9V.aK9Fmrrrrtprtrqosrrqqsonstspmprpvrqqprqltqnmpppnkmmjmpkjlmjhhkieijedihffiighif_RGKKJILMJJIFKIJNIFCMZh`^`aabcabbfegkdQKLMLJ?/0?KNMTTOLLSUUVTSSWTUXYYZZ[^^_`]\^\[UNGFL_fiklkklkkkmovwɲ{ab`aa`a`_a``a``_`_aa``bb`^]\[ZZY_`^^a``bbabbdcbbdbabcccccdggfdfffdbacbehikjmonmmnnoppsssuurstrssrqsqpqtrqropqqqrqn}ytnfc]TMONJLMUZWPX]ZZYSMKEBBDFIKPQSZ\acghghjhikmmmmnmorsrqrstsrpnmojjfc]WQRIC>74.+'#"!   ! !!! " ! 
!&,4AJQW\chlikmpx{||zttqnomonoooikmlmkjnllnh_[\\[Z\ZQQNHFA@=;=CEFF2!%'*155235;:BGKWXYYZ]``_`bbda_^^^^]``[[WTOOJFD=45;ACA=;BS\Z^\]`_aa`_a_`^agmnppproogaa__`acccghccfcZROPIAEW[[ZTKDJPV^fkptx{{|~~~|ifpzbdhiloghklnmmoqttwxrhcbgpx{~xl_MD978888;Jn~qcYJCEHB86797034/00+*,-+,-13<5@}wdXZXWURPSUUTSSVWY]akhh_XQMJMNHFBACDEEC9 %l̷|qeVE@CFC80159@?<92139Mw;9;;?>???@=>?;61,++-4[qYqr^WYXNE=888=AACEA>@==951-+*,,//3564* 1Ë0+8]wxnlpkimlhl[+Qqx{yxy~zxyytaC<<988777;98888::ILNPROMNSVTUQRUVTUWZUXY[^__`\^`^_^ULKP`fkjhhijjijnow~zɮ}bbceccccaefddeddffghfefgggfdc`acedcdeddfffeeeeffgegggggfghiihhhhhhghhhhhhiijjihkkiklkorssrrusqsrqsrnprrpqploqoqsqsyslb^]XUV`ffihhfaYYVSNJF@@DHNRSSV[]^_bfggjmmklpqqqprrvwuwvruuqqnjfb^USMF@:72*)''$""!  "#$!""!!!"!!! !"#)+8?GNZiz~|{xwwtopmlkmmlkkld^Z\]]_^`__][_Z]\XRJFGG>5343--'&% "&'$$&*+.336@>==DPYZY]]`_`b`a`b``_aflprmorqng]_a^]_abaddaaedWOORJ@HTWX\YSRTUZ`gjpsuxyz{yy|}||mjq|w{|{wtojkonomnsssrm\V\djknw~|}m]QF?:4558Gn}tqqqi_WUUVU[]TNOE=FGIKLPVYS6=r^SSNMMNRSTSSQRRTX[`hblf\TQNPONKIGCDFFF=&%tͺ}ushihiki^P=:3.*('+5rWno[VTRNB95348<;850,,+*+-.2464) 5|,)8\plheghkgfhdQ,G`hjqstyzyyxytf[SLE@<87787788899=?AEHKWblqg^XMHFHHEA;9:89::82'%4J4jtT50LlrrsqmprplprqnpsspoqomnollorojmqqmornkmnmljonhhlmihmlihjifehifgjgddghihbVMIIHHJKGKIHIPHFKIAEPY_^\]a_a_`bbbeeaej_MINNKG<-1?JHLONOKLPORSRQTRSWVTTY[[[_^^Z_[\_`ZSON`ghhfffghgilmuǫw^dntqmqqqrrqrsqpposrqqqqpqqqqpqqpnmnqokhkmjihkkjknnnnmnnmllmlmllllkkklnmkjkklkinllmmonooqqrtpqsqrrpkprqsqlmomnpppt}{ywxrrogbcaa_^^^[WUQLJI@A=8B;ACJGPPU\`__bejjghkprvqtvvvywyyyzxywvutofb`WRJE>75.*'%&&##"####"#""%%''&&&%#"!! 
!,Kjzuusrronmjda]^]Z\]^^_`abcefghfgfb^WYWSQLGEA@>:3+!$*18>@EKRUQD;?CHLPOQRRSPLJE::AA??<<96/,+,++.02465) @@HFCGQK*?PW[^dhntvywxwsmgd]TKFA?=;::997:<>CGJOYcmpf[QKGGHHD@:9877776-%$>?,at\45Okrsqspspnotppprqppooqprnmnrrooqpnmpqkimpllmolgikjggmkhijheeggdfid^bggd^RKIHGIIJJIEDJJHHMH?HS[]^XY_\Zbbabccbbbdf\LILLIG:,0EWVTW]dcdlmkrspqu{zwtttstuvvvvxxwwwxz{z|}}||ztfdiloquwz|~~~xmir|Āǿqfklnqlmnnjiijqrv{}k`^^WLC;8?lxrpnnox{uti^bint||t\+Is`e~~|soibZQKB>BKPRSVVTRTX^^`wn\SXXRTTQNKHIMNOA(*mϺwrnl]\ead]TT\^`\WG2/4@@@<<:950+)*)+.13453( D^(5?@:46487579:98741.***,.03563' MR0HPXQHJGHEGBBNR?,26830.+)(('%"&H1Acp`44PnpnmonnmnpploqqinrnjlnlkkookprjhmnlhkmkhhokhhkihiihegigdehhcdgecbdfcSLHGFEDFFGIGHIIFJKGGNZ][[]\[^[[[^``a_^a`_bm]FDKJGC7,1>EFILMMHFJONPPNNRVTUXWWY[[W\]\Z`_^^`a_adeeWW[UYcWK\gwʦ~Yezuy}~{{||}|~~yooooqmlimjjlqprwvuwuuusvutxz{ww{w||}y}ywy|}}~~hlnkklkhzwuu{~~{uqg^ZTRPM@>>@GGKPRUX\bfjmrtx{}}zzskiecbc_]YXTXUSPNKD9/2i<57>=AAHDJKGDOSQTSVXh|~|yxvsponmmmnnmlmostvxz|~}wtogb^[VRKD=4,'$% (+058ADEGFSSQSVXWWWWWV\]_djklomnof_YWYYY\]\]][XWWTJIIJB:COMJLRPOTVTWVUY\dfglmomtx{}~}{mfefgjlnpqttvlb]UIKKLIDDNRSV^_ghkntxx{{{}~{{smhp}vklu~jſ{rzy|}|vdXM`}}|zxuspnow|~wuqgcejqw{}x\2S»unkih_\\]^\\^_a_[UNGBDINSWZZZ^eÿĵoZO90.5BKRD&2ұkQvٮǰ:'*-3?@@>;;82/,)&%#(6sxYsjZUQKG@720027;<<==;7563/+)*)*-11254( +QK.KbsrlmmobgdccaM1.-/39>CKRVZdjpuxxyvvsonlkh`WURSPMJMTYZajmgWF:99:=<93.)'&$"#&%%(K0I{~\..E\]bchijmqonlopklspkhmljknnnopmjkmnkimjhghlhifijhhheeghdedda^debadd\QFGGFEEFEEEDGJHCHIDAR\Y[][[\]\XX[_^[`^[`_^aeVEBGGEA6*1@GFHJMKDFMMONOOQSSRVWWVY[[\_\\]ba____`cedbWV\TYeYV_hxŢ~I^lD8CGFNMONUWUVVVY[^abgjiploovrrvvxy{}{}[MHIIIHILJJMMNOUXRSTUU\[^a`_d^^^`adceediihhghjjklqwrflmloljmw½}lurrnf^SMID97?>=BKSY_chlorttxy|}y|}wyo_;*7s8:=@9>G^ntwz}~~{yzzxvtuusstvx||{vnkf_ZTO>:3*&$#" 'CbC>@FFKMSUWZ^beegjmpqtuxzz}~~|xvslb]jzvhkvhc,Ei~~|{wtspootxzurpgdelsst{~|z]2Rûis]YYYWZ[XWZ\[^`acbcaSGACHNQVUTWhqYD(+-.9FLH(4ϳjNپſǾƕ'*+.5=Tؘ3#/0(##$**OmBMPHGGFEEFFF@842231/;n^Upi[UQJHB931/47:;>=997451+(()')+02577' 
+ZI)7UmpljkqrwojkdP7.-..036:DLR^kx|tlab`^]_\Z_a_dkleTB;768970)%%$"!$$%&Q-Jo~U-,('0?MY\_jjginmlmtsmkknjkoplmqrljlnllkohhkjjikhggkgeddfddgecabddcdd^NGFGGEHFEEECEHFEIIEJNZ\XW[WXX\XYZ\^_^_^_^]]`bVBAEFC@2+1@EEFHLIFGJKMNNNQQRQTTUVYXW[]]\aa__aa`bdfe^VO]TYe[W]i|Ğ~GjP + !&*27;8CBKPT\bgijlmpqt{~}t`;.B|w7>F=GB@@@@@>C=>=>>CBFLZbchlpvzvqrstuwz{{~sfWA<=90 :t{8...135441/17<@HNLNRZVTLNPPSUYUTYWSRRQNGDGD<9AKJFGIMMNNKLPROSWX[a\WTW[]adafiigqpoppnmkkklppon\KMZacekljggb]YRPRPQUVX]]a^`abejloqstppmf^Zgv|}||~ugmu~azU'@f||ywtpnnnrvuqopfdfmutwy{{qV)]ülVWWZZ[\YZZ\^`__ecedWKEEHKPSRMQeßk[JBFCGGLTH*;ϩeQ¼e$+,.6:XӤ~zue1*S\N.UnIbqn\QSTRNNRJ?>ABHHFGN;QlfZXSLIB8211447;;9765550,(%%(),/1674% + `B'*AUWQUQ]nl]RSUF6-+,--/4:@EKWh{ǽd]]^`efejnkeXIA7473/*# ! $$%*]w*Ccg6** %,6HPS\hdekokjlljgllkjmnmilmkhjjjehkihjnideifaegf^ddb`aedecaWGFHGEEDGEFGDEGFEGJGIUZYVYYXUY\ZUY\\\]]^_^\[[^aQ?>DDB@3+0?EEEEKFDGJOMMLLPQQQSSSVXWXZ\]]__[`aa_abba[RLTT\dWOZg||HtD "!)(14>HO\kzsV!!#&))*.33:AJSfvlkljkkhknuz}|~}ɾZ@`]`QLOVRPOMKHJJLJKJIOW^`befilnrvy|s`92Il:MgMmIGGEMMCIGH>GKLD6,HRNQQ\bekpsvy|rmnoqruvvuy{||}u^LHHD7  + ʟb`żD&,-06;_̟8,~8]eKoYdaPIPY^hhYNSXTHFK_mk]ZTMKD832258;>=;966764-)%''*-127::( h>)):\gb^]gtf\WVZJ6,+++.036;@IUfzeVY[afggilmlf\UOFC<7.# !$',ax(-Q^J1(! #'09GLXdbelnkkmplklnmiilljhkjhghifehigeegebdda]da]`bdcb`SHEGGDDCDFGGEEECAGKJJP]ZWVZXUX\\YWZ[Z[\[]_][ZZ]`N?>BAA>2*1>CEGEIFDHMPPMMNQOQSUUUVXZZZ[__^^_aa^]_adbZUSSS[cRI`j|˼N}B #,1&"#'(..219?Q]juQ "%((+27;CLXvwojgjjggjmqyy|~}xwx|{y{}}~ȽRP`fePUQPOOLMLILLMMKJEELPTXY\_dgjlrv|q`70JeLssyzV\\VfbSRUYR]afQ60ORRVUXZ[`dglpsv{~unqqrrsrtuuvwxx{}~q[LIGD7   + EB=?CFFDEIKKFGGMQOVQQRVUV]_bacfjlmnrrqqpqqqusuwwwz|~vrcE'+7?==BJOSUTY`cegcgim~qblw8>i¼T)Cf}~}zxvqnnpqtsqnkjeciqrrux{qX(g¼[1Pj}yvtqroljkldbb_a`[90Wyߣm^SRQVXMCWN*<Ė_q»ŝ?)+.15:aј43s+^V1gJG7*/7BMSOC?@C<2AO]ol^Z\`NC85669;;B@==;::A=0++,*,-.3:;8% +#s:,*FozprmtuqnkplS4*)(*,/36;@JSd{ƸfW\]abegfhmoomomfbZTB8-$! 
"$-fw$:z_;)(@dH0(%"### &-BCEDFGCGKKMLNMOOPPTTOTWYZZY]\\`^]]\]]_bb[XQPQ[[IXdh|ȸGA(L9797211327;;;:4/'*=Ynh\UOLKQQLPU^`fzxR0@KOPNLKIFBB>?=9;?=:972&6@C@A@>48<9<=?@?>?@AGKIQU\vvmhgifhighouwy{}~~{yy{~{z{zxyzǽLL_bfXSRNMMLLLLHJPQTO> X~Z-.fN3dI:*&*.21-*()*-,)8AOtmbbvqZJD?BECG_LAF@:KWX3.110+*386)+:79=DNUXbjmjgkljiiiigfjgbfiddfgdeggf_bfdadgb^_bcbYIDBBBDBCCBEFADC@@DJNUWXXXWZ]WTUZYXWXVVXZ\\\\Z[[VYWG:=>>=ABCCDFCGLLJKMLLOPMQTUUXXXX[\X^`]ZZ\]\_a^[[UPNWRGYdk|ȲFDfwKHMJAAEFCBBCDA9,#)Spjf_hkaerh_T^ugf~}ueW}z||y~}u_T|ujfgfgjifioqtvxwy{ytvx|}|}}}~ǻIOZfgYQUNKLJJMMJQXi|]6;;91+'!#).014;AEFIXa`]ZWWXYXWQQPPRVWW[]acefdfkommnrroqqsspvvwoD-AUK>:9:8::7ADEEXmkdov.4^½M#Ih}~}}zurpmllqtvutqidfktxx~~^ lbf¿_4cлogbTB55>HXbO*H̴qiHbD(:OD,4Y7(B8,.5@hʣT !!&8wN6YC7+'+21-(),//.,&'*KvmejSMke]hh--6BUJGHIQP:" )y8()>ULDGAEC?DFINA1($'*+,049?HTd~ɼ~z{~~}~}~sk`T>8,%,v`1PDK^<1d!&'' *325:JRJNblkhjnkjlljhihhedgfcfhebcfe__deb_aa^]cbbVFBECCBBCBAGEDFD?>DINRVVUTUUZXRUWYUUXXSUWY[[\\YZ[XWUH<;<<<9.)1@A=2489:::<=;0%!B{j`[`da^a\OO@Pwo}yrVL|tiggfghehiloqqrttvojhowyvy~ƻJKTgcTTTMMMJIPP\vRC`}~|uqpnfbfhiowx~NARVK]ju~|pX72OVUYXjk`jmqmm_c\ajgbXG)B\STTUTRSUX[[aehmvƎmjd`dddgf_acbcgb`bddagwwqhUIHF@/ +  :bD(#&+*+-/(!'.14:?BILJKNOKB8/)+-.3679;=>AAFThmfnv+2\üN)Jj~}zuqmljlsyxuqkcejrvyY,z½fr^f¿[+cڽm]?##*,?P\R)GʶٯoA|}|{s025@GOSUUSRRTUVUTWUUTXXTSVXZ\[YZXYXXXWG;;;:;7,'0:??806898999<:1$#Hwd`_`\YYVNQ=TwotmRLsihffhhdhhimlnprsrkgcksmjosw{żBOUhbTWRMKKLLlaRZ~żD`|rvrv{qV64XU@>9KUWd_]YVYXRWSY]T<'G`TRRTTRRSXZ\]ainv}SJDBDD=A@??ABGDGC?ZlpoeRIIF@,5J1$#'-1013)!*,05AGLIC=78:9;>>>><:;;?Ziorvyy}}~}yywwvwvusqqonljnmjjgecgmoqrtyyw}n?/G_L?;:9;;::<:==>A@EUk~jemx*4\Ÿ¼H%Jkxrnklmtyxvpjccfnsw{sW-ý[kX+kοpZ@??>CDT]P'TʷՂһ/36Akٱz#(4dD?RA@*$04206fyb;0.(!Mzocoÿf\,:[_8 
.|2*-Ov|oqoottmomaD1'%%$(+.38?GUgúV"8X1_^9!4J%&"!&,-+).3D_fegjfeijhcdeceeff`cdc`aeb`aba`_bb``c]NECAD@AABBBCCBDEB>?HRWXVTVVSTVVUUWTTVYWSUWXWXWUYVXWWURB89:885*%.9>;;?AABHLKMNMNMMQQRRVTVTX\XY]]ZZ\]\^`__abbba`_a``fj|~C;}wB@>6057866;<;81%"CqYTUSKLMKHF5QxknlHOsjffggffhfgikoooookijppgfjlonp~ŹANT`aUUOMLKOhwXOVZ`S{uv~sR61\M;8=Mg]^`dWU`jb^V\aP7*K_PRVUTROQXZY]cinvӺySKC?>=;9889;;<;=BCBACXmomaPHYe\4#($ (.)%#+5SVUG(!"#$""!!%(*.3;CGEB?>>?==???><:6/+3=DNT]`eousx~}~vqhkmmpqvytzn8-I]G>:97899:;:><@@?DWn~gdn{,1\ʷJ&Kpytploovzzxtneackqv|}zmR)ÿbSo¿X.ihYKMOQ]WU]H%YȲѿ}*25=pޞ%"[=%5iDCNVusTE74FȺZ0/$W|phrjgjVSORJWtZ2:NePexjaqW8 9{4+/RxlMNS\ijUS\_D/'$$#'+/26>GSk¾LHRXYXVUVTSUYSTXUSUXWSSTWXXZWUWYWUVTRB77:973)&.7<<=A@@DGMMNNOQMPSTRTWVUX[YY[\ZY[[^]]][`aba`ca`b`^cjuA~=u;>;4047766;==92'7pPKTWRKLMLH@2S~wfsnLWshcehfdebbhjkmnmoomjmrpnoqrsss~~y|}ٻŷBJS^`WWOLKLJZvq[`acka[£zv~w|{oR51aL96>hlD_via_ityjXegQ7+IUTSUVVNIOTWYadipyѹwTKC@?><;;88;==<=@A>@CZkppcXbk2%2DO='"!!$%(()*(!!3dlm]B'"#"!!"$$&'(*,,.////576=AC>ABA??@>>>@@A==<4$  &'&+28BJW\]hmr{~ymnqtsuzzz}i<*IWG>98699;<:<>;?A?DXp|cfoz+.bű¼I(Mqztqmmptxyxupg`bkpu{|zkO+ýo\mY5p֚gY@6-.48P\L$\ǹz/15:sq$*kf3p>JKűe4]c6/" `sfcfTKGD|Q@LUZ_[Rk|R3<=?E`mp|\< Bn.+/TrZ312MfX<;R_A,%"#&(+-27=HWhÿ???6mvS0)4A$$$+-**.1Faikjggigfdeeffhgedcbbba_a`bb_^a`ca^TDCACC?ABBCCC@@EHD<?@BHLMJJNMJQSONQSTRVUSYYWVYXXZ[XZ^a_`^]_^]]_`aitq{|HzFw?>;405::::;<=;2(!JzOAPcaVPPQOGA1W~teol~DPpgcffcdebchjjkmmnnfcegilnuxx{ˏ>CYnttgVgR-!0DchE*"!#&'++-,(*;CQK8''+.-//1//1021553>?@ACA=<5%  &%(-0;EMY_hjou{umtrutssty}g;-J[H@;989:;:9<=;@A?FVnvdfm{+/_ӿ¼I)Nqxrnmppvyywqjcdott{|{pL.ÿs\oX3smh^F90,-@\_K%`ϻ˲Ž˺,18=>@JMKKMLJLPPOOPRTUWWY]YVVXYYYZ[[\]\][[[\[[]^^hljr}uJrHyE?;548<==>=::93&%OvJYb\XSOQRKFF7Ythmq{EQqeefbbdcbdfikkllmlc___`ckszz}ȅ}w=QOX]XUNLIJRb}xfZYit{zziJ-2iB92@kmee`bqX[iif\aVF/(GRRUUSA$!%*17AIT_mxϳlJIEBA??<;=;====<;;<>>=>@ABDDA=3! 
##$'&***$(,/118;EJTZckjjPMV`bbadhtyz|h93JUJA;7979::99;BKVXWWTUWVTSXUQUTVTTTTRSVXTUUYVLHRUVSRSJ=89755/&%/9<78;===<;;<6(/Sy^iZOPONRNIGI5\}uomrzDU~lffc]`bbacehikikllifa`bgmsxz~ȪffAPOY\YXOMIMYlSVYfzzzfG15n<74:CFA@IGGDBHFHBIJ>,(ERTUST@'$$*4>IViuЮeLKEA?>?==@CAE]msreWpt[A%%6Iec9)" "(,-,+&-N_G6*%.5>BIU]aha\jsiikgf\UXB/4<=<>?AABBCECA<0"&'*+../3244442.-,1.).1DHNPQLJHGVug0.JSG?:8:678:8;<>F\qxagp|*0eſƷý@-Rz~xsnopt{ujbbklqvwphG7öWsU>øq\IDDFFDPZK!cızPR?F0NWGO2MPM:UYT=$*17F{ΎѸfp~-$^7WvAɤM5`V1*  m{qkf_eba[XWpnapobbi]DBGGHkzprsOJ8 Ta(((3;63313237<=90& !&(-5BSjoGmGpA?:228;<::;=<<4%)NqeYSNFDEEJQN:c~skouz>\|mfecab`ccbfehijkkllkfehknswz}Ɠn^siALMV[YYRMLPWhjhdVfWX\eyywfF17q{=85:><:8;<<<;<=<>=>FexvtX@>=;=E]krobZszi?#%6Ig_;&#!!$')+-.%!1NRH1(&/4?Qaoqedhbbghgg\]a>'4;>>?BDCDDEDCA:/!$&(,-/44:<=??<=?CE@?BEFGJJNU\p¿·~`75NXM@97978889;<=@?=FZtqchq~*-dűþG.Sw~wsnonry}~}tkecfjnsvqlG6dYwǺR(C1%'!"&.,*,-5Oed`dfb_bb_]_bca_]]^a^[\_][[^e`KA@?@A==?@A@CAABDD@n<><56;>=;===?<2%#Aof^YPHGGHLOG6d{okntw=_}jbb_aa]b`^cefiijiklifehfhnsy{ӱcWfw<;:>>=+.IRRTUVB,&')-/27@BCLW`c\O;,SmЧfSHB?=>=?Fm`BA@??E`knl^Yxi6%9Lh_6### %()-0.&"0K^K3*$*+5Mcpqg[\]bggimdbU<(39>ACDCCBCCDA?;1 #$()*+.1679;;?CKPUMILKLLLKQXe{ŷ`26O^K@;:88999;;::98=FRYE!h˱z|ughmhmjc^?33:H~$)'$/n.XcDm~O>4cN2,!+vx˽d\P]P@H8 Z_.))7CIIJLIHMMHG@/&! 
"&),18CVqn("V},3R}d@(L-)% #(0-((-8Ocb`cdaac__aaaa`_\]^`[\^_\]_^[JDAAA@@?@@@@AA@ADFCAKWXSSVYWVXWUTWURRTVSSUTSUVVRUWWTTQHISRTPOPE<97422-%&1;;558<>:;=@?@@AAB?4& )Svaca`YVYVUSND3aypqrruAc{ja__a```^_dfghhjjjmha[_``eiltz}y`ir:PNUVVURNKKOjuqtxt[Y\s|wpuzqz~u^A3;u<85;A@;<;BA>BJB@=BB?++IQRVTP:&"#%*.3AQSABBCCHaklmZUnwb5*;Me]4&"!!&(),/+$#3YePK0 "$)+4J_qtfZX^bfhmkaSO?-5>ACFFDCDDEEFFB: !$''(+//248::=BJRPJJNLLMLMPXdzŷg6;UaK@;:::<;=:<;<>>@C?BC?@BCBDCNUXVQSXXQTWTUUURRRTOUUVTSUTQPTSQRVPJMQRSNKNF<85202,&'2::668::@JMJKKNJLPNKTUST```cgkliiade[OKRQQMMLFFIJIGL[|mWHFC?U}yKa{\L}cX`MsBA>:<>>?A@?@A?4&.cvc`b`\Z\YVQLE7gynnhwo>g{h`^`_`_`\abcfehjjjlidbcdbbddfpy{y}߹sĦh;KNVYSSQNHK_qWZ[rķ{}vx|}zuY@0?o:76AVI@>BIFCR`XMKEMN+2QSTVWS1 $'+5=@BL_p}ΠVJDA?@??BLEBA@ADLajkhPHewd2):QgY0'" !$%),/-#$?accM.(.*%!!'+3DYmnc]\\\afe_UPN>05?CEEEBBEHJJLQNA" #$'&&),.238:;>BJRRLLMKJLMMPWd{ƹk3=Lazpclp~'.gŻyѾ¼@.\zvpojktocbgmqsvvmFB´R{ӽRIҽxmmsnjRHQZ_`b`^BmʫsnkdY[`fbbZX]adb\L2-1;CݶV 5IF85C{%i\JܢK3ū]0+1ļz{~oSQepYTLcf''Ee_DH4 +"jM))5kzooqmnighiT3$ !#&',03;K^s¾^"*\l)WI(=9&[+&% &*.((+.7I[cb]```][^^[]^\\\[[YZZ]__YPCCABA@>>@?@A@??ACFEFNTWWTSUXWVZYUTXXVUVVRWYVSPTURUWQRRVSOQSQQQNLD;63001+&'4=<33689@LNHLNMJOPOfeQVmaMIJIHDBEDCCGTwpK7BKH>^\^`@Q\[VSo?D>99;<<<<;=B?2'#:ir_ibY[WTQMJJ>1hwmrfzmFizg^`^]`]\^``dfhhiiihffgikjkjjjsuop{~ḔcEMOWXUSPLIKZowvWY_xǹ}zrZ=0Ee>65N`PB=HY^TszaWX_{i)9VSRUWU, $%)4<>?J]o~|͚LKGB??>>>IxgECAAABLdlkcLFmwW1(9QeW0%" #%)-0-$$5We^D+*-&%+.BYefc]b[W[`b_ZVP?38CEEEEFINTVXZ[VC! 
!$&%%&*-0498:=DIPPJKJOOOMKOVfyǹe/?XZL?=?JVSLLMNNH@=?Mb{~mclw'*eȿ³ý@7]{xpommrx}}zujcagnqrwvjC@µs}R~ſLL̹yvustoYLSVV[a_cCpĤkK:5% )$!&2,%(',1:Gۻmd|(hWHmw=<\lrq5.*6`_POIGTdug~}zNPDWt|ztCH3 $pK*)Dwz`OOPHHMP`gV0$!!#&&(,05?M_v½Z(fh&OXHD3"f&%%  %,.*')-6N_a^[``^][^\\^^\\ZXVYZ]`^\MABBB@>=<>?@A@?ADCBEHMRTXURUUVVWXVWWXTWVQSU[VQVVRPSTSQRQUUUPSSPQOLD:6310.)$'4<=5469;AIJILLLLPSjMGYͻHGHEDC?@AA@CRxmSOW^^KPy_B6Cj\_TVoBD=96:==9;=?B?4(!9gutp`XVPKDAGMB8lºxjqd|k:gxf^]Z]^Z]^_acdeggge^\^chjikjnpvwwy~}zx~㹮Ž]@MMWTWSMLKHM{~piTZ`|Ŷ~zwwoY<-G_:67V`P@?[TC@@ABNcjh`OSipW-+;WgP1%"!"%)+.0,#&GsjT5#'&!'+1CW_^b_aZST\bd_\WB48BEFFGOUZ]ad_[Q>  !$%&'*.35688;BKURKMKcw|jVQSesƶi3>]ZL>?HX\VXSSNJEA>@Ocymdkv),dûA5Xw|rolnqustuogachoooppiDI¿}T}̵VSŶ|stlYG;>=@DM[D"urMB5( #&'%"(.,(')+3=LΫ{)qVTzrJ;TZBCH.3+Bj_RPKPY^`_nIDMAsFH0 #y@*(FqvggeV?;KZfjM0% "%%(-38AL^vľU'hc2fwgJ. r+$'" (+-*(*08Sd`\_a]\]__]]]^[Z\YXY]`a[MBAACC?==>=?@@@BBEBCKQUVTQQRRTWUWTTTYXSVWQTWUUTVVQRSRQRRPRRROQRROMKB;50/0-($(2;=7337=AHIIIHLOPXmBMn՜PGEC?>::=@@EQq}m]Y`ae\KpjOTm[fXXqJG@;;>>??>?CC?3($3gre[ZSQMJJKNC4pufsa}f@MB8JD33iM8'>RRUUXI(!$$!%,04@Ph{~_ʕSHFFC@??BYoD@@@@@Neig`PVt|\1,<[fO3&" #%(+0/-!(BUM7+#&&!%)1BV[VZ]_UIRZadb^ZB5:GHGIQX\^__\YVOA$ !"%$%')-3348;=CKTUQVn}qfSTgxŹn3BbZM>jzwur]=?_kmeB." 
!%(+.28?NcwƺQ-oU2sr;(yw+''  )*,*))/=R\XY^^ZZ^][Z[ZWT\[WV\_]VB?A?=??=??>>@?=ABDHIJRSVYUTVUTTUVWUXXWVWXVUWWVTWWTUXWTRVRPPQONOPNLNJ@93/..+'#(3;95355:@EIJJMNPTtNETݣIEC@;:779=?AMgj^acdfcLeml|QgTVnFEA?@A@===@CD=0% .mtd_db_]XRNNND7stivh`:qwg]]^^\\Z[]acdddffec_aad^Z[afkjlmu{z~ܫ`cX]jrVBNN\TSUQLJGTkhdhuWZ`Ʊzzx~xuvlV5/KQ:47CRGA@>UfN>Dl?:8)YeM/%!""&),/.*!#,*2N_gb]R?EY`aUA-# !#'),19AMcyI0rR/ewl@+yv#('"'-.,))/;R][\^]\^^ZZYZY\[ZZX[^`SE?>?>>>=>@??>><>CCCJONORUTMPWURSUVVXXVSUWSRSUTTSTTSTUQPTUNJOPNNONMLNH@72./-,)#'28622327@HKIKMNN\q@Gfݡ@CA;:65648:=FY^dhfe`PawuWmN]dFB@BDC?<=>ADC=2%;}mdad`_]VSOOKA8utgwd_?uwhb`ba^`]\`bbeccgffcbeeheedinlhdhnpprw|ڤtTGNMZUUUNMJJSXWm^UY`Dz~z}yuw}{vyylU5.TR=5:QaLDFYv]?FucE@@5-CUSUWT?!$%$&(+7Id{}zxl[ĊSOJGBABBCGJ>@B@DScgh\G_zy^) /A[cM0%"!#'..0.("&LbJ>+##%!'.1?TTCC]yn`RJOW[^aO12=BGPV[baZ[lxp\( !$%&(*.13557=>>?>>??=<>BCBIPTOQTUTSTYRSVUUTVXVUXUTTWTSTUURRTTRTTRLJMPMOLNLLMG>4.+,++)"'1520.015AIKJKMNMnTDM|6D@;6433465:AO]gga\GPtx{OqF`dEGBBDC?=#'('')3Ji}~{UeāQRLIDBBCFb|KCBBAESbgh\JeytN"!0A^`H1%"$'@nR//)"(=@B7*$#%$%,1>VUKXnoe[U\d[^cJ*-5=BHTcg\Uh{rU%!"%(')*+.35778=AKSUV^[RNUkiMTc|ŵc/G^^STOTQUZWSRSRPJDFUovgfmx(3sºû>2Z|yursruxy}}via^`ptsnj_?OľwYмľCWƟ{o_U]\]^Y_hA'X*79Oۯ]/5652(!,50Db}7cк7JymZ61(Kxlk>.1CC//26AD?,*>|yyvwvwqoseD(" !#&(+028BOc{~?3zE1VstV9!$uaK41(!$.HC+(&+.>PX[XXZ[ZVTYXRTYXX\[YJ<<=@A?=;>??>>=;>@BDGNRSRUXVTSUVUUWWXYYXWY[XUYWUSVWRVVTSUXTOKIKNNMONNNLF=3,)-+*'!%/3.++.06DKKLMOQ\BFWz,JRUUTS8"(&#'+4Jj|~vvwiMwNQMHDBBBC]tpsGGFDDETchg[IfwkL !/EciD0&"&AT22,$!+34-*&'*1=*&,2?Y^ccjlgb\dtkVZ[C(.68:& #'(-*,.*+++/56788=CMUVT^de^WplLUcĶd1Hb^MGEGGHHFHHGEGE@HXowcfkw'2rξû=1Yz~~xrrsv|}}vkc^alsuvsa:T¾~rUſ~DWzkovlZTb\O?7[g?)ƱR+6;Oܿg)~4be3PŸB.(M~thmb[WLRbn~gkzlaZRGFC86Ba{yiPGG& 75+1Std]\]\]]V[S;)!#&(.38DQf}65z@;>?>?<<=?BBCJNOOPOPSQORTQQUVVVYUSVYTTTTRRRWVTVURSVSQQHCIOLMMLMLKF>0)*,)(# +-($#(07BHHKLJNfi=Ghj7?<70.-.0//2=S`c\SWeoKy@laJGA>BDDAACBBA>3% 5exj`bfec[[[VNA?@?BFFJIMQF9( 
%),0475-*+-/35798;;;<:<==<=?A@G[pyefnz(1q̰û:.Z|ztrsx~wlb_cluz}vaAV½qZíŽ}J_upvsR/,FinD*˪{xxtijsJ-6>P׸Z)7z;|qv:?x:1)L~y{lYJ:@NOcrefvrpnreMF7FŭuUI% :6-7ZxlUNTJLPQUXU=+#"##%-6;ASj}ĹŻy2 7=@vV+(6I)*,-0>U\WU[[UVYZXVX[[ZZOB?==;;:;<;>>@>:>@ACGORUTRQUTSQSSSSTWYUV[WVYYWXWVVVXWTTTURTUQSWHGJMMMLMLLJC;/(*)(( ''%07BGHJJGSwQ?Gz[7=950-,-...2=U_^Y`mv}H>=<>?<3$:o~ooogbZUUQNL;Bzrh}}aSAy|xxz|}}~yuxopnqorvwz֞vƱHFNS[KRSONJE^zmX_gŪ~Y}|xvfH20aB83<]\MREGKQa`12JTTWUR1!&,7Ooyrlqj~ѾtVPJGDBA@Cd¨lGEFDCGWfkm\Oi|p?#$2IdeF/$ "6t|M32/!&4?HS7&)6PM"',.@]psdUVRVWgsfVSSB35@HLLKOWZZ[[XSJ<*!'*.::1+*)-/357:<@FMUUUipUYi{ŵ\4M_YG>989:9::9<>>?@H]tvegp|&+sιǼĺ71Z{{trux~~wga`cmv{zs`AX¼xqVμþO_Η|qN*-HkskB-Ǿ>05hK$ 92)0Vyxvvvoopnc>&  $#!&/79BuyT0&=v`kobQB-*+-2AVXPSWUUUUUTRY\[YOA>@>><;<====>><;>AEHJORQPOSUURSVTQSVVSVYZYZ[XXYYVVYYVTVUVXUSTUXJHLMNMMOPMJB9.+++)*#%08CGJLKLWA=LH8=80-,*+,,-0;QZYgr|H3mXIE<9=@?<;;=>?:1# ?swrg_\PJKLLM>^RLYY]g_htZMJGJD,3NSUXWU. *8Nu|umvtϼtURMHFC@>@SoJFGGFDWflk`Tf~h>$#3Hed?-# (Ws9/0, -GTxQ3(*9K?',.=Whop]UPYakq_SPOC6>=?>>;<>>9>>=;9=?CJNOPQOOPSTQPSRQSTTSPWXXVYXWXVTSWVTTUTRSUVRQVRKINNONNMMLKE8000/..#!$&09BHONPRbv=?O>9;83.*)))+,0>R]hrvI3p[JD=;>A?<<=???<1%%:rpha`[URONPM@>|qh{bNIrmkprsryz͍qsjfhsBHOUYRPQMMMIakdbNg^Y]bxg|xyt_D.5kz=96?RQT`_escYJ6<<>@<.5QUTWWP*)6TsxrpyuʳlURLJGB=>GgZHJJIIJ[gij^XoiJ$&6Ned<-$!#Dzs>20(.IhX;/%(6I?(/0>`nj\ihiflaVTM@6=JSWYZ^^`\WSUONKGHCABCGFCB@DFEBFE@52689;=9AEPVWSk|`ZWTHQWgŴU/QbUB899898:::>==BCMdxnbjs}+/xʩſº|76_}|}~y}~vja`dnwxvre5bƿ{^þyIaxZLR\_^Rea65M5%/7;Y١5(,)#"9aP1+Sq1M{{5.1(O~ſľ¾X@\dUTenK@F=MreF Lt+++0Nc_YX\[[\ZYN3)  "(*/5>@>;9=ABIPPSRQRQTVUTSUSUYWQU[\[[[XZ\_\TUYTQXWVTUZXWW\ZSUUXVYVPPPQI>9AGHJKEGC:.%$,7==>AKMTXmc@EUڟ7<<:60+))**+0AUcpsu~·I2vUIFCAADC@?ACDC>2'&@qr`chf`][WVTJ9=rizdKLƷusrtutvvzjz[boGHNSXMPOJKKNhk[^bkXZ_g}}wq_B-2jr;97BYLObV]`UO\I><=@;-3RSTYYL*(8Zrsnnton{ůeXQKIEA>?F]kFOXFFGGHJKZeij^XxuH 
&7Qja<,%#+Q93/%)ReD8,%(;T>$+.<]uyiindUOH<5?PZ[]]\WYVRTUUUUTX[\\]\_]`acba`b]VNTPOJIIHIEMUUR[RINNP_XYkijP2NbSB997799:9:;;<>>Lcxpchr~*.y̼º}96a|}w|wngdfqxyvpd:i²z]͹Ľ~OgÐ~truoklilj84q[YuvtvttiNG628<[٫{vupkcH $\O8p6e.dy;0&W~usou_hKXYhmaIEHblI# Rn)*-EmywurrnlnjV6)! #')04=ESpƸc,N|0QmiI,&Rb{vgL60)),.5HQUVUOQWWVY]][IFLOPORRQSNEFFCA?>?ELUUUXWUTWZYVYYYZ^`^abedbdeehkkfecdfghjjhkijmmmpmmptnljiffjiijnrsvvxwqf_YY\ZWNE=^{{|}}e[_l}vvo^B.5rl:94AVNVg@QST\p\SDAA<.7SPUXXK) (8[oqllmo}BzɬiWOIFBA>=AFIJNNHGHIIHH[ejg[Yg9(;Pi^<,%$+bh03-% 1[Z;6+'/G`;#*/>\tn[ei_RJG?;H[dgga_^`\YXZ[YTX]cjlppqvuvzwxwvtsqoehd_]PI@EQUTZ^ccbovy}óM5PaRD==8:99;:;???BDQg|meiu)3t˪úx75`z{z}}{ohchuyxto`7n·y{bŜŻľ|Mm黉yurpoppnf:6ſ;.6<_ƹC@J9a1j0"_r4/%`}vmbczP87?FRhԍKRONJE=6,('+0A[fouu|÷I4VJE?>@DBAA@@BB;0%"Qzqmkie]VW[[I3E´kbwh|?S}{|wxstqopnrsyzccgiousw٭nBHMWYPONKJMOz`X`m{rys[=.=xl>86DQI]oLajXaqX\SMH?/AEOg~jait(2vʹʻú{54]yxvz~}qhclutpqn^3pŴ}hĽ|Poϯǹ|utrqppne9::-7=^֧>0|CC]2b-\sodg8./&\~rhrǡhylxNNcri`\Y{ZXM$_[**:k~hURWM?BNSeU7/4331/..-/:ERo̿[ [o%V{M+%oM91'&9Tyk;,*,/8GQWWWWZ\_a`abbbcccehgeejimiljghlmijlighklmlnoorqqttvvvuuvvvxyxxy{{{|~~}}|~~{xnf_SG>:>>?:1)5eomjgf`TTW[RF7J³|n`ujyGZ}R8.;8>::=3/).01,38?@>EGIMLPSY`hs{iffcgnjio{~Ν~s:KPW[MNNKKLKZ{|~YX]k¼~{~}p[9,@|e:74FQEbo\wjSM?\aVOb`1@OUVXXK:,$");]nmhjn]/9ҥ_UQLHA>>=JpVCHFDCJ]gigZ^wv],*;VkY:,$%-ff33+!.CUTG0(-@P.',/??<:58;DSg{˺T%^g+ZwyF+'yoftrgacoN3,,.28LVUVVWVY\]acdeeggijijkjmkmoprrpqqqqrrpotrrssuuvuwxxy{zzzzz{||{||}{nfWLA89CJ`wǒilgedaa[OD>:7D^fjkiuĶwP4QGA=;AEAA??>>>:3-:erqlid]TWYYTK6IķpctozE[~jkyjortwyx||oomqhltur{̷{7LNYYLNMMKIJtuzeOZao»xxv}}zvnY6-D~_766JPHgpYZPBBJNLNPQQNVSTWUnں~P(eX,cv]3+,9gT1::9<==V]]]]YRTZY[_adehhikmmpoqstsuvvtwxyzzyyvvuvwy{{|}|}|}~|rf[SK<8;62Co~xsmcZT[YXWN=PŴ}misov?]ýtwzyyz{yw}}zŻz:HO]ULNMKIKQmYGPRTW[arupo}vvmV6/GY968GWWpyAM\ACFTho7*EQRWYXK:76.$ '&'?hwyyyoc{v͟YSKGB>;:]nT2'#$,Vi70+ 
4O[M0)%,AF+%,0>YafokLQROWhk_XUB9APWY[Z\a_\^\\ZZQL[tp`@3012221,,+../.-.>PUXj}wq|²C2U]MA<97898::;==AADTk|kelv)5ˣøz25]v~|}}zzysjejsxvrk[/vȵtze½zMvˮͮqJ&.177Ie`3=˿{.36Ad֏'#),+')iAR[7MZFDN;6ɸr5.$kӱmATnXj~}w^XSehSGCdn^_M !eD+(-Nkhb`[KFTXRC23HWdggghgmmpspqwp}M'cR-/+,/2CzzrYdwqokie`acbbc__[[Z[^`beilonqswwxy|}|}}~~~{zywzz{~yl_SD975CObs}|z{yz}rke^emnomot~qOp=RKC@AEFFEGEA@@=83JvæxkaXT[WVUN>M²~nislsB`ůľŲӻȿr;FNYSKNKKHKHOGJWiu]Zavÿzgk}{xxlR7-HX:46LaVsuEjZJK_SA80JTTYZYD5342(%*')CnvwhΙQOLFB<9;>MmCDGEBENbikgWQcq_7 .@blR3%!$;S21*">]YE2(&.BG/%,-;O`a^KGIJJXnt[SPICM_cfigca_WSSXTUTP\vhF4101232.-/..0/./?RWRUtj`±yA6X_I<:98878;;;==@@DUhxiejw(5}Ͷ·v14_u|wz|ywxwumfhr{xytZ4}ź~xivMy㾧vd_\RU^ena4Iʷ½h(14>jՋ4'),*$?4VT7irD3aS3/%ktXKUlshQRXVSPGau_SLaqtlkT_F %jC++?lvqkkigfgc\I-"B[w~G*b~J+,,039J븄||sihiiihkjkkjiec_bfdjmqtw|~}zxvvz}xqbULC94Nb\H:9=<O{oksks@bıƿ¯߹mAIMTRKMKKKKGN`xsX\brky~vyugO6.JV>47T_NzzM[OPl^C:8:@Jt{OHIEACPbmsmbk]0!0B`fM4& !,csR41,% >WVH3&&1U_-%*.:O\ZMF>CFN\miSOOI>GV\_[b`^\PX`bbbZNZqoG3120232//222433/>RYUjr]qz?7Z_K=888999:9<==??DUiyfflx)0{ʳ¶w02Sl|xvyvw}}ooww~xotttf\cosumpQ0¬ofŦ½vKzϝ~udT_e^e_4Gd7'07GMSTNNMLKNPnnZT\dy|ztgM4.UO<36MVSw:89BPTEECDANdnvtbk~pO(#0EafL4("%.^vT20,' %G]WJ.%&7VV&$)09M_UGB;@MT\lhOJLD:@RY^gkglhdfjqqmeSXnvF30110412/248:975?QUS[[RMMLc}k~±{=:X^L>877;:99;<=>?>ATkwddnz'3ɫŴøw+"?Vd_[UIHHIH@ABEHDHD?DB;4:>@99<0,mdĩr?x۲ʹzs`ZVKRo\/N{vj=7qaCDL9 (z;+,=QTPXirsrl`I* ,, #%/7@KdڴǾ~=1oJ8S]YJ>Vǭ}iz|ved¿xm~zxwwsrtwxts~û<7;>CFFLOSSWZXTJOTVTWWC*,.())())((((*.5bcZd?uHHD@AFGFDDBAAAA=6Jŵlmsl_Z_WM6Xyhoqvk>fìå^ܹ~sn/5DMUVOOMKKLR{ydW[_W[`yÿ~tgK1/ZI;19RSTrPiQI^=:>@BB54KTScyi=$#$  $*9^oȊNLEB?;99@GrlECCBAAQdntpcisqE)#3GabI1'"%2fZ212/' (I^WD0%&5TO!#*/7LYUFH@GRScrkTKHA:BWcltsqvojnt|zulRVmvC22210332359?>=86@RRNTROKVYoqhuïx=?]]J=889::9:<:<>?ACWmtcdoy'4~͹·v,#;O[XMIC<64423...-,/.-*'&+),,*-03ƿ{meƿuN|tI0+5G\m].PlUGDD<13/-15FrЉ):)cPL}oy[45h|@/0"$nVOiʸVG@VuxnkwpGL3 (8-,3BQ]jrvojc_L/ + 
-,!%'.36IӾŽý½v<2zF3=PUH@[őroǶqx¿qR³t6QHBIOOCFTTIO]eȀ#-,,,,+.--*)+.+$&'((-0*&,('()((((())-/Yxotvvrsrrwtp__`DuIIC@BFGEBA?ABBB<2Kxtfde\Q;W¾tinovg;gıěI̽a[M06DKTQOPLJLLCGMdmqbW]bÿzopu|~xtpaH-1]I<29PUZg9HHOR>@CBGJ:9LUfj4%$%!$).@nkDŽKJCB?<9:>TdJABA@AScmslZfsc?'%4I`hG0'!!!/H200/&%@WV>,%'8_Q"#*-5MZPJCBFOVcqiSHDB=CYgwxruyvvw{|ztdKRj}sD343023357:?BDDC9BOSTT[lsw}wr~ıw??_[J=99:8989<<BZpsdgq~(7ѾϽøq(!8P[XNG@=75343.-,++*)'%&')'*+,.11ýmbټƿsK{ð|L'*/9@Xk],Pɻf117@tֶy:*p)lGRV,&+5]ҕP1/"$mrXNͪjpnMEA]j]liDJ/ +5*-@izn]NOX_W8 +,&$(*07Iżq54:1>ZQFD_结jnӵQ{pK̾l-HA=ACB@<3O{~|pkkjcT@Zuisoxg>mĿÓCntʿƳ]YmtS=GMVNNNJIJKKXwzjYZbÿ}qy}~xqomaF.0^E84:UTYeE`]Tekklglk;;OUzb4)$!'$&-4Ds~mŀJHA@=;98>OOBAABEVdkoeQZmeB$%6J`eA/$"&2]p>30,!(K_U=.&-HXF$,/8N\LA@@DOV^mdTFHMDEUeu}uv{|~z{wqh]MNdxuE6322237;;62/01/,*()'&"$$%'$'),/02jbǺſsN|̷tX[e^^]ekZ*Mt117>uv $+*",r(oJVk;8bڻx>1.!'j~V`}D[YMB;?^iPE84/I|639@<5=ISrFlnmlggbZXUQNE>-'/ESNPB"+(&$'&&**)((+2cyuutstvuuspsYkUFlKIDAFHEB@??ADA@:,M}wxvsqomjW?\sfumyd;t¹ƶž”?]tͺ٣l`e_9ELUNNMJGIKSle\]g¾uzvwzxzp`B01aB:5:RO[n[cWaf7>QV~[33. 
"*(,17LwwuyIFA>?=;;@XNCBBCEWdkpe\hyp>"(5LedA/$"(Lj72.($=[S:-&*?XD$,.5O[NDBAGNMQdcSGRTBBOcsqmuwy|{zxsodSO_pzoD622136:?DIHLKJG=?MSS]plheWOWh¯l5=\WD:8899<>AB=>??@EUvq]er(7Ͻ¹q%"8RXUMG@;761/0.+*((%&"$%$&')'+..0ihگjL~ߺtj``Z_W[hX*ShI>EC?i粠ufxƧIdS̸V8SE=uºŵȾĻ@gpǶؚfǿ]=GMXQOLHGJH\qfOYniV[cþ}r{yq~~z{xq[>-3gB;4A>DXwnaht*8̬ʷ¸s'#=QVUMF?:85300-**))'%##%&('*)+--:зxb^ۼhM~ǫ~mM5%$+8XmW'Yvvwkbf~m/2:B~t1,.22+(6mw&vGJ4-PxbZG/1+/xɃ@SyjwlrsID6`n=QVD?& .h,,-A_wrmpoofi_R- +)""&*/4\׾ÿh'Az29MTQD@p䧞a}ĹƤBê_Xʼų˳a_rioztje`\UW[hѾP&UpuM$'&$$%&#&'%'(,9hyuutuvvuttnxZlRHhNJC@BCCBBB?ACBA:7]~ywvvqd.4kq?94AY[hO9HPI5Wf9CK8BS_{P<74)(+*+16Wt}c}͸oIFDD?<=>C_hFB?CEHYejmdSk|c5&8Kd`>,$#&H{{=/+%*PXO6*#'6R?#*.6NcjSHHEDGPbjZFHPF>I`lqvw|}iNDSgr|f>433328>FMTRSVTN?AMVPV\o{ndm­k3DZVD:8;:;BZN>==?@>EXz~m_it&7ʽtǷo##=RWVMF@;7320,*+**''###"$'(+*+,/>ֻy1%$(& ##&A|iblJ©w]F;6?XmkP%Y¸g+59>̥wr|ýo-xD_~EF6+*'!%.0)1}~zʛHO[gj`ucHB9zkzYA?# :X)+/R~{rspplk`N/ 0!"&).7Zͧ¿c$Fr09TdRCCsܣ[<̴~ѽU^Ȟ–Ͳ}ĹѺ߱<$16UsB#*%&&'(&&&&((.:iuvvstorrpsoxVtGNfLHB?BDB?@@?BCB?:5_³zyvre@fŽpec~Z?~ȿŻżBhsʴӦ}zS?HNUQOLGJHCSvk]X]bý~}tskT<.7nr@83FeZhUXhU=dxPHSZ5CRX{I:52'(-+.5BZpz^ͶjDFDDA<;=>X~JDCCFJ]gmo`Wtb3(9Pg]9,$!*[g9/,%0V^Q3)$'8P7$(,4G\WJIGDEJPdlXHOZE34566:ALQVXTTTK;@NUR\tynqg2EXSC<:::;@G@:=>@?@FZz|ibkt*5̰ιm# "28>ݹh(yAZ^<,*+&"##*12+5~ko{C7Xc@8_s-B@.ALCDHSB?<  ?R*,4`w^SWVPRPLC0 ,"&*.7_ÿ_"Ln1G`kUCDvަ|Tƛ;θnSѷRfūN|ͮͽӭϷi$)>QgY $,$%'(''&''((,5pyxwzrryMxGS^LI?GRWPKHHHJLNboi_pkY]d¾xysdqu{vqujU8+7rl<51I`Pew]dbGXqEL\{_,BVbtE620%(/06;E\nu|}n˳fGDDD>::;>bzFDEFFH^hno`^wwZ4 +9ReY:+# %Go;0-&.VZI3'"&4> %(-3HZRJHCCGKRcgTFMSE>Vqmq~~{{zzzyrjZC?Oaiuv{yr]<-3579@AA=D_|{jdju(3ʵl##?AABUeV  dĸoj\QC9+-)18CZ(!$(Hg$}z5jƸ;,&" ")/1*8{hd^YK6<@MokCwa(0DB>c}|]C= BM1-8]~s`W\]YYXRJ/ 
'#&)/8`ÿXQg->^aQ@H}Ԍqyyn~fûXÒBhаNnŽysˮœ̷L&-124686:6=<:4%!-Kfpl8"%,%%'&'('('((,7uyz̘rqxOGX]IA;9=DCC@ADDCCA6;gò~yvttobGmúl`gSOȾ´òýu=c~qDzІbxetL=ERVQLKFIKM^faccYZ_ü|vzz}||shT6.;zm:43KUIir?=HOy7Cc|~?3IThpE3/,!'038?@?7:=@CDECABAB>59mx{wrsppn_>pĹkffPHȺȾĹüo/f{yɲ͙xO=CNTPLKIJJIKJ]uWZ]gǼ~~~{tneR2+>g;32ARThl@V^[cC9AZiO8;KVn{J/,(#037:B]mrjsvҭXE@AB?99:Cg^IDDEDL]ell`dtZ**=Zi[7(""+g`431) 6M_L+$ "(-$#'+0CSRIIFDDHQbiWIPM83DTQUYcdabd]UPJ7-,GQKAAB@DEA7>NSNONOLTrlOYk_7H[P?<;=Ee~yf]my(9ɴżh#$>U[ULF?:642/-,+*''$%&$&$&&)*+.6Kňf^YSONMKJG]~jlȫcHݮlOQSMP\blS !qF(3:>}%,2+!("L`({>Rc*'"!"%,13)D~wzfvkguvPJHJ^edpWD? HO,*)?crokkkjd`Z? + "/!&)/HՕsnX>ΩB}ǩªʽ¿ˤDQ[^^VROLKCEC9:4(6_nJ2,(-&%'*3>:1'()+>@?:4:nohg`]_dceSpy}ɮ͡citK9KUokA+'" )--5@]usjw~sj[ҩWC>AB>98:<_fGDA@BJ]ejjX`xQ**?[hS5(#$)Ud521* !6Y_F-#"'3J6#(*4HWVQNDBFKRbhWKKD5/5<;@BEJLQTOHA;3+%1AMRQSUQB3257:@ILM>9>@CB=5=OTPPSbkzYO]j_4G[O@<>APeihli_KB?>Hb~zb^ny%8ֻµi#&CX^XMF?:433/-,**%&&$$#'%&%'(),2LѴk(#'"HeibOİ~rg\PORKYjP$$w³r/+39BQ&T|H=cJ5Z-w6u~rs1&! "&+00)OzY@WQOMYNLH=OUD< MK-*(;hvpnongcb]A !+"&*0:k•L#_Y/F[YB?M͏~y^}<ͥEƟy´úĿɤ+*Kikc9&)$#'-;D<0'()+=tz}tmK7uS@:55<@@??>??>;71?mcJOQHGDENQB5t½lggHWɷ˿»ŶeAyv}í͖cWs~D>BOXNLLFGILelb_VY_gtgL1,F^;43DTTb^OL@@JQQNQV_F9KZv]>+$ &%&-7`wsjxxeUOѠQD@@?<:;?Co^DDA@BK[ehfZ\oqM' .@]iM4& $5zU010) 4QXB,%#+Nd(#'*3EZciXGGRVW\fRJH?1*07:99<=?EFB?;;6.+6QWUOIHD=324;>ALQM>8;?CB;3=OTPUg~eX\m\/N[N?<>Onsw|ynWDA==Idu^bpz(7Һ̸j#(@W\VLD?94330.-,)&''%$%&&'&%&*-3Lүg4UxuqihgifxfiռĿ^Ldz|d4284XrI(th<(+2:F޲C%`ad3vV-q2LbOq[-'!!%*./(SƓ|ŤdgOIKw?E8 UC--3dutppkhdZC# "+"&*2=rI!bX.CO@?@SԳx[ȿv>̠@Ɯsȿúʭٛ)8noH('&##'.9?8+&(),Cv|{tŋupMC|O?959@HGDBACDA<61AFKIFECDA:6tŻjfdKYǷƿǴgEy}ŮԙzC@ETXLKKHIHGLLLGGMJY`l~tbJ1-I\<42EQR_YID?Ehzyyz{F7KWiq]>-& %$$,?hvqkxhTLНMC@>;:;>CEaeEC@>>KZdjeVYp~M&!/D[fL3'!"&Xb-10*!;aZH:'#.IQ,"&+2C^t{eLMT\ZXVDGD;/'0797689:742/.,,)*(&$&%#$&''(+--F˪nxalֿÿ_FcEB]cZ_onE&yǻhf_J-3,,3;Iܪ>/Ϫ\;1|U4s=044AA1,% $,/-%Wj\NK_dWyI9G: XD,,>rp\WWVTLKI>! 
+4"&*3>y׳}C*fP/=NOI@Xä}tuZnHЖ>Ȗp˿ͮ؆$:RHEF0(&%&(.8>9,(().Du~{~ęmurLKM@;7:BIIFJNLKFA805_wfSGBHPHEDB@<75yŹkg`NUƸƱcL}yī֟dzvE>GUVKLJJGIIHTVXY`ioal|sbJ2+FX;34DUO\UNHLR~|a@:PZh~j;.( $"$,BhrklyjXSΘNB=<;<=@DMzfFB><>H^ef`XdrJ%"0H\fG2(#$9}W11-(!6_dVC'#1RS*!&*/@d|_QMMQOFGNLA:0)-476679998:@EB;0/?LKB?@CEB:479:@L`J937@DD?4?VTQY|wURYmU6KXMA<;>BQybKIA?@<=Mk~kaho|&7ʽh"%@V[TKD?:650.-++*(&&%%&%$&&&&).-BǮ{q|{xmrZh_Ho^jvrmphiG'zaIC4-3;NެI6ʥB;+:N3e413760/+%! #,/,#Sws`NI?3"&*0=yؿ{?'kN8QVOF=[hhqpbgQ̐@Õyӿ¿żĿͣīF".G_bT/*'$$(/49yøijeJQǸľŮW;xyÑ{uv@>FVQKLIGHKH[|~\pÿvo}rdJ0/IL:25L]FVPObaMTB=<;J]ad^P_shM&#4EbkF/'$&=y{E30-&#GpiP0"%3PS) '*-B]gfQMDBJIK[dSF>4+-37669;<::=CF@9.(/4/27FMOH?9779>NZQ76=EGE?29; +47 $'/@z:-rE1ACA>=[乣vvh]ŻbQɌ<Ř˷ǻ͆015:=?>?@FKHNG>()6`usZ,*($"&1?D:+&'&*Lys{ҽpq{WhMA;57=DGSaf_VME;-*Ioa[TB?CAAADCA3;{Ƿjh~_E]ƴýǯXT|wi^aP`xr9>FQPMMIHHIHj`u¾}~ysvno~~q`J0/SF;16U[RWQgtBEPB<;>AB3?RX[+''# "''IpplphWWdȑIA;;9:AAAABA=6,"%'(/?RUTOA958;CRaS97=?@DA6=STQSPJJDCMPVkƾR5OTI>;>Gi}F>FIHC=<>Smkdfp~7½b"&H[YSJEB;542.+*),*'%%&&%'&%'))+.LZ#4JIJ.3D;N~WkԱ¿ZUŜ]1#%'-8S^B/rRGOJAMbqy}b\^\E-+18Ku,6 TD<_3.1241.'#  %,/- en_^QH<99:Tb\Q=<[jYSH^}P>DggXI7 #i}=,)-Ldfaacb[TRD, + 12 %)0G¼w41qE3AHJ?;]}e_ú]_Ȅ<ɮӳ~$/--),*+-.-*+%.-+Fru`5$+('#&0?D<+(&'/O{~zNjorpyUs`H@78>EN]be^VPG9'&Dme^SEEFECCFD?29ŶinxaAaƳƧHJtykms<=CRTPLHGIJJiaV`pWT[^|½¾}{|o_D.0Z~M:27QYKPMI>9M][J?@?>8ATaL#"%!!)LmopwD>FbǏD@<:<;=>AGh_B@=9;I__c`Usd8!#2JheC/%#)Pv:33+!&BNN@/#$6WL#*).BW\UMBABELdvfMFC:.07;=<>@BACBA?:6-!#&/ERZYP?889MSSSOV_fhdU[jƼQ1XVI@?AVuR8=Vn~kbir1ye%&E\ZTKB<8412-*))'(%$&&'#$'&&),.,Mj\GWe[[oĿ[UfKNLHLU`dF.aw|wrh_`aW8*+1FXJSeX[kzcPE`fT>>]kUH9 $x<+'&08667;72-384 *.#',7Hֲq/6y?3OeX73WĿ]gRYȿʀ=ξýuKyxtmkgdcb_YUSN8'?LHA1%#-'$$(3BB7*'&(.N{{|tix\vnTF>>CKVbb`YSLE7&&Ahe^RGGGA;>B@<19iovd{Fd¼ŴťMW|zඝër;@DRTNJHFJIIEXXpltsd{¿|ym_C-4^{C637VWHKIEKYb{^UQHC?5CPdC "% 'Kkrv~z4$(bǿ?A=;<<<>DUWDB>:;L_\cc^u{gH$#5Me_@,&#!1hF1.'!(NYK8,!%;N0#'*2AT]THCBBDLajaMFB5,/8<>;:==@CCA>=;, 
*@Va^J:8<;@RY?;=?A@@B;>Xtkbhr'a˼zysvwt·a!%C[]ULB=8532.+**((&$$%##$'''(,/,Q±qtoan͍ľ[Qſh]cPLYQX`B2ǺhE0,3;;=<80?iptdw$%%%Plrvw7cǽGB>>?==?CVFBA<9?L]_eb[p{D(7Qj`>+##*Pw70,'!..,/)"#.60$')0@VVQKKDABGXibMC>3,07<>;<<;>@BA?A<+"9S]YH;79<;DPTI<448;<84>RQORcush_KP[l~I2SVH@@?;AVu~i`it!;e|nl`VFADIPIFBCCEN}ҿc +G_\TLD?:63/,,+*)'&&##$%&&((),0.Jɾi\\\WYZULLdZpʑ¾ZZĠa>331We?2ʾpT9.4RcBC. *}:+&%1FGBID;5>IQ: 7(!'.2O½l(=~{4.0,-+#MU<6+(5G^A%(.360nµBhֿtD̹|ua%Ip]SJ&"(%$%(0<=4+('*/Uy{vrsssqgzt^_[TKIOcl_SMJIH;)!8g]ZRAGA?<::<:/A|hpwlwDh³š?Uuݮ{o;AGUSJJHDIIL~w[bĿzlZ=,3euB72;EFCCI^qKD8J_ZY{l;Gno>.)&/Xoswz8!lľ}FEA?>;;6,19>?>>>>?ACDCA:'.GVYH:9;>>GW`RE>:8;<61:SRNDEIDJLNNXlN6SRI@;>HMLIIEA?@=>EYxkcit!:^~zxtePEBBA=7>?BLW¶b)F_YSLC>:61/-.-,)(&$%$&&&&''**-*XʾS!!M}awVX֥iOER[MVli=6ñ{ondA,4;Sۧ; %$#O7Ib613410/&!"#%)./)rmƷUWQDHltK<' -|u4*(0OxslofFIfd\K-%>kt)#0&&,2OȾc&Ax31CKF3$X}}~4!'*021v=98:;/Mueurhs>k±ɵŽD_vŸݳzh5@FWTNLGCGKI`c]bdmbYbýzmX;*4hr<608?>??IZSCCAHNXL6Kmo=2*%2\mpuw/ !mȻyED@=;::0.%'?[]aL)(A`A"(,2=VXVJEBADITgaMB>5,0;@BBBCCA@CEE@8$'@Y\H:8:;>J\]UQC?:971,7QSMHFGJLNLOYiǿK9WVJ@==AHNRTORFA>vRQMCFGA=;<<:*HxeupipJpȻƖG_qĶ习d>BKVRMLDBFJPtq[\b¼yjV:,7kh?613?C@<0(# 3ant}n/ %uʿrIB>;;<<=BYnAA>;TVNFCCEEHSldMA<4-1;ADGGKHA?CGHD;$,L`aD7479>CT[YSGB<:5-*;OSMEGHILNLO[k~D?UVG?<=@>GXjtcI@>;53/.-+'()%$&'$%%'%(**..^ķJ~hh[w[[~eD4539:ZfD=Ϳ9/6=U٤T::;-$*h1SP60CI:*.())%#'/0* uj~SEJKFb~noq{P:AdQ?8$ :g3+.VpVHKGamOA_\4]Nj#"T_B3$!(-7`Ҿ½\%Gx3?SWH, gdbRM|z;=od*"%*/05¿1˹_Nȴ˱pζH"(&,)'0A*.,-&$! %-:5!#%)&%%&)--)%%&*5c}|wpxppkinju[ITfhUFGLPF5#%=|YUJHJMG@?A=8-L°vbuijfBr¶ǿȿ;O|pǶݪ{hva:@JSJIEACHFT}uh[]aufR3(7pa>63?SLA::;<<@CutH?><=APY[[OPotU.,;Yi\8)! $Eug43-$(72@:,%(?L/!&).:RWGHJGGEFRg^H>:3,0QTNGIJLNMLO^mH=XYG?>>ANfzqG=@A=B`{xgcmx(o˿zwn`WSKGWY˸̧v}{],F[^TKD?9540.+*((''&%&%%&(((),.&Z¦^ANmt_g]sW_í\/ '-5?fr>Cųr'-7AX{>)Q,]P3X^74?P3%$12)*qlanrUdvvgzgG5?nA>B9! 
9i2,3Tsm`]_js[F]m^:aċ#%8*60!(-7g־X Qk+=;;<>?a`DA?<;AT\[XNKlxT)->ZdS8)"$-bj42,%#4ISC5$&3;+"&)07S\RMNKGEEQdeOB<4+/;FJMXgbTEDIKF8#.Vvb=558;CKNNJHMKPL=.+:KNRPQMLONKT]mƾE>^WH@?DTxuOD<=AA>Ie}wc`jy#Oпn`XQXZ\YZTPQQRXTϸū^-H\\VJC?8560.-+)&'''&$%%$%(')--(YŢaB* #0;EaZyXlտqZjbWckur?L|ɵo0#+5AXά`)X~*a{HEҸm/;gysligK, +zv\z{MpD36WVRb^jkE5 9g,*-Hr~truraBQmmK0h/@U5!!(,5lӣwRU^-DWN8*wlcdal}M""'*/1?µ.Ȫ{ͲN[ʿƧu{~Э=(+*,,>opY@*())(''5pf0))%%$)9GD3)(',1c~|{§grrven`a}dLHEM[WJGIE<1%"Jw^[PVVPIEFHE9Hae^XBABA=:@RYXSEVtf9#,AXdS6(##/l{<20+$!0RlgfA"(G\WH@@PogG63<;>A?>Hdwbely%>6569@<.&-5B\Z.g+b|K3B[VW%.>Ư]+3|pjca[vlwXp\9.2847Fe]=4 ?\+*'6NXVQSM:7COK94q~ +cx*#(/;sɰϾM]c2@A<9)'~iY@#%.:#!#%(00>Ƽþ(ʬHc̸ʧwϨ;)(+,-7*)K~{hpne]UPNPQNF2VļiimoaCǽƻƿDhtijԻɘp]8@IVNJFCEGFIly|~{mYZg½~xfL1-;s`=23?C=:8ETAAH98;>>?9?QUS+ (;Uhu~wR-+8Ұ`IB@?>==>?:;;;@AA@A?=@SWXTEFEB4"-A^fR1("!$36.2/(#"BnnaS/ )8E* !'*.:R_bliN:@IQ_YHDA7,-@NTf~|eI>CKH3!#(028:5:>FS^i\SURLJA2-8X`kU`qilqǿxCI_YH?>FJC=AA@>??@?Igucgn{">cyncYZYY[[ZWSSPU[WϻɰX.F[ZTLD>75310-+)'''&&##"%%&''()&Zƻb,:aT4%0f`ԿxXjmlox~q]urAU_RJ:&%'%#''" #.59^ϗ( *[z,gEDo}V0KϤV- 9{zsbVHCJedSBRfc6.>-''*0358>D;8. >X+)"&/4,*11)+07=97xu*Qf&#'-:xþK!_]1AJRC&*XOJB4!##'0-G%״|˫GgȪʰ϶Ƞvћ0*&(*-D{:&&&%'2jZA\6(*$&'+>MI6)(),9i{yɵuhycpVizm\TPLEEHD@??@A:')Nsc_ZSRSUSS8Vre}ms_HȽƽŽĺľz3eo³ϓYogp\2=GQNJIFEHLb`XXc|vfM3-DzY=4278775687:::::=@>7AQ]K%!,BZkux|y[A=CЬZIDBDGKIHGFFEEEDCEEC?CUWWQFBA=1" .@`gN2' #+231.($2@UUD+".IN&#&)-=TkyO8GOV^cPIB:01DXimL?DG?0%$!%+,06959=GSai\XTRQJ@728T_uuzǿx>FYUCLiv_do{%?cxkc][ZZ]\ZWUUSVZWоȮ[-G[\RME;9631/,+('&$%$#$#&('&'*)$[ÚI")(+0:K\p}_ѻXkǣttxtAXaSD7)$'$!%)& "%-5A`oS-"+i}1rERϽp0:sL((;slga]RG5-,+)($&/0558544* FX(("$-,(,/-+,39>97{u'GK4!(.B}͹F&fY.FaY?)0eU&$&(/,Q~$…˧?râǮϯŚpſüΖ+*'&).BnF#&&$"'6SE.Hh+*,%#'-?NA0&%*,5j}ysmz^qUuqcYTROHAA@>?BDE6$+Rr`]_YRTTVUT:DTazI#,@\npqzzia[VөUIFHOXVUYXUUVQLMQPNIBDTVUNB@A:0"!1BadI1%! 
#+/01.'"!BhkX<("-EQ'!%*+9XvvNEOSYigRIGB75OkoK?CD7/(#(-"$**-5659AIUih_ZYZXQC909PZhyxǽp9F^RD?DUnyxzvoXHCA@Mntchp{#;_wke_ZY[[\[WTRRUZYԧ{λ̷W,I[\SH?:7642..+(&''$$#%$&%%%&*+&\ǣjWfx{\}ù¾Xq¤dByrDGhtj:[^SC6'$&$#(+*BF),7?_ٕ"@X"><4-'&&%&)..234111' + PR,)*5FA6<@>79AKMB:f)Y^)!(.<Ʋ|C(fM7We^@')r2#%()3,St'ȷΩ?xĴöɿȏ&,('*/Fl('(&%$(/CYivK++%%&(/2,(%'*,9m~yxxƘrkx^~Ts^XXUUTOB?>?@CIC1(-Uwl`WZXTWWWVTFYpagwYLȾøĽĽŽx>p}y°эΥV8;LSMKIGHKHTjY^jĹ}}~~xywscH0+M}Q831555687678489<:DQguA  .FamnozwupdѡUHHKPTW]a`]\[WXXWVVNEGTUUNC?;7.!"3GadE0& "*241-(!:LWP7&"0LS* !&*-7TcnkRAEMSUb^PJKJ:@Y~sM?=:2+'(-)"*-03329=IXdfd_`bb\QC9>MUXm_sǻrCLcRB@@N^X_k]UOHDBAQts_ir}$<^ykfd[Z[Z[ZUPQQW[[ȾyѸ;Y+IZ]RHC<7641.+))'&%$##%'%$'&(,+,aʥdU\c^[RM>AAduZy˼½TtƼoy{7}¹ŻuЉ#,)**-J`*&(()))/jnN2,+'''&+31)%''+;s}u¿tsm}\Pe\\ZYYYTI@?<=BE=2%+Xp`Y_]XX\[ZXK\ƾmfdu\KɽøúĻŸoApyʪyf}T7=NSNJB??9DPgq=  /Gemml~ymȖYIIMQTZekidbdhe\Y_\NFJSSTL@<<8."$5G^aD0&!!$;aJ2.( "=?@QeeWZ_HAA>Tuq`iq~%=`wida\YXZYWPQTUX\ZȷķƽʿS1K]\QJC<6550.*())(&#""$'%$%&(++'^ėS4/2'! $&^pUzʙƿ½Vx½͢O'=<:75.*&%$&%&.1012020% + ZJ/,=jypqrlnlmmeE>¶S*PW*#'*Aؿ¾x9)pE:-$4F`bB/#!%=yG1.( "Dj]N6$#1[X&&*.8OYREBCEJIIWj^NLC73AT^cmd^H:852/*(*($0442338>KZknlmnsrlaM:@OTWc~czuhǼi:PYLC>?FQu|cdoU@B=;Two_fr"<`shc_ZZY]ZVRRSUZ__ijļT0KXZSMC=7441/,)*(('%#$&'$#&'),+'hŵk>' (;NeotzWH{̿¶¼\|İŠtJ!'_vfqm5gʿ{\)/7=hʏyǽ}:0aj1x#C{|~K$$,2-tïW:д÷ҷ{ώ1³Ⱥÿt")(%)-G{q'%'%%$$5Q^]),(#$'.AE:+&&'+9y{|ɺsq_W~{{~ztqmdZTG??=<=7+6gzpjjd_\\[^dFesdbwNLǹûŷf>xyv׈pxO<=JQJJC@DCKZs}UV]mij}yrn^<)0MysD:3244566766788::>@>=FSl}_8"'""3BS]du|qƔRIIQ^f{o_^X_ndSXZ`b^HHSPOI@>=6,#2L^_@.%!'Mt93/(  Af\U3"$9`M& %'-8IWRIC@ADIOnqPJIA2).320437743133,&&!$059955:;HWjqontxpeWB7@OQUZwo|tżoAP]KA?DMnwSWcVDAD:?Yzo]ds$>`tg_]ZXXWXUQSSUZ_[ùüU2L[[SJB?9651.+()(&&%%$&%$#%&),,/oòM6+(%%0LhfhpqpkTO{Ω½]}ڳwK9PtaZvm4fƴO'.7:m٥JXJ!+cd+r;h+-*OrU5&?w]ZTOH;/,17;::741*$""!!"(/2444530# + "azB++Afi<).?ZX<2L[>ER*5!!%,I¾n25w{8@W[O4 F@$%,3/{ 
rz~~Q<ε׵عϋ.ƻzƶk+*'%)0]Q!((&$#'0X~M(+'#$'1>B9,&&&+?y}zýqq}TX~vnhbRFB>=?82?l}rlof`]acea@boa]~RPȸĸfL||ƁìxG8:IMJJDAEHFGpzb`ZaoŲ{sphQ7',N}qD81/355677768689:@A<=7+&4L`Y>+%"&=vzN2.(!(N^\I+"$9ZL"%&*6FZQI@;FSenojjlbRLB8>ORQXvvźm;P\G@>CVdF9AC>@ABjxjZX`żɹʿýij\Cyt仟ǵuB7AJNMIEDEIFVw~g[_mƮztpdM3'0T}kA80/25657777768;:>@=azpd_][YYXWVUTVX[[fT/LWYRLFA<842.,+*(&%%$&&$$%&&(+-.o̿dPKH:<726?@`x{zrZM}̾½]l/momaWJGO_gdS74@A>Ngg=(-7Brr!%),*#!(9X8oh8.'Mſ:#Dm[URNF;2/39:99720*%#!""#&/56:<<96' +#s:++:Y_SIHLI?BQ]W7QŪE'H[N"!%+L׵g,=}v5ͷƧDG˺ͷYʀ5ͻ¢̴ȹоҽX#'%%+2WN%%%%#")FoxkO( )&%%(4?D8+''(,>v{|{ulPi|{yreXHCFB,!&1" %*,3LYSD999EJJJU\cYPKJHLLC9:NQMLJgŻh;QUG=;=AFIEFGIFBA@Aa|{gblw#=c}pe_]ZYWYYUTTS]gnmƸºU1N[\RKF?<851.,))('%$%%&##''(*,-,xµzzweP_twwpWJ}ɿͩYú³k+!z{QJ:-!#$$$ $"$'2EdZ7'-4?tf%#(/+%!-fU=jCv50,GlrV*%On]WSOH<2149;::631*''#$$',379=@@=8& +&uw4,+2Uwohkljigll]7Qȧ?G~{K!'*Re)Gs27?TN.[Q5 1ciV>%!"'06?ջãCTϬɝwsldz8˶ʷӻҼQ%''()3`G%(&&%#(=W^^M$!)%%&+8DM7)%&)*?;?M\~T $'$")17=JXm~ĂLQbobjc\UTY^\Q`w{VLSRRNUcTF((:Qd[8(##%>yr52-' +AIVF*  !&),6IYUC=?BEMQdkYKJC11?DE922202456.)%$&,9?DIGJSZbKFBFHHE>4;NQU\t{|ĺfAd|{fdkx%?a}pe^[YY]^\YV\espdnúQ"1L\\TJE@;641/*)((&%#%$%$&%%')+--vĹ|owuicZ<5SkqvnNO|ĩ~Xh+ }vbpi\D=DFPZ`dWYvuviY:*+3AuE3]}{vR;lSq//*ccYk9Pm]XVPI;305==<;643,('%%'*.58;?DEB;' $xn/+-Bvoggihc`di[;\Ƥ60`eI! 
&0V^%Io,.Ma6'!"gwpe[[^}{.!&(24Cϳxyž5W˩ĩu;̶̾oɲҹE&(()*1a|A.)&&##+Fi{qO& '&%&(5?@7)$%'*>syxwvy{wxwtr`aW`~fSHEFD@b{{}zqrvwtmag~gi^~xºȸ˿ɾƿçNN~x‹8;@PPMJEAADIhh\Y\j^ZayxmOD=-7Xe>60374677766686:;=?KYk|̺vFRl~||kUSWULLcz{MMSTRLK^dS0+5/;KPXf~s}yŸ^9RRG@?Ge}vtp_G?@?De}{b`jy$Ac~od`]WYZ\^_dtwb[wƴƽP/K^[QHD@96410,))(''$$%&%&&'&'+-2|Ⱥ>!&(&!!'1NelnfOQ}ҷƽ}]g*&͹rrz|ohF'(3BwȨ¾O;`4/'Ma;_t0!N}g^YWQI;204;;;:8750*)'**+06:(%%""*OqbO,!!)&%&)7ED5(&(''@y~ywywswxyytmW5QyvWMGDDFEAd}v{}xpuxwvnchmffx]qzùǷʾȻƾŤKIvs~:=CPQNIDBCFGbhb^dyeZe~æzpeJPC-8Yx`?513546798777769=?@=@NbtF #-37;?KV`kʽwLLfxX_cKKVi~pINTUTLOm|Y-,>WcN4*!#(YZ02.%$Ne[E' $"  %*+3G`OGIIFEGUnr[MGB;E??ADED=4.8MVYcrryyĸZ@UWB?>E][MGIKGBBC@Ihxceny$Aa~oc__YVXcllO\{̲ѽƿM1K[ZQLE>9751.,('(&'%%&&%%&''),0/ȸS1077.,+.56KhhhdZRϠ{X}d'*Ծvea5#'4Eyz|xyqW]xtqnkjmJ?`^x6/,>VC0Q^5$!Wi_ZVRI;004;<;98:71/0-,-/17:?BCC@:* -f1,2Vtoe]fm\DJdqY9fǙ*@^>"([Z"M}Z/562/$"pqjnodap&#$(03N˷¾#iɼƯDмc>dz˻ɮԯ=&&((*3f{`,&&$!#(ENB<7##(#$&)7CB4)''()Iy{xy|scG1cpMGCA?BA=8Z~uz}vtvvvskbb^bc{z]yj˿ŵȿǽŻÛARqɾv2;BLOHHBBDGJdabjUWd~t{}}{ti\KT=.6`vX=51274677768679:=@B?@ZsD #8DJNU]ks~ȲtKL\u_faSR\oyaDOUXUMeytQ)!-;WcJ6%"$+Yd42-%4`aVA&$# $',2G]RJJGB?ANimZHFB9;K_VC:1014431.)$&(%#().:@C<:@KPD?>CHJE;3/9LTYghlmwĸ\?SQE?>CKH>>?cH}v/--N~|i2 ci^ZVRJ<227;fLGJE?=?@;8V|{{uvxzvskc]Z]b~xbvmƵɻƼƾÝKayɿv1 #=JOS[dlr|ʪlKIU`\e\VWcosXBNW[YYjwjU+0>ZbM3&"%7|H42,%.HOR>(#"!#'-3HWRLIC?=>JbgWGE?88KSHD>622453100.+.'&,,1=C@<:HUbVOLPTOF;2.;KSUdgcoķ[?SQD=:BLND??>?@DB>Hlqbfo|&Agzr`Z`wh>NyǾP2KYWSNHA:61..-*)((''&$#$#$'((+.0ªtifdI>RgkmoQSɫɺyYe'-ykfdlofUS@$+6GٽrVXO>6((,*$#A@CeDop+.-sł1!`h]WSOH>329>A@:;<;762.+-058<>BBEB<* 4d.-/E[]abcaUCP_[E8rȓ/0R4"+cO!R}O/C^N9(#zd<=;5/Z|zwrtvvsql`ZZUcwfmqĶɼźźÙDgwµȿq35134777687569:<;CSjk: + #;HMQX_glwɦeJHNz}qWV[YZ\fopSEQX[[]eps]*/E[aJ3'!#&_~[41-%,K_Y?*$" + $',3DWRMJE?@@L\h_HC<3199;FD:556642352/2$ )/-2>F><=?ADB?Nlq`fq}*DmqrwzLEkúM3L[WPKF?;840-,**('%$%#"##"#%(,.0ƷG601+**'*01RhnstUUѻĿy[ҽ`&*ļV&.7Hڷyyj_C()2B;0(#D>JfI|y-13{Ū_*ewg\VQNG:229>>?<<<;76201/28:=?A@CC=) 
:Z-,(;_okiljkafibN=wȈ$*hi4$*jJ$VyL7FE87'(wWPHJOQdT%$&*2,SѾæs{gxͱMWͮţɨŻӖ%*('&*+DaK80'&%%%.;hvfA &*'&$%(*('&'&(,Qzzsɶwrk}6Dum`WJ@==946\wtonopmlnk_XYVevhntÿùȽŹƽDjtȽj4DKQZi|gOOE;409POR_qnaTpŷTDSQC;9;?=<<<=>@B@=Qnq]fq~3^U:WƼM5PZXRLF><83/-+*+)'$#%%#$$$$&)-,2DZw7'.--( '-2ShnsmPYѽÿw]Ưe$.z~K(.5FƨN&8qi+Q:MiFBw).0Z}[-%fkbZWSND824;>>=AAA@<6$ EW--3U|ymgikhbdfdS<}"JP8$4sD&WtH5=CI=%.\!%(*0+Zut|ҿն_HpήEZǝɰɦǾҎ(*(&&,EoC''%#%2Wo2&('%''((%&&%&'-Tz{ūytnz5Du¿n`VKDA?821[znigiieddgd\VXSbvhntôȾƼƾHnrǵe9?@@GSo_1 !:GQZdjow~̣cHJSkuTFR\daehXEBPUYTHE`W;#"2E_cJ1#!".exE1.*"4X\J5$%" + !%(*3D\VQLHGINR\iXC<8-',29?>;=<;752131.)!)/7><>ELSchUT[[G>7/7RNNUlu\\~ŵWGROB99::<==>>=?@>>Soo_er~3t̽]8?lǾL1U]XPKD9895/-,++($$%%%%&&%&%)..8ȱ|DUshV=-AA@CDEEA<88537<>@AAB?;4! KU/,2`|cNIX_VBF\dM:x'U^7 %6uȽ|?+^sG7MUN7$4}ywgow4!%'+/-fŬzy¾u˕ϮEe̹ƢǾщ$+'&',7k<'&%"&2PqY5*)'&'+(>90%&')-Vzz{srlw2Bnp`TMKIF<25^}qgbdddba_a`XXWNgrhpvõȼƸžGit޺Ŀa6?FOOKIB>BIOyj`f\^hö~yun\G7(+=>>;54443.)$&-4=>>?HVttK;BZdRF>71:942.,,)'%$%#"&&$$''*,*5ȷpmEIsrOI`jornZZǜĿw[~]!3l_IO6)08HמA)^гPB5`]G?/7Ҿm'cvjc`\\TI=:;BFFGKLMLHA<978<=??AA@<6. JV-,5cya][hhP@SinI&bsA2GOL9#;\M~~wl8!$&-1/gηuzýiƺѻ̬@kɿwĞżф,('(-Fu.%%##(7bjSP8*(&'*/PR"&&(-Twrvqmw5Bqp`VQNMF=28d}thaaa``_]Y_`YVSOkqenyʾǸĸŻľ;ar°۫b5>FRPMHDCDFS^aehYZhõ|wtlUE8'+=gaI;41333358986789;>@@=LWrT* + $7Nbmrx~˥XOU]YYdb_dQ?:=Qdpm`OEGTY]VEWg`F#&4M]a@-"!$>_8.-)##;?IJ0!!%!!#'*2CZZD7;AEKNV^\G>:1),06;@??<753220*''! 
&&4?=8=FPck`lsyfDA<52;OSS`oV]{ķyNCVPB;9:;:<<=@BA@=?Pvk^hv#Vͫ^?XĶʿI"6S[YRJF>;83/-,))('''$"%$!%%()-,4ɾb\WQOEDIG>F`kqxtWZþw[[7½U?<;JۦP*7uI&A3g]Cqr,25X|M* _rgb_\ZTJ>9?ACDEDBA=6/ NR.+8b{stkvmNH`l^>Do0kW"4wƐy8+lu74K[T.rǽʽƙĺ}%+('',GhZ\8$%%#"&7HPkt4('&),=3#'&(,Tz{™xrrt7Jtla[VNLJF@?dwke`deccba`a^SWVPkrfm|ɻĹû÷ĿIotצZ0ERW^SFc}mH#&4J]b<+#!'7820-)"2J^R1  # $'+2BTZF88=AFNZi_I?90)+04:>>;9974331*+..%$$(*8B<;>IYr~oNA?<72:NTUZĵzJEYNB=;;BB?;@ELDB>>Xxc_it(Dsus}ýýȾD!5RYXQJC>:850,*++*('%%%$#"%%*(,,2ƴ7%$$!(.TiqyqO^éþyd~[ >ü\G?J޳v/#-- )I.oVKֽb02.0Sv)% \kd_ZXWQE958AKOTX[]YNDB?#7~̾ȩs82ky7;XO=*;}X^nB2$&$+.'rʼR'̵ͿĉA`̜=w¸Ŕÿm%+)'',DG*)(&%%#!&3KxI'&'%'*:hK%'').X}z_vnto6Guſ}d`[ZVSWVH@bljebfhgiijkeXMWYXlidq{Ⱦʾ²¶þÿCq~pҪZ5_[D;4013223577678;==>=?JR`zxV! "9Vfu~|ƕPMFC?988<=:7798<<:;;"'7Jb\>*$"&@fB0.'"C_\J,$"$%(3DVXC86<@CHWe^D=7/(*.16:><=>;6340-173'%*.4>C=;?I_|{eXMKADC>949LXZ]Ķy?CXMB<=BNNMCLb^FB>=Xybahu3Yx̩ǾF!7SYVOH@?=742.-+,*)&%&&%$#&&'(,,8ɸtcIF>((7:6:WhqtaBUțþxc|[D~`RB6AZemzdA;Kɜ\$$## !1t0oUPw6-11d̴A&$`mc[VUTMC736?HMQVXYSKB<8799, ZO,'&+-.,0.-',5:;4Hƿ_#9ҝǶp30mo6FPLLLEABFETknp}|]_hó{xvrj_MG2&+@_[G;4001334687769;>>?>AKTmvP  .FdsyȒRJD@=99;=LhYSYUH;;<;;HUZ`TXntjE$(8M_T;,$$@vn92.&!E]YI,#!$&)1CYWHA<@?AEUg]J>81)+1359CEJF>8771-13,!#,5;>EA>?M`ieN<6@KNKB:48PX]rynĶnDGKOQOH@;96559;;<<<=:5) + aC*'"#&&(**('*28:3LýZ'BɿȆ}m.6td7CSL@(M;/DB,)44&"$%+/-~Ĩ}sK.ǽҝQlǒ3ƾ["(&&*.O|vmb0$%$"$',-,)%+'#&+$&&**]}{~crlqi9LûfWTLFEGE=09Z}wehsyvssvvrc[]]^pnjwɾýĹxAt}r̦R4?GMKMMCAEHToZ`k­}ytrleWC:%!+@e^D:3/22125688568:?@?>AMZyoN& +>[o~uɓPIB>;8;>K{_;;;::J]kvrq~r:"(9O^T:,$ %8tZ90,%#D[]D*!!#$(1BW\QIIB=?FWieI@;6-.8=BPfom]H7993-+&!%-49@FC@FU`ikcckqhXQD92=W]bt~ijj7FVI@<;=DUd{|T9651,++*+(%&%%&%%%&'),+1ͽ7%""'$"'6Xjp{oS]ĿwjXO}Żĭy0",9MťrixĽz-iyYTB7CxE%+_h`\ZVTND:8;@DEFIJFA;556669>>>>>>92' +aA,&!"$&)*)'()29:1VĹT'Fˇťj+5m`6.)(& K;*,)!"')(##%'.-,ðwF5j8~ʍ1ƻǽ˽¹L%(((+0hd+%&%$&(+++)% 
)($&+S7&%''/_~zqtmvg;ZʿcNJF@=;<:7-6Zy{py|zz||ujbcccomh|ɾŹƸoDzzܳͳ~R6?C]jfFA?:25@JX}udKB@=7(!$)09EEECJYakhYUJ?8D]fl~ij\4FTJA<<@Me|WJFD?;?]xzaao{%H¿Ƚ@6P[WOHA=875/.-()'&'%##$$&&&'(,*0ϸS@CIAG>0.5LimptnW`¿voSN³s`I* $-9QԷz|s1nOT»u:5;ƹH',_kb^\ZXSH96@?92& +!d@+'"#*''())(+2<;-UɱF(Aչg);jY4+-42$PIA:3)'*143(%'(-,-ľrĽ=7Ѿp3bʇ3ͽskp|ʻ¿׿B%))()0R^hzb)'%##%(*,+*%)&%',Fp2%%')0a}{qnr_*WǤz[D<:99:8683)5R}|z|~{xpfhiiremƸǻµƿƿõk=CTgbMCC>45CRilWHIL>%#&+2?;3) +&j|@,*''689091'=KXX:YŮL$-) *GǼa$Ao^/2GM<&XdYQJCCCNWC*$%(-+.½pt/AŹо|2̽z­ƾ˸¿ôմ?(*)'+2UC%&%"!%,-..-& *&$(+L`0&%&'1d~|}qnu\'FtϫwM@A<689996594,8T|yyyzwnjjlpyfmǾȸƿijj=tv~ƪP7?@=CevZbn{%Lǽ;9PZTMHA=7551,*)'&'$#$#$"$''(+/.5ϽVFE535/227((9vzrlihaYNLOSWZWUROJF@>87@A=FNje7 + +  />LayifʼCB=;87999EJFAFJB42=EORRIIDFBFGE;,!'(+7GOMHIOW]jkg[_kfZYUOK^x|~x{mpôq63IVG=:@OJ:;?BDCCA=Ggrdenz&SƼ;:T\VPJB<862//-+**)%"%$##&&'(*--<͵ySLHH;& ""*;]jtxiSc}LQVND:*!&&  "! $,9VΡqrcWH7&%.3/')_g/xwO4_J55=NNPTVO8&<~vrnnj`WW[^_^\ZXTNFA?=83' '|xA.+:ðG^˫=3apW !-TԳ\#EuS1;>7-$k}YI=>GD9DL4&##(.)@κx(LͰs1ǾƢɻʵű٧7)''(+8swF)))&#&*02341'!*&&(-cr('&)*5j}{jskxa6.B}jKHB>@@;675464561,5X{{}||{wop|jsjyƼǺûƼŲ]L{tŗ@8BMUNKHB?CK`dNTXbrĿzsqgafmS2& &/K_MA:102//04676768:>AA>HUn^0 +   0CCB@3 &)*.7M^VJPWZalldXllUVVONcy~~v]Y^s±l32OUG><>GS[[ZZYODAǻc7"#,CW_gnvuhZfǼ¾}þ|GT{WPC8*$&% " ##'.>QѼ:&7tJ2ld-qmJUB56@\J#E~wssrnc\]`ffdaa_WNF@<<;=ACACA@>80% *zw7*)?~5eʤ5F\-#(^ǾʿVIoM0BF<3$"m~bQD=-,:HK2&$"&,#@̵%Uʕ˾n8ĽʳȻ֡0(&'',2oz5%%&&$'-48751(#+&%''Oh*&%'*9p}w{|vgzgD8.QxuSGB87:>9555352582+3[}}zwqjhndq]eƷ˽ĸƾTKyrǮuzC4AQUKKGBBDFb~aOUZeYdrý~wohb^afF)#"'4OaNB9101-,.5966748:>?=:IRt[/ +  -8DXeu{ujʽ|ID>;9999;HyL5423=RamfTfyqE.>Q]B.$ #,Xu]21+"/DKS:& !#$'*0>OQC:579:?ABBGJKB526::;<@?;Kll_fr&R»Ǽ7 ;S[UOHB<841.-,***('%%$$$$&&'*,,AÓT=-,()>Thi_envti^lĹͿþƿvHTyWOF9"$&$! 
%' &*&)0=Vځ(1խY9~`6tuKXC22K՚A"F~xuqsqmc]]`eeda^]TIA;7577?BAA@>:2+" +0r:)*<}m}9o͛-CfG")_V $MkI-CON='(tz`SB0%*9ED1$$$'-'Mʳ¾]βΰƿg<ĿʮĶٟ**''(,6fy<&((&%*29=940$$)%%'+lh+&&',=w|yttjlRN=IqoLE936787433532463-3a~tx}~|zvlXXVd`uS`ķ̽øſ\YqƜyA:CTQHJGBACDSegl}s[ewľxpd[Y[ad9%&"(5N`P@:41/*+05:96479;=???JVlW-   !,6E[]ivyvzuϿtMF>;8688:T>2334>O_eZMbmY4 .@RY?/$!$7|zI./'-TgU4#! ! ""%*1?UVNE7248>@BDEJLA77=@<89>>=@DBA@8'$.:?MkyWJNVY\fxkaYQNHF_t{sr}qUZas±T-7OUF@=C^nmke_PDC@>Mqk\ht'O̽Ƽ5:T[UNG@<862.--+*(&&%###$$%%&)++>Ҷw&"#+# 2_otvk^o½tES{g|vwy{taCBJNQSdoe1'2<\z++ў:&b^7x|M?mN81Op;" ?rvrnjmkg`]Y\dec^[UOD<63238<=>?=;5.*! +9g3($%?`[Rk~~c4|Ε%2M@!)^ҲQ"LiF1EVS<#&vgRC7/5>RQ2&&#'*'JƩƽ}UЧH_ypJ\ɺ^@ŹɬՕ#)((&,:k5%%&$$*8C>996&&*$$'/`pa2$&'(+Dz}yyntl~[jpb[geJ<6247763.0320342-4byX^ny}yeMLD]dwpvkZ_g~Pb¼óʽŹſQM~pи::BOPKKGB@CF\l~S\fwļtWWTSV\_R,()#'3WdN>8320..26977799:?@CECC<0#"2@IVr|^LLY^bwedm\PIHDBZpsq~yVTW`qij|S.9PUH?>==GHEHMOFBA@>>=;5-*  :h7*&AfHB33ˏ%6`N" .cѺL&UoG4CPB5!)y`ICABN`cH*$$#'+(WŪy!cͯ͢ɣȢʳVFŴƤ֌)'((-;vx2&$#$#*5=99<7)'+%#%(.*('$%'*+J}zxʶupQz`^aH:6246640+-0/-251(,c]MSex~iTG>c~s~]}yjmyl]Pn{LdþõĵſåTUt弡?:AMPLKG@>ADUn^][fzĹs`WIJLMQRSB-..$'5`cJ?620/,,28767769;>@@AKYtO% + +  +  '.;AJQYbc`aѴkNGB<7646>Xc;63346+"1@TS=-$!#:zJ1.&Hc_L2! 
!!$&+1=Q_[B43138?@ADD@8/0=FGFDCCFJHGC9/$!)7DQ^y{YQTZajqJUa_QJGJGGWjnltufaXURU_tİ{M/;UTD=?EVhpoplaKB>9Pmd`js*Y»ľž4";SVULE@=9842,,*)'&&%%#$%##%'(.)DӼuuhpvum`pȽwþyD`øx,,5?XԝM-4/$0`Y9{lBnȌB0Pr7!^~yztldXHLRY_dfa]UJB=98:=??<8862,) +G).iѬF'Te?3?@@4"(}g79HQOUSB7*$%$+.%`Īfxonj˺uˡ֡ΜʱOIķ{ƾӁ))*)-Ca#&&###)375785)+-##%&4]lO,&'+-J||jwtiPESbB;4/0010-)+--/25/%+_QBN`xrJEAjri~b}omvyhNmxIlû¶ɸ²ŻÞK_m㺚įz::>KOLKG@>@?Qvn^gķoPEGEFGHMOM>/0-$'7SXJ>4/10+,1567767::6466HxF5433?T_ji`VJA6"$3DVU;.#&Gzr4.,%.JUR3" !!$%(/>OWN;73278:=ACB><0/9ELNJDDHIHF@91,+2:HVa|bTW\_hlhdc^ZLIKMHDTljd\WTWTROU`pqE,7VWD??RmzyrqfNCA<8Nv|daht)WµĻ¼Ƽ~6;SWTNHC>872/..++(()'#$%%&'(').(Gոs8($*18ACKJPVrvsj[nþzyBiy{{ozc$*4@\ݵ}.'(# #AT8nHz|C+*Uƛ7!XxsrsohWEDSY`b_YTPE:745679;:872/,) =i/)&-Bemn{rjBЋ 2qϾױ~B(\hB8ITH6!.aE[idJ@9@@."%&++$aȸʳfz˹̟цǼͭIPǥȲʱ™z +)').I`,%%##$+367562#''%%(1nG#%*+.I|}xotlL=Q]A:3//1..+))+-.23, Vt@GRd|eOLImrm}~_~n{yxt]]vJj½ǼóľÝOff㸠s;;@NNIKH@?A?a`VhõzT<;;8<@CJMP<+0,"(8W^K=4/10,+16876679<@C?BLbpI   ! 0:>B@R\dӰaHD?><965:`T>9524@RbprlcZQ@)&7GUU<,!!%8iE1+$!B^aH1 % #')/>:21:FMOIABFIJD@;6546=IXfbTU[ahmlpv~tQJMLHCMdc`\SQSRONR`q~rK+Bq}kc[[TZdj>,/2237>GRJ9+-&'2YaG<500/--14656668<>@;DMPeicF + + + (069:PWTѨ^KD>=>=86=bxJOC<8647CWgx{skgZF- (8G[R:* "+Wo61.'!2QXH0#  !#%)0;KRD@DGE=89=DC>=724Ja_[XRNPQOMR`qyK+?VRGA=9<;9==>=>>50Iox_bku'^ŵź|4&>TZVMGC=86630,()'%%""!#"#%&%'+*BǡN6dwS5"#)V|tjYpwļ?myxy|k[L>8FE+,5Daҷ|ZJtL?nIu;14:ih+"[}uwsolaZ^`c`\ZUSKC>5.*,//-)*++,-11(&`}RGOX^gszrhhbY>kooeblyZxdlsDu½ȾǾļÖHb|l߲n7;BLQKJEA>=??7:ALQU_ma:+,++/3]6.0/' !J_^B)   "#%*0=QUMJKPH979?EDB?967;AHNUXXVIBD@9743:865/.)(%$#"! 
#!"$%()+#Eʭ`)BT|qth[n˼˽lſ|;t|rz|otz+,6A`ɐoD:e@>{<13c˹|.a|xvurfbbdfdec[VLHDGFHGHHGIJGGEC>*LV.))f~MGv&]i='$;Ҧ½r80fa7*2GG0>u_ZNFEMTQ@*#%&**#tðiyhXǾě8džƻǿU&+(()/IS-)&$"(054453*))&%+6gI&&'))G}{ypsqiq[ARX<1+),02/*+++/-.1(-Y{QFJLZkqoedcc]BqjnksvX}pLwǾżſĽÿ>Y|r޳h99ALNIHEA>=>AMPPafnyr]/-(&&&1:DID4(*$ );WVE:520,*.2577344:>@ABELr{= + + + + +  *+/>G=S͟PKD>89;99DatA;879J\owpf^E&(8I^R;* !#AQ70,& !;=93,  )# !##'-8M^YLNR>:9@>?@<2,Jpwz{u[elw*[¯Ź+#>SYRLFA;764/,,))'&##""###&$&((&DɿtYZe{}svhWnült8|zxonp}m'*4C^~. F~F3dAT~923]H&!%ynddejkjhb_YUQPMMJMNRTSOSKHD,ST/*/bM2apeGFv"$EW`0%?ˮ½q4-ea:;KSG/GY6>HIGFHD3$#%'-+*̶IJU!˰ͭŝ7iìĻƵƿQ'*()*.C}[;.$#$&.20020***%%*A{6&('((L||}|jvwonq]0VT81**.11.+,*,,,.0) 3\~LFB>Qjme^]^edKpkqu_mNy¼ǿƻĽ‘Be~wܨd48BNJHHB@>>;Pwp~]6*&!#+6:CID2%("(:TUE;520.,.0245336:;@A;:::?Gsj::98@>@DCDC?A;;IPUazwfFC?=:63.6EQYi~TFKV\elcagzSEDC;7G[[TROOQRMOT^sılB-@OJA=979>?>2/Jowww{}wcen{+Xÿóĺv,#=PVRMGA<651-*)('&&%$"!#$%%&$$##0ǻ½ktpd\oho:yehvtvug\]^^g}ohtg))2C`t"*% <{E7y`I}52/B``Zg<& #n}~t`abfkmmllh][UPNNPX\\_\ZXPMG-WN((6~U-LtzwMO̿n/fI%@ùk-3eZ1;BC6%ORM^`UEGD=/%#$(--,̿ǷN'ͻǘ_Xɔ5lĦûܻ@*))**/OmC)&$#&.2220/))(%&*6k@&&'')O~{vwoiq]0NO91+'+/.-)*))++,,&1W|sPHC@L_mh[SWjiEs{jstblJƾƻžý¾;c{oٺķf17ANJHHDA@><hgc:,%#*89:JRF4%%! 
*>\SD;401.,*+/24348:>A@=FSwc-  + #)>LRN]͚PJC=;:99;BpQ;878>Ibu~zsmhZ7!!-;P\O3&!!+kT02.'%+4:ITXB#  + !%')/:P_\XLIKKKIHFDCCGBAGLOTTMEFC??=<5.6HQXoTEJUajqoof@@@>95EZYVQONNONOT_tín@+@OJB<879@OYC:>@=1*Kkuwwvxztdbp}+Vƺżp/ @RXUMIA<653/0.*(''#$$##""!$#$$ 5üyowreWpmǿt7nkpzmXXfohFHZTB=>IO[dkhV2$)4Ce6$)-( !Q@9a?sz:23f`0!'h~|qd``ejjgc`_UPLMMRU^_a]\ZTPMF$ `L+(9uP%"8pPOͻ_+KM'Cȿʱf)5eX/1341%Si\K@8;@E>7*$$&*+0;ɺH(ͷ̲ڼݺ͍/tȽǶĪDZû޹@-))*'1X[&$%&$&-22222* ')&(/M3$((%&N|͵spknaz6OM;2*'+-.+))'),*+,%!JvjSQPJC[ohD?Rke@szkypd~ziN»Ǿĺžü½=`|yt˺d1<9\cU{{glklI4-,5>@??HNz\' +  + + +AKNRf̕GG?9979:87\e<758Q[L1&!#(Ovb10,&&/@_r`]H$ + #'(/>>@A9&"('%'+={z0&('&+Svkqpqoh`r6VS91)'+..*&()**(-.%AoeNSTF?OaW>,Hc`Gv~m{u}qb|xfGǽúŽŹº?iwpĢb36CFOa`Q;,&!#,@XN@9210-+*+.11037:=??@JWV# +  + !/?JONgđKE?;7889:BqL:757=OgwxpkbH* -9/6DN[uTO`ikd\lR^RF?;73ATUROMLLMMPR]vr>0EVK@;8988;<=<@A;/1Qq{wpponw~magr.WƵĹr)%>UWRKD@;75311,*+'%$$""" !#""!!!7~yxqcUr½qo1#v}~xuqt|b'+6@je,69-n4@~}_;584,011S_'!2}~tokiggfaZWWPQUXddc`_]ZWSO<""jz@)'2~q/Ctyp?ZȳO7aQ!'NǾÿ_!5254441?@aGQjlk¿uhhH4VPVf_R@10(&.EVNB:200.+*-052058:?@=?J[V + + -@LQGb˽IF?977779<`O9556O_hUSYdpe~tageMB?;64DWXSOKLKNKGGZrëuA6HQH?98979:;=>@A:05Uuyxsmfegyg^gr+WǥĹs''CWWQKGD=93120,+*))&%#$"!" !!+l|xvqcVwýo»n1%twiikkpty|f+,6eeXWTKKDQZJ72=E8~uj}vwq|o_bIƺùĻþúv:k|xuѩĹR7>FMHHIEA@BC~~dSR_`dmüo==NGQZZSD:8&$/DUN@7113.**+042147;?A?AIWQ  +   .EOTQkǿLE<777558D|M6556=QfxxmeX?& 0;U^B.#!$-R{X3.)!!*BfwjP7!  !%&(-:Ncpuxsrolidcac^XK@EMNNJHIHEFGGC@>Kbt[R[jicyl`bggMA@<41?XWSOIIJOT]bcpìuKHMRI>8789:;:uI8666;Qk|ulbP<$$1C[YA," "5|F0-& !+UlaSJ;"!"  
%'*.9Odu{{vtsmjhgecb\Q@JQVUQPOKIKJHE?=4-@QTSPDK^|ëkWNQG<779::;===?A=5=_pwurjbO^phbku+`Ȼ¿¸m&(ARUQKFC=9410-+('&$###!"#$$# )kzxtpdSz½wľi.&p{mvjmy~X%,6@q˔9<\H&?5JY;*-12100DiR9kG($ Jxrjjoorru}z~~z<(qi9*$/S< ',1V|6mʭL5N4*]Ƽ¿SFdE-&$&#`qRJEBDFU]I)"#$&-&Pų̛ovӜ@Ӻĺl*ìûľõ֏/TZ]foqspqlklib]]yq4&)''(1bg&''('+a~xǾînnseio3ZQ70&#(..*(),*++..$;fwlej\<7?EC:9<==tg~{ohWOĸþøijmA@@IVy<  + + 0FV`PoȸFD;754458AgN8765RVRW_rï©vQOQD;86788;;;=??<7B^outrkbY]n|}e^jv*[ҽ÷m'%BVXRLE@>:52/-,)'&%$##!""##!"+vyzzxtbTv¾vr3(jsezxv}}rs2$.7Ft٭m+$&$+a|2O~T6)*041/0ItxtN,)% Q}w:-yh6(#""#%),.Nv8o˯](\ÿO!HcC+#"#!(phBHPTPJUH0!"#$'-+TȲʹʹϓGȲԵlfrxŶb3ÿщOrgu3&(&&'1Wn(&&&'*dzoln]ne2ZN:/&$(,,)')+++*+,"Oiiu~x]fo¸|a6&9FKSVB@:&'3ISI>7132/-+),.-.259=@?AM`r6  + + ,JWTCzɵxLE;642258NtA9645=Ul~vl_Q:"#1BZV>+# $6T=3-& +8`fXI*  "&)/9Ogu||vwtokjlkljhit~ja`YPMKHE?8?FP|uIIReejul_LUaB754/1;QXWa|ìlNMQC:98578:;:>??<5F_ovtqmcWZky}fbhv)^пrdbS[̻øm)%CRXSLE@<9420.+'''&$#""!""" !,o{}|zue]zu½q0+gkYnsehstlfilo|n5 &-7Jwʝe8$""Rx+J}R7()031/2̻\)$Uo0.xa3&!!$#&,.WyAy˭R285*^ƽ¿J Oc>($!"#qkUQQJ7@C@5%"!"'-+]ʫʷɋN˧|mvºZ3ЃWhZrzj*%*&%(7o_"&'''-d}yrms_rb9[N8,$$),+*)(+,*'**>b^OTUF;8=DIUX_RDwmfn{~hi}z~SVŵ·İaEwrɏI2=HNHFGDAA<@cUer{^<,4GNPQPEC7%&3KRG<50340,+,++,-17;>?=@Kh_4  + 1CI@D̺tLF>633346Cv\K78533A:8HbrwurmdX[csv^alx-^ǰнön*)CQVSLEA;9640/*(&%&%$$!!!#!! 
3m~{vaY{tľq,,mlZtw^fn_YJCJLO\[[ca_SNKPB'09H{ƣ~]GFnt0J{S9++/0211}ǫX&$"Y»~|wb%1}^0& !"#',2]q>|̨O"OxP""-^ƾ¾F#J_;'" !,uuh^RJHT^L@)##$*.$c͎sɇTɬͫƙ²W;ûzWw{u>@^isyc$%+(%&,Wo(%(()3i|vppiwYv`=YK6,%%)+-+)(*))'-/  MuXFIHA;88DOX]`AAtm~xur|dixo~VUų¹Ĭ`GytƧ˾F7?IMEFGEB@=Iyc_W_[dv~z`KA?INPPLE?5'(7KTE<4122/,,-+(*-3:>A?:AKVh_W1 + !2=7;JдkQD<62355549D67456322?Ym|sjaR8#&4FVR8(#!%Kl60* 88?E0 !"!"%'(,:Md|~}ypnppmmljjs|l_WTNG>7;@Z~mKDTcb_gr}xE6676/.>S[fʽ˿fDPPE>98889:<<;>B:;Idsywtpg\TbnvZbmx,_ͷ¸j+*BRUQIDA>:762/-*&%$%$%"#"""" !-i|{{}xoRU{tÿp)'`d]psywphjjjvZ$/9Izt+NuS9,+/0110E{=(&%c~~zvs]!4Z1&  "$)/?{[:ʨ7$U{u<"-OĿ}C'R_9% !"2wt^OC=6űƿn7;;;72,Khu*'*'((7E"%'''/f{wsytrqnsqiuYt[:^N7,$&*,+*)*''('*, )`sU=CCFB95HTV]Z>Ftj{vnmqhmxTZZPsƬA6>JLDEGIC==;^fmxzWbvÿzl]C;EMMLIFD7),8KSB;5221-,,)('(-4:>>@%Za5(!"#4yq]I91)*,+$""$$),%yžđɮl_ȶʮkɣKEƶc*0..)+*+'(+*)((2U`!'*)*-5_k[;&&''%/i~yxzxrvnjs\wW6cQ6,$$(-+('((**)+-!3ksXHKSSK>>OWX]VGHvo}{pryjnR\ó¶WZl¢´@6;JMEGHGC?=JvWe{xm]E@GLMNLLH8+(6NQA<5121-,+(%%(.48<>?Qmvzzytla^_hw~qffn{)c¼˻f&(CQUQLE?<92.-,+*(&&&$"!$$#$#" .e|{{vrreY}un%4ps_ltwplnp|3&-7GX%#%$  %Il%ChgO8*+12208ŷå@**0~yyumV#;}W.&"%)*8?HMEEHED@;Ksm`i|uae~ÿyk]JCKOOPQSJ7)*8NQA;5000-**)$%(.58;A?83321:[o93212=KX``]YRH5*7GXM5'""&Yzb//(!AXRF. + #%&.8Idy~}vtvwsqqqjaW^mlinvuiTE;?MYZ\_]OHEVc_cv~q[E:64--=Tc}ʺʿuQQNA:88778:=<=A@;>Uqy|{ytnb`\hy}jafo{)bølr¶µd%(DUUSMFA>82//,*))&#&$"!"$"""!!2k{yvtsseTrj&4plZmuxPC@EJV\]WZPHG>;'%,7E]&"%"'Nj&GlfK6*+02/.5dtaN&++5}y|{s`$AN)%$%'*9jVDň&#Qa4 ,]y1,QP6&" !CWY^bbEGsnpcar`uLb´OXoںɿzD9>IMFCKFCAA>@Zv}|zzvqhablwxmbfp{+`ŶjUkʹöd&)CSWRLHE>6310/.*)'%&$!"!##!!  
6n~yuruxgZsf$4i_Tqwc,'*)!!$ !'$%-9E᷅xqjcF'"2dg(ElcH6**/1/-.+ "84)**;|a$F~M.%!$')3fa6K#CQ3"1dr--KK3%!"!EhZT`WLUUA."$"%*()̴ʹ~ӻSo“|˵~@YڻE25L]oO3gwi\@*4HORUWWM1!(*''*A?"'&hz{|th}PJGiR8-('),*(%&(+)',+#B}Y@ACKG?JZ^`daCGokicev[wrtLhô¢M[qİȷu<6>HJFFHEA@?>Cpi^h~rcUJKPQSWZYM0'-??=EMQV^G  +  + + !VǦZJB<942024Gk<5012W[NRSQE:AJU\ZTSTNWPA;93//;T_xŹǼpSSOC;987799==>@A;@]w||{|yskfgnw{lZis}+]ŞaVƿµb#(DUVOKFC=6530/,((('&$$$#"""" !:nxtuwvhZ{ſtſd"2`_\nvy]L?FIR\]^cpsryuD&18Iȶd-Te'LlcJ6)*110,*&!(.*&)*/*5HQTVWWN/!*)&'(9ip8&&&&&4kzwy}vlc~PKKeQ5,&&*++'%'**((*+"G\@AKMEPPC:1/13/-*%%%'-5<>>?>EMUW]@  + + #fÛRHC<983238ck74311VXA'  $&)-6H_wz}vrrrpnliigfep]SXTRQQRWUMB8BLaYRXVR]fcD:7410=UaĹýmKSPB98668:8<@BCA=Ba|~~}}|tmkmouvhajr+bľκ³a#-DRSMMHB=8630.+(''&%%$$#"!"$"! 6qurrpod^ſqa#6lf_puzjo~E$09Nҽ|'7j]%Kq_J5)*100-'#"'+))+*,@cGxJ+& !%%%%&-698Ti%:G(!-bh*4^P1&! "MxpU8.& ! ""$*%,żɿĮ?wοȞ_EZü4bܪ7-2Z~X<7,1UoqZ-(5FQTUVXN+ )'')*?lv9%&%'&7k~zwwwphQBDcM9-''+,)(()*)'(** B]:Ndda^U+U~rnxqv{}|^zd[ivHkžDV~|mᴓp88>JKCELHCB@VsdQ[yn]f¿{|wk[KHHKKNZ_WC*%->OMA90/1410)%$&)-6<>>?AFMVWYA# 'oSKF@=:856:k_56314>IRVJB;80# -:QWF1& !%.OJ0.''KYO;% !$&'-4J\p|zz{vqppjkjf\el\RZYUUSMKOQOE73FBDIIGA>BUWISftz{tfB:982-:Sa|ĿybGUOC:756899;@BA@w<'%&&":lywwqiQEDcMGOTGA<7/! .>OTB/%!"-VcB0.%#:NW=%  + !$%'.5G]pvtvtprnfZWMGW\^__ZSPQTRG2)38AGHFB<:=BEZivjne@988/-Fgx~|yvvropndahu*`´´b.FTVRLHB<710/,)*)(%"# !#!"#"$&':nzttvxscVĿvþ_1[VOm{zlhoxW$%/9JʫtW0HZ%Lf^G4*,111,&! #)(%**-Gÿ\Xt@(" !#'%$%&-673Z˸_%U;#0lc"7\K*# ! 
Ox]+$%"!$(&5ɼιid͢1%}źƾ|}2jºmqqFk}uzzyqz֠,6EnF:q{w_8)6GMPRPRD" )(&*.Ro3'%'&!>q~xuwqtkngRFIdO=0%%)+*'%&()(,/+Xa;<3127ame^R=2`nr|\qIvƿļ‘Fj{qὫl35=JJDGHGDEAIpa]ps½y|}zqfVIDDGKQWWM;'%/>KF>9213200+&''*-4:>??@GNTZYH6012010-*%""#5ɗNLHC?<875=eQ33215?GPPB=;5+ />SXA,$ !&GC0.,$.T[H4$ + #&().7Haq|yuttutpi^PAQ[[^abaac^XN@+"+6?GGD@9:9;>LchdyY7:;80,>Sdy||p}yu{ywtrwups|xaOSK@:88888:>=@B@>Kk}~{ywtoni``iu-aʿ°[.EVWQNHA<71/.+))(&%#%#!""!""!$*&:77430**(!&,:JĸCaV%Gf`I2(,111+& %*)&)*.OWS|m?(# !%'$"!$-685bƺU'-$4xɿ\!;dG*$ UZ'$$ ##&(%9˾̵Ɯ($hwyZe{5pÿsŦ~twgot}{י#0B]aVZ[7:\_NE1)5CHIGHH7 "*)'(-?mx].&&'$#Awyx}tha|Uœ=OeQ>.&'*+*'&((*))-+Zc96///@otk^F;7`moy[~nFwŻû‹Eiwo۱h68?OHAFHEDFD:Qvu~}n½v}|zobRFDDEGLTRI7$%.?LD=920320/+'&(+.5:>?@AGOTXYB2-0139>?>?BCEB>92(""9ȐPKFC=:677@qG22325@FONC<83+"0ATXA,# !:cI0*#-KK@6$  "%'*/5K`u}xpruvsqlaOBNSY_`dloq\MA8(!*8@DGDA;9::?RrZXiW;78891,9Qc|~zrbNQK>966888:<<@C@@Pp~|ytmdY\blv/_ófʳ²W,DVXSNIA<841/-+)'%$%$$!"$#%$!#)(;pyvsly|fYpW7cXTetǨX%"!" $,:QθX)QU#Jg\F2(*-1-($ "&*)')*,J\U|o?(# $&# $,862hƵR%8ǿ[=XA)"!YP$%#!%%')&Fɮɾ× (û|˜vans2ts]8WvӔ$5:VqppvD9QLIP.(3@DFDC@2"*(')/Nvc.%%&$&Dzu|xi`v[9M`Q?-$%(**'(('()(-*&gb81,/4\z}v\<;4a|orxbnIyżûľ7003331,(&'+07;?@@CIOVYZB201.17=BFLXalqqmea\O9+;RŎQJDA=9579BcyJ42314>BIJA;71*"1ATX=-$#6vF0*"7STM4#  "%()-4H`u~{squvrpolcNELSX^cfjj`KD<3)#+;BDFDB;987zyzvugY|mW9cXObhxvZ, $-9SG,62($ -\R"I^ZF3)+..+'"!!&*'&)*)Dÿ^!]o@%!$%""'/868mɳM  $4ǿÿX:M>)# \zO)#"!%#%( I;DZglƑ.įǿUY½p/wroE>UrҊ%7Oi1:TQ\K.'2?CDBA>.#*'').Pk/$%&%'Gww{uhaYuY:MWM9.'$)+*&(()*)*/+#.pf5-+2Ez~ZAEHE?5&$-BLF?8004210,)''*17:@A@CGOR[YD765369=AFITbo|}qnk}ćJHD?:869;FxtE23203=AHG?;72)$3DWV=+#&:zm8.+" ;ZUH1!  #&((+8H_sz~{vvvsqsplcQGLSVZ`a\VNEC<2($-?FIHDA=;9:;><=957668<=2.;R]yzǺ}]QQJ?:789=@ABAAC?>Qu~|ufTZ^alw/^ȯtdͻV-FUWTKGA<541.,*)('%$"$$%%#"$$$./E|yqnzt^WyhÿW% $&"!'.666rɮR)JJCAAA>,$*'%).U`*$$&''Gw|v}~_WXzV,K[H3,&%)+*(((()(*/-(:zt>118ax^RL@DC:1+$ &/>KG=6013221.)((+16:=>ACHOU[WD;964:?AEJMUao|IHC>;988:@uU120.3=CKE=851)&5GZU;+"';iF1*! 
5PUP2    &)),7D[pz}xtutsvuomdQCLRVX^^XROLE>1(%1ALOMFA;:;;<=CT[A6669;;1/:N\qzȻ_QQJA;89>HLHME?A>@Uvynb]_\kz-eƟfrtzíȿT+HWXTMGA;82/.-*)*)&$#$$$$#!$$'12GzunntgZXĿxoŸO=fRM^ZMF9* !$.;Rܑ6,).+ ""?xME_SC3)*..-("&)%$%&7B#dn@'"%$!!&/344s˫NFob<%Eǿ¾NCU=("%ixG$$!!##&( Y£Ǡn`|3ȷxǶ^3{¿ajkSnljknxw%4>@CKQVYTB;974@?B\x{qh`_doz,`Ȫ~q^}˽ȿS,FWYTLE@<61--+))'''&$##&$""$#(51H~~{trz||t[YþroƾR=_TNXUJD7) !'/=Uֆ3+*Adge`1*FhL$JbYB3*)//-(" ')#%''3Y~vlddeS# %eh;' !$&$!!#-574tʡE6;2*Hǿ¾K!FT9$!! *cmB'%!!##''#cǰz3ɾþ]6bpW{bvwoth%3?KpdW59\lT=()8ACCBA?,&-(''.g|>"$%&$"My}yxr^PTboW1J[?1+&(+-*(%(*)((+)!2_[113=K\dd^ULE:hujuaeGĸµºw;kwwө}Y79@SGCHHDBD,3EQTQMD?<:<@Ox|A255:;92-8N[s|z~Ȼ}\QPF>99?QedbZF@A@B]y}wmd]foz/aɹn{çǾJ/HWWTMF?93.-,+*)'&'&#"%%#""$$(0+P{wtujny~|rWZýmk¸O>_ULWUIA5'%0?Tь7)\ĺ4TF%M_TB3+*..,& #((#&*/>o}~zzqtomwvG(eza8& "&'$"#'/54;{ˠ=4P@$)IE$FO3% " (^n>($" #'(!gm3«\6jnh~gc|gra)3B_fOP,FjUB7)):CDDBA<+(,'&&+,+&#&#&%$#Et{wz˰{[QPMag\.N\=1*&()++'&)*+**,+&*Rp{m]SR\jospmc_[MptiqabNŹµöuCnrzϘX38DOECGHFBD=Qygeq~zn^MC?ADA62>2%'4ENC:7222131+''(,/6;???EKRW_S=8:88AGJOU\fpw~İnQF=<:;:88G|q:0,-.5?HNB4560#(6NZR6& #&)-.(()% +"%)*,5H\mtwxxyvojjkmle`WOQVVST]jx{ukM36DNVUPG?89=EQyJ8777:80+;PZr}y}Ⱥy]NSH>99AVkjcSCAA@D`zzm^\fm|)gÿƿȷǾJ/GUWSLG@;40--+')('&$#%&$#!##$,3/Pysx~nXRolH;]TO[VLA4&! $0>P՝J/ɺ*]D,H_SB2)+-.-(!#'(''*.8kľQ,d{f8$! 
"%%#!#'/657Ơ43MF% +U½@#DJ1$ !-gs>$#""#)($pùƼb8ò~{GSS3diW``svdpT,1),;DDGKD:'(+&$&()RPC(%'$#$Gv}w{~_VPOOJag^-TU;2+&(*-,*)+,+*+.,*@izz}yumXqqfleYOķöl9lr~Ѿ˽Y5pyob_qyn[LC=?D?40;1$'4HO?:5121020,('(+06@GNSY]Q<996:CGJNW_gqzǪjOH@><;769En80,,.5?GMC8780#'7MZO4& ")./' %# &&(*.6G^jrxywwsmgfhjgbaRKT[\\]cq}Z75COVYRJC;>@K[uM<778872-:MUlqmzȹ}]MQH@:;@SgbZPECB>Gcyyma`fl-hüǾǾK-FXXWLE@:40.-+)(&%%##%$"$$#$&.2-Ms`W¿on·H@bRN^UKA5%!!&1^UEm~ywlvػM*2EOIP2;>:B4%.>FHP_aF*&+'&(2kvR#%%$"!Jz{wtug[WSQMFaha+QM<0,((*/0,++,,*+--*Ft~|mIspgleYQöòi?wqøZ6;EQHEGIFEB>Ybcfzgctyp[PF=>@?3-50%*5FI?93/2123/,('(+17<@?CFNSX]O<989>DFGNX^fp{ŧkVJA=;:769:@OVQPJCBB@Fc}{ncbft.cǶǾL0IYXRKF>964//-+('&%$%$#$"#$%(-5-BztbbÿmoļGDcPQ_VG>3% '0@]ľ@*KZRA2,-/-,'#!$))&'%#)E~~xpmlfkrtqoha_WSZ[D .jt\2'! "$" ##'378AǑ(sn6%""!$*(%Ӽ˿gcP=ȼ\HAx~]gzpjrr{ϴH*7GPWQ4>BB>-'/?ENtt=(&+&'*1JyQ%$%%$$N{{wrlf]XRNKLeda~!PO<1,))+.0,+-,-,+,+,Is|xnJxsjjcWVõĬ_D~sƹT7:GQGFIJDACCarYftzp\PF>=@@0+0,%*4BG?81.22240,(')+29>A@AGMPYYL:789?CDENX_gr{m[G>:;857ABAIh}qfbgt2acZXkƽH0KWWQKF@<62/.+*'''%"!# ""#$#'.5.^~tccþnpƾFEbPQ_UG?1$ &0=\V6&4NB(L\Q@3)+.,,(# %*($&'&*>yyrplkihpoocZSQJQUWYaehfdgkkV%2jvW0%"!$$##$,695Hō# BQ0"2fq5(NI.%";mf2$"#"#*&)ԿƼDZzkβH>ģfeȿCGs^kwr~ȵA-4KgTZS09KTU=(0>GXS7&)*&%)7rz<&&&%%"O{yuore\VMEDGfaby&PR92+&)--,**+,-,+.*&An}zeAxtdncWVö­_O}~oĸP:;JNDFGIFFFBrpsr_Rhx{n]QE>=A>/&($#*5CH>83.03221,)(*,2:=@AAEOPXZJ737;>@@EOY_gr{ǦjVE>::655:Ng9.22//.//5?IL@9?:.*AA@Jotfbju.gɭƽE-JVYTKFA;62//,+)))%""! "#$$(174n¼v\^huĸAGdLLZQF=1$%0?V-*#! ;s|>'HVQ@0'+.,+%!$((%&)-9e{zwvtqmeaYZ]]psxz|yuwyz{]6m|W*#"  #$#"&,591GɊ#4Q9$0gÿq4+OJ.$!:ja1$""""('-ƳЬsuȬBCAd9Dspͭ7,/7@87UK/GRfnfA$+(#&)-NkvL(%'%""QwxunjbYQF@>Abafq,RU;3+'(+,+*++,.+)+)(Cr~h=7|qklfQX·ì`J}~~|hõN59IODDIJGHD>ELwo^hz¾}zm_RF??@;, !'5EF=92/33122+&%'-38;@@AGOVZV>646:>?@HQY^epzǥaPB<97443453/120.-./17@JH86:7,*@?>Gqsbamv+_ǼŻJ.HWWSMF>9630.*+*)'$"$$""##$%(19/kþvW\Ŀ`pv~>D]LGTQD;.!!%0>YG%**&"$Cz:*HWQ?0(-/,*% "''$&*+3f{zxvrQ 9psQ,#!  
"""!%-791Nˆ6Ha(#3kn-/TD+$">p^3$$ "'%4ŽŲҽeˢ8B~`\{~rmq7HgpӤ72/5F?QnO1Q_?5,(2@FSyj;#)*&%*5mw?#$%# #V|{vne`XQF?;=e^fi'OS<5,())++))+--++-)&ErjB:>umedP\ľèSKzhĴQ7>HNBCFFGCC>BFPSVQB667;???FPX]cluǡ]GC<963322430020-,/-09DNK;784* /@RXA+ "&+* !%%!$ '8OmR2E[knjca`[ZVW]`cc^VS]haLDEIR^_^XL8',;LT[WMEBEEH]|{I:;8553-+:GHFFBEHKPOQ_vƶuYQQC=9778899;?@@@Iprbcku/fƼF/KZVRKF=:51/,*))'%%!$$""#$%$&-52iyV\Ŀemº}c1F^OHTOC:. &3>\݇0'3ZL%OQ/px70LXO;0(-/.,'!!&&$&&(,Zz|vjhmlfC :woJ.$!#&" "&.9:/Sȑ8(OR/#2vh*.MA)"!EqX0%# "$(&4ϻ̧a`Ĥ/Ml_3R]qf`idW|sYjw՜-/9^uwP.NTTS:&2?CQ{f<" +)'%)/VnnD%%%$$W{vqje]YSG=7>g]ng*WS?7.')*+*((+++**+(?l~YB>9po}ej}Pcľ¨SU|böL9=IMDADFCDEGx}E`zx`j~v|tfTIDDB:& )7BA<71/10020,)(+.4:=>>BHQWXXE868>FNU\bmu}ƜWHF;8621/0121221/--.1>RdW97:7*!0?SX>*" (*)""(##$$2HhQ1J[imf`]YZ[VQX```ZSPWZPIDBHNOTXTI7(*:KSUSIBBBBD\G8=>:94-+9HKIIFGO_eeirzŶrROOB<877779<>?C@=Ptqddnz/hȽźC2MZWRJF>9410-*)'$"#!!#!#$%#',55ly^]½gqrl[1FaRHXM@8,!%1AWx1Ctl2av3.FQL;,',1/)& !'%$%$&*Y||}{zvqkhf^= 7hjH.# #%# !$.681[Ƅ 'YW+"2{Ŀc$0L?("PsQ,%$ !$)$2лͱq`z|¤'M~nz5SavT?VVVybM[xט"1;dfiobJ2Tij\6'7GHdzG8!)'''):ly?&%'$!VvzrleVPG=?@Dk^qc+VS@7/*+,*(()++*)*,'!4^wVF?:ont~ggrVm|MeſžƣQW~wi´H:=IMCBFHCCELeRMslYk{|pf[OIFE:)!)8EA;92.01021+''*.5:>?@AFW^d]B578:>>?HPW\gszĚWKD<8411//1214220.--4Qny^77=9+"0BUX>,!"**-* "&#$&"1Dg}~U7F\fmb]]^`^[X[]^]XOOUYVND@@EJQVSJ7&+:HSSPFA?<=@]sK:;=:96-(6GKJIISk}|yŶiPLKC;8777::;>?BA?Wt}ocfo|+iƥļ@2N^VPKD@9530-+(%%$""#" ""#%$&0?>q{`]½\lȺmxui/?^LFSH>8,&.>Zm0rŸjV'et4*FNM<.,.10*% #&%%%%&+Y}{uojhf\= 7/+,.-**)+,-,++&,Tz~{|{tgcVHnpybhz^zLgžĿĝIT~~viñF9=HIAAEDBBDBVSNuo^\hx}qh[QIFE;( *7D@;6201102/)''(.5<>@?ESgpoeC67:==>FSXUK7%*;HOPME@=;:ATwc@;;<:85,&7DJLIJgz{ĴkTPNC:69889:;=@AADXv~m_eq|)f˼ú? 
4PYWQJC?8540-,)&'$#$$$""#""#&.94o¾`_½[p̹kp~v]$EcKJPIA8+%.?`p9ƴaB;%-wt1+OSN=/+-..*$" ')'%$(2Z~}y{|{wuxslmqnf@AtlC)$!!"##!"/575\ȿi)>YC&%=ÿa$2H9("XqK+#" #!%*%@͑LzTsvvg5iǸp~âCJf_KPhPDYiȎ$32>DAFF(3FCFA1*6@CdoH3 !)(&'+Avs4%%$"'`{vqg]WQQPOPMoQv\/VR>5/*,/.**)+-/+)*$$Ky}zzzwwrW@krnx}tydl{pvLnſĿJZ~~~wiòG7?NLBAEFDBD=Ddiq{vbex}qcZNIGE:& *7A?;70/2233.*&'(.4:=@AIgxmE99<>?HOOKB<:=:>L\yjI<:9:97.+9EHIKQh{zpuijgMOLA9678::;<=@@BDYx~m]fq|1k˳ù>4LXVOID>:6220-('&%&""##!"$$#)*,&l[]¾\qİ~upP"LkJIOJ@5( )1?]7öY!'Js/3T`\>/*...*# "'''&#(7^~||xx{|}zxuutrj= EpoH,%!!"$$%1596eZ2OP=#D¿Z5I7'!#dvP+#! #!&("Lˉ Ng2q­g|Tblh[USFQelɃ*3A`VQJC/+9?HM3+8@GUtgH2!+('''E7&%$ %a~xoa[TQPPQPMsO}^/XP<3/+-/.**,+,.,+-&Dm}{zzysbM~nvzydivJmſþÜH`~~~}pgðB9ANJDCGHFBBJuw_h¿||re\QQID9# (9D@95/.23330,(&(-5:>@DPawmD899<<<:;:AO~XC?=;?<8,)8GJIHLcweedcsògONJ@:998;;;>@ACBB_z|icis,gɠf^d_gq¸;2MVTOFA@;42//,(''(&$$$#"#%&')%$n}sX_[jvUL[FGNH<4*!(2Adˑ7|[;p/0YlR=.'---*#!#(&'($&2d|yxwto]9SyrA*$""##"(1563h˶T%@JT.%BſT6G:'!(bpE-#!""%'#WłTz[0vϵ˸{ǪarlelslfZ[hlmu(4-1]s}m32Q[aS+(9BHaxI/!)'%(/Ue/%$%!*dyypa\]VSRQMMwP|J5VH;2,,.---*,,-,+,1$>jx}~}}zgPnt}~{\hwuMnƿºAX}~~~|uoG:>RLB@CIEC@G}elvu_itug[QKEC7#!(:D>85.-1232.)''*/49=?BO`r|_=678;;;CNV]cjqyQJB;640,/9eiC_a:0/..5H_eU<=>9.%4J[S3$#)[}cC&"-63-.WipT-,><93++;HKLGHZcLMFN_sı}dSPJ@;899:9<>@ACAEc{{ecgp}+hѽ¹33NWVOID?96300,+)((%#!!!#$#&%&$-s}zoZ]\nòt~pZ?FXKGJC95*'3;iةqhLtp+/NZK:/*++,*# &'$$&%&.d|xrkhW4W~mF)"$%%#!'064;tɻN#L]=&IQ!?I5'!"Q_@+%!"$)"[zSz|UtX:|~ѾwƨazUQgy{wsusolk'4.YR(7OGGE,*;GLrx@+"*(%%0FzyW'%%%!%c{xurykTWUQQX~RN4SH;0*+-/..,,+,--01#>lrqry}eT|pz~usjyxVqvo~|tMu¾GY~|}~}toB;@OLABDDBEBD=1&[namv}tgYPKGB5!",ACOdp{}[7249:;=DNV]agnz~ĉLJA9521-0=g]NgR201.-4@T[P??D>-'6G[T0#$9l_rl[3("?KHKF7/ZqbE&$0Iewf5D`fnmdcdk`_egc_^[TGCGEFAACEF?83.*:jjG;67;?DLaibQ@>:><6-)>DEEb{lcju/lÿ¸~83GSVMHC=753/-+()*%## &!#%(&"%s{kY]¾ZfƱs{ia[ZO;SdIFIB<4( (2Bc«Ʊh)5NTK=/*,,+)#$'%%%%'/j|smhbZ8WxmH)"#$$#")1713uʲH'F½K:D2$!!MaB)! 
# %&"`ƺo XszjzS:~Ɲ{ǣn¡`hMbksy{zwmX`+:UO-"*)0=NF)*9EIo`C.$*'').KlyZ-&&%#&d}yv~yg\XUTW~~SK4QH91)&*.-/-++*+/10&DysNHgvaL|oy}i{vgiow}o`sxx{}lIqĽºþ@b}|}|~}qp~A:CQF>AEEC@@?Gju}y]l¾tzqfWMIF@6!",=D<74//20020)&(-38<@DMam}Y6148879CLTZ\boz}RH?8320--6XQQT92.0--3>JRM@BOB+'5GXJ1#!65)MH3,( B]ZS\M3+MJI/'Gia5I[enjdcea^]dfd_^[O945788:GPI?41-,3]c@;799?E`wt^EA?>@>:1+7EIJFBCFGIKP[sĵ|aJJE:667789=?BDHM\tjcjv-hz=3LXTNIC<6430-)'&(&$  ! ##!&''$)l¿yiI`¾[nů~uvi\Yid_ib1!ZdFEJB<4' *1?gNJFI@;:7Hg+6LUMA1+,.+'##'%%$$#0pz|}|xxsfdcbc]1QynB(" #&%#")165B=:4//20/1/*&(-38FOTV\cmy}}PK@:63/-..7@;3121/./5>IRMBCL@%(5JUC,!"  $)+'CUNQ[B,%413 #=QmF:HYflfc_^]]_ddd]^YJ7,/259ALSK;40-('AVbcN@8255=DPQPLDGJJKKD:/=KKKGEDGDHJPZt}oLMQF<8;;;=>@EJSdnv~gelv)gy8"9NVWQMC>832/.*('&%$""" "$$!"&($&r|iLd½Haîzejh^gv~(QaGELF:3$ (4Bjh#$%# ""Ae)4MVOA0+.,)(#"&%###)v{|wuuwxtltvtnledeU- !Wwc;'"$&$$%*283<ʬ?%PǼC?G1% )Zb@(! !"&%"pŷZ*Y~he{qA6ǗzźŜeMDoxvoRQ^ikzؽL029liH,>[`R>&,;FOjzk?*#)''(,Kv1%$"2exvwunz{a]]UOtO?6SC7.))+,-,+*)+2365)#1_ng}zzvqo`Vyl|qmpntlrxz{]rkBzĺ¹ľIh~~}mu|::CKLBAEFDDALziZ]lobkq|pgYQIGGC4 "->A=:4/.0/12.*')-28;>DHZmkqd?3/236<@FORT[dmyй}PK@762.,-///0/010-./6=HXVF@B1 '9LRC+ !$(,'@T;HL9)$$ ")0AMK9:E[hieaZ\^Z^]\b_]YJ7.0237BIG>320,&&0?IG=3123@FHHLKNTSVSQK;/BMKKICBDEHJLXlymqxthkljqoqopmeb^_ZY^ceir|zbdmv*dx.4MXZRJFA;520,+)'&$"!#! ##"#% #o~nRbýJ_ƴq:P`JIMG;4% *4>kl+!$%$'Da&6KXRA0,/,,'# $'%$$ (p~||vrnptpwzvrokaa^N$ "[|a8)# $$##%(255<Σ7+Xǿ~;$EL/$ -Y`:%"#"!&%m˶R (Y|l@8ĒxųƐ_]gE|~rmvٵG/4PwTRXD5:HJJ<)0=DKfx^:(%)&'(5yZ&'$ 1gvwtŠoa[VZrX=7UF5/+(*--+*(*/4345+'1$/=@<97301254.+*+/8<>CHWlpzqD0-.149N[ehg]N[abcage][VK9012346;8551.+)&18AHA=/.19ELPNORTTZYQVPDBQUPVNHICEJOVZe|zruxxw{vqqx}~xdhoy-cÿƿy2! 
$^r\6(""$##"%*185@̢4&Uƽz:"@B.$ .W\:&$$$"%%"tʲO$\~pA@ͼu²dgztu{׳B39HG/2AE0=F@Y8)0>ERv{_:'%(''(/Wa'%$# -l|vyεv^YW[pS75UE4.)(*-/-,+.23475+!/i~xvro^a|m||~yv}pf`yZz}}hK~ĺµſGi~}}}kwʾv;;EQI@BEC?CDYkR`jx~obXMIC@A<0$1@@;7AD52>D<36:15C?FMLZntnD1/-..17>EIPZdlvʴtJE9320//-,-.,.///..-19AIB/,-('8OS<* %*)#1,+0+($%#%'***,-/239?I\dkh]Yaceiih`[YSI9.24234355747Ol_8*4AGIla<'&)&&&5|S'$%"9s~xyt_UWT^oW3:QC4/))-00/--123697)@{zzvdbvf|twxvrutzzX}gRºýOj~~}mzʾv97CQI=CFEACDLXhgX^abjx~pcWLGCBB?1!&1C>9D_O7?ZU>8NE3;A=NOGYunC,-.--.048=BKXbo|ǫpMC6221.-+,./.-.//0-,17=?4*),()8LO=* "&**"#%" ,=EDD@@?A@99;9<@GZflidbbbdfhcYWSLC=?BCCDIGLNTX[bjmlkihlmnjkrxxxw}~tfhmy/jѾl`VXRas/ 7QYXQJC=831/-')'&%#"!!!#"$$#%#'t}whP`{?bŵZbnpjllnldM$IXDEI>4, #+3@pӯX.kW$5KUO>-+-,+% $&#" 4v|{|}}zsohb\^[^feg`XSSUWQA (lv\3&"##!!#)252J̕&!.]ƾ¿r6%><)!6ZT6% ##"%!"ȧ:/nu7AǺƹt{Ө336_dMU4*AXZD1+6EGQpX8&')$%&2\`N*$$&%$-Wvxupnkd\UQYnY-8SA50+*-030/.237;;8*+]xwzzr`bvfzytou|tY}|~|^F¹¹};j}|||~Ī}|nȽs7;ESJ@GIFBB?Aqrvtgalÿ}|rgXPNLJJB2"%5??320002343478AOYguĦpG>3011.-,-./--.0./-,29=<3-)(' +:PQ7&! $()! !#$#('2CU]^bcca_ZWSLHJMP]gklied`^\YSK@LUW]adbhjkoqv{t^isz,gѽn+8OXVQKD>72/.+((&%%$"!" "%$##$!&r|{mP^»Hfê{Xovvtrrttrd*"SWFEF;1) (4Cwe$!KY'9PZM;-,-,+'  $&"!.q|yy|z||ywmheefhikg`\UTYQM> -ryW2$  #$ #*383O̒ 1b¾p4'@6) 8__:&!"$"%"&¼¤/,ww8EıNrzzؠ,5/163,,()2=1/')7GDV~zT8%()"%''&!!%%$%(/Ovxuuspj`WM_oW2=S@50-*-130/047DLICBGGBB@\gxyg\jzrk`ZWSRQJ8#%7@A;<7.245310+)-26;;?HduL@>?=>>>?==@>;=@GXlzījG<3/220//-,-,,-/..--15>>7.+-*" ->OT9%$()%#&/2532149EMTZbikpprrtuqpniea^ajnqpokmke_]ciotxtdiq{,g¾ÿr&!:OXUPKC=740-,*(#$$$###!"#"!##"o{whRcNwȿtxy{x}z-"M[GED;2& )4Bx]7172!!$FU&9R\K6,)-.,& !$$%Xriliippoge[[`a_]_\YZVKMPLN= (gjH/# "&#!"*372ZŊ 1jľm,'<6( AeR5$"!! "$(4xȿq7IþuEtp7XMG?363/1666AjӘ-7,*')(''++)$''*9DDSvsQ7!)&"#$#%&%%#%'&1SwwuwuqkdZQZlX.CQ?51.(*/10.05;@><5&D~urpkjig_=lvmvm_]iwl[vzu[mvx[L}¹wDq||~|~}{eǼn@8DMICBDDDC>]fq[_o¿{}wql_\YXWUK:"$5A>:62000220*('+069<8200,"#)4APN7(!! 
!"%)*,+8ELVSUTT`mqotwtvuy||}}}}}}{r`jq|,h¾i)&>RXVPMC=84//,)*&%%$"##!!#!#%%"!o~|xgPbcv.)S[BFC91'"+6GxS,)+& !&([S':PYI5++//,%!%$!([{tsnmpqpngighjihhjggea]^[^XC0beK1# #&"!!$,673XɁ#2ng*+=5(!CaJ/#!!  + 5wƳk-TSAzj4\TIC8:132268Dqѓ&>INPVUMFAAGKMRRCCGTxX7('$##"&'$$#&')2Y}vvwuusph\W_k]+AN>3/,)+01.+.5<@?;3$:z}usqjjiebbc]Klvnvh}}cUcor{q[yksw\L÷v=s}~Ţzyeǽj6@BM\fknrpsuu}~pejq)fоg__UTo½h')>PXVPKD=741.,))%&%" "!! !##" $r}|ygLb}bn'&SSDDA:1(%,6?wP,%))"&?N$9MSL5-.0,*$!$#"&Uty||wrsvqmosuxvqpqoomtwrqkfghjgbI4dgP-% "$! !%-872W$4qe$-<3%!G_K0$#!'g3~įd,Rh8IgQnf\PNQ=79;;@Qυ6iuiktgACJc|C4*)%%%%&'$%%&&)6Vzturqyz{qbRbfdx0@L;2-**,-.,*+47;;83#6q]KVabdc`ab`[Implhkvw~zzt_tk}WSĿ·pBt~}~}~xgǽj8=KVJDFHDBC>diHGGIS_lu}wsqhbba@!(7?>;730/0120*)))17:;70-*;LTV`cdfgjknorrux{~h]TPQOKMMKORSWZ]`efghdigdbfggnssqqlnj_^`dekomruxnekt.jϿi%#:UWUOJD?83.-,()&$%"!""! ! #j}|{hIa|^i#&JODCB90& "+6C|޶|spkhjJ";SYL5--.+("###"!Jdjmnlljfcb^_ehmlgegijhhmqqliilnmkhM 9ouH*$ "####&,677`x"3sd$,<3& JbJ-#!! )rN>|_+Vſ~¾pJXsu~rgo}FAEA>Vu|:rmw_@DGThL<3 *'#$&'&..%$%'(5\vvuh[cder.>H91-+)+-.-+)045575!9tx^W]eca__ef^VEqnk|xoYsvqawwSUĿµlGx{{|~~}|{|teƻg8;NXJBHIDA?>F>4@DLRZmq~|xujb[\R:&*:B@<95222221,*+.6:;;5.,/4FL\kloptusvyz|}vwvsqpuxrq{zz{~}||}|~o^mw/kƿf#%:PWUOKH?820/,*'$$%###"!!""! 
!%k}{x`?eĿtXľd$(LL>A@80&"*7D~þH)@UXG5,,+,(###"!%Kcilomg\XUTNT]`a[WX[\[^cgknehkkihhfI Cvh?*""$%$"$.586fq 6}a&.>1$ GYC.#  %io`l}~{}yuQ?ž_/[üZlRd{ht~WEPTIJdwq?rcdjzzlA=CCDD@=1 *&%&&)1Yn8!#%,:Zzw{mbicbk.BH74.+(+---+(+02256"G}fffgdca`if]XIqpi}zqcr}m[VVþijg?v~}||{||||}||}wdƼd4;98=DIM]pqz|}vwqoiWNGIE9)"3EIGA>97779532214:?>=;8;hþnEy»c#(NRA?=6+"".8DB+>T[E2)*-,*$"%"!!!$:k~tj`RSY`ed`XUVSV[`_dfdc_\\ZX^`A @nlI)!$&%#$-584jo"3~¿Z3B0# J]F-# !Zo}yux}ngE"FƾW,avBZ~xͯpNS]qhXhECEED>1 *&$%'-]g!#$+8b~v}w~jhr\al-CK;5/*'+-.,*)*/0166%Mljddfd`cjhccRuph~tq{mjj^TX¾űe@|||}|z{|||}}}~xiƻ`7>OWOJHECAA>;98;==<9;:??@DEVahmqx|}~~}}|{|~{zxzyuttuuostvy}|khmt0gżd!!9OWTQJD=83/.-+'$&&"""!!!"#!"&i|xphYFfľf9lľ\(NOA>=5*!#03GĿG&>SVC0+--.*$  %%"#'*7gzwvsqof`TNNEFSTXQJMSOQQTYR/ Dmi@'$%#""%-479n̼e$7V3:.# !I[A)"""gv}{viehmswpqdi}w2$GʼQ1b`@<:8;CHJ\mo}xxsmkdQFE?A@@408HKTXWTPMRRNIHMLMOVWZ]ahnyzmnoponrtuusvyyzzeemw0jö`"#83/,,*'%%$$"!! !""!!$h}xpiYDižc4`¼^*QM@?<2) &16IG(@OQB1-..,)$ %&#%',;ļ}pj__c]YXZ[][ZYXWK+Cj`@( ###""'/88;tϹX$AQ5<.$ &PW?)!!!Zili_WTX\dmtxroyyX$$Dͼ~M5b|U:5Sx~a{0Hd`T>GpٿV9aTXTHCA@::<<8.':CEDCC?. )'%%(,cד%$*;dzx|jqh{Vlf3HI=71,+,//,,-/-.040Vw`_ZY_^[bkiYM>yrk{d]dm|xpr~l\}t{}L_~ÿ·ɮ]K|}||||~{|}~vlķ\8;3) %08H~B'?OL?1,+/-*#!$&$$')8~Ŀvicadeb_[ZWSOD+HjbA+ !$%$(0757w˷S(MɽM18,!(TX?*#"C[d\PHHKMS^gmheloqnfY=%LǬ~K;d~xS3/Bi|z]l1mhC@cֻO+6&%))%&$%')&&%+;DDC@B;*#*((')5|$%,Bzjm~ulgn|}ywi`vu}xGbV\~~{||}}}}~voʿW3@UTJHGECA?=?:;?CABUe¿{z|zy{~~}|yxyxy|||}}~~}}}zssuyy{||}{}~~{iipz/kϿwspa`m¿^%:RYVMHC<730,()('$$"" "!! !"d||qZFmľd:[wI*LK@?91( "-7L|?&=RM>1,,-.,$#%# #%%4ysnlhijgbZRONF;(TrnH) "!""#'184;}ζL)Gȿ~I4:)!,WY>("!#B\`VIBAA@IT_hcfdb`YQG2(L~ȾzI6f~jJ50<\xziba1|UPHs|ֶF01((''&&'(###%#-=DCDDA:)#(&'(+<: $#+?muwttvssttqp|Osb3FE:40-0110/+.02/.1-Wg^WUY^`iocG?Cpi}ytx~kizp}tHeļfPZo~}}|}~}}sm¹TAIVRHFIE@@A@B8?FELPfz½|{xvrrrsrrruwy{}}}~}~xxx||~~}zgks{,gϺý[';RXUMJD<73/,*))%&%! !    
e~|yoZDiľ`6^~sH.WM@A80*"$-7M{=*=MM?1*+,-*$!&$""%*6zqnhgjjfbUNJGA8'!T{uA$  "#"!"&1846z˶I )AȾ{H58("1Z`<%!G`bSF?;;CGQ]bV^\WQMD:)*Pưz@5c{pYC4.5D^ds{zy^vxJXT@gcbc״>-,'&&((>SW>%%%$-=EEEDC:(!'''*EȽX"$$,7jyuuuussrspizMv_5IC:50,/00..+.12/.2,"`gZPRSWVksfI7Cjgxp~gh{|u|tEc~nPZj|}}|{||~vm¼jdeic[TXXZZ]```eot|~~{zx{ywutupoljgccgimrtw{~~x{|~~zaiq{.f¿ÿ['=VZWQJE=630-+('%$$!#$! g~{wmWDi¼^;_{vwoI2JF@?8/'#/6Iz>+@OO=1)*,+*&!!'%#"%);{snegklidZOJHD<*%Xuf6&"#$%%"&2845~̰K /KǾ}E#77)!4\W5# BUVH?:9;=ER[VMVURNLE=-*Yƾ{:9\~s\D3-/8Phyvtubќ_CE_hE]˺֮2-,&'),^d<$%&/@IGHGG;*#((&2n= $$$%)6eyvwuvusttol|J}Z4IC94.-.221.-0411/1("a|f[NIKNaxvhRLRjjwmpydjz^8jxcekPVj{~}}~{zwvtvsssqnheecbZ[eda`\RRZ_hqy|z{vejr~1iZ%?VZXSJC=830-+)%&#!" ""! !!! czxslZNm¼^5a||tt~|M-GD??8-&%09Iv;)BTO@1*+-,,&!%$!#$)A}~wsppppnh^NRRKA'%TmY7&"#&*+'*3829˫E,H*7=%"+Kǿÿy?"21'!/KH1  5@><:78=AJQTHIQSUTURD,-Zŵy~s>5,% %1;Ku;'=ML?/+-,-*$#'$!"$7t~yxurmjeb^b\O=$ #Oj^6# !);>1-471BΣ?Fzrd|_#%,PȽ¿y7!61% .OJ/ !9><<<8:?AKGHBAO]Z]b`G*(\ªun=:`uspght|{qmgTJ?:1-*-6DQVSH3c|0A]ohPJGHRlϻȫ՞*3)&,5yX%$%3Tiriewd3&($$%&&%#%###$%-;c{zuvutvuutnf|MV3D=62.,.00132214654*'dzzoiv~~wfPCoy{Ng}zyxxxwvtxwzy~vcpyz|zyqnokjgeeaec\a_`_gfhnjmnmpuvz}|~~qedfegjllkjkkjc]XX[ZQIW`ba^VNNWalt{}|~rilt/jѾþR+BTZUOKB=:41-,))'"""#"!!! !ewicguq]CmY1cu>.HG:;3+$$07MĿq;/=OL:+(,-.*# %(# " .s}wqnmkffgiok[L< "VoZ1" !'8XY6.26.@Ϣ8Iq`pa&1Zƻþw;#81$/ON- !!! :<<>>::<>DGD=BR^[Y__]/-asj<9Ub\W[gnj`[ZOC=;82++,/8DHGA;SdC<\ʫӛ$3*&,:?$#&8X]_WY\E'')$####$$$$##$$,@dyxuvstuuturi{IT4G<52.+)+-040/13543*#K{g[NJKd}oT=I~|ul~qpsqolgmrssruy{}~s^^fklnwwxz~~}}|zzzz|~~{||||}jceddehkkjkkkic]UU[WNHYac`]TLOXbnx~}rhjv0gǻnoT(@TZVPJB<731-,*(('$#"!! "g~yytZDo»U6c}o9.EC9:4,%$.8Ns1+ANK9+(,..*$$&$##,m{pdZZ__[[_a[YVII? 
#SmW0#")EeV.+272D̟1<`]wY&/\ž¾q6%3/#4YQ+ ""!$:8=BA<9;@@D?:@PWRR_v\"2cq}~f-7PTJQgg_UPQL@<8881-2327>CB<68PY]yɻЖ"5*',9y|f!#!&8FHIIFD7"&($!##$%##$""$%,@hxwxxwvuvurod~}NR3I=45/('*,3;3.775378/9_xo[=56Fn|fXQw~r~zvtsmjnjov{~zxzyxzxx|{||~~~}~|zxxzvyywuwwxxuvyxwwvwy{}}{}}~}~ndcdddhiklkllhb]SVXXMJ\dba\ROPXcmz}~ognw*eʮgt{P(BSYUOKC;630,*(''%$#!! ! ! !gt`MqT6d}zg:+DC;;2+%!,:Nſp3*@LE7+',.,)$&(%""*d|sf]YXYWQUYOGCDB93 !VqU.#!%+D]G'+584J˖*%ZqyP &1aýn0'1.#6WD)!"';3?EC><:<<:12:FGEPZP6"3gpo}{|{k+5INQcfYNHIG?;9874.5@<67;=<8-7voUigXofhsǾտ΋'5(')-Okp~e* #"'8EFEFDB6 &($""#&##%$#"$%.>nxxyvuurwvsolzNQ7SJBBB?89=PdLFPPMQWZZez~xj\^cjv{~}xt}|yssloifeg`cfbbcdhnsw|~~}|z{|z}{|{}|}||zwxtwxwwvuwvwvtsqrtuussuuvstwwuuwxy{}{}}~}~~kdbcefghkkljkic\UXXVLLXbc`[ROPWdox}mhow0gIJĿK*EWXUOMD<740,)'&'$"""!##!! &e}dJsV3d{wj:+IE=:3*$  );Mٷ{qh_UTmm6*@MD6*&,/,'# '&#!$)`wqg[SOJJFGIKA<83A=8!"RkQ-# '2QdF*-681N͖$"ijko8%(6hýk,%3."4R@' " ,C?DJKID>><;<;EORQA@D=%1krznmza(2ITY]UJBBD@9446411;B95679884mƙd7@R¹BC~%5*%)'0=L<'"$%$'9EFDED@4,'#"!#%#"%%%#&'0DlzywwtvwvtsogtNccwoefea_[d{rgoootyzwxpqrvwxuqptz|~~}}~|{z}yvwzzzzxxwxyvwtsxwxwvuvvvwvutsuuqsssttutsuuwvuxyzzz{||~~~{jdcdefghjjkijid]YYXWNL_eca[PLPYfqx~}lilx-jI.DSWSNJC<740-*&%!#$""!"! %gg>sP,e|yi3,LH>81)% *9OτJg`E+*#%)**':?ah0(>JD6)&*-+("$$!!!%]xxqf\SNNKB?ENR]e`aellX0#SiQ-# (9T]I<5483Uɍ##i}dhi<#%)3kh+'4/$9O?$ ! !?=87445454::5123689IدS4:Qw*Gu'3*%%%'&&$$$&%$&8EGFEC@1"!*&$$$%$#$##"#'+5KpzxwxvtwussmcsR~}{{~|~|}z}~}}|yzz{z{{yxxwywuvvuwvvyyxywxvvuuxwwvsuuuvuusstrtsrsrttststuuxwxxxyzzz{|}}~}zjdcdccgghjjlmic\VYYVOL^eea\PKRZfry|odmx*nD,EVXRMID>63/+'%$"#$""!!  #leGpO1c|we23ID<6.)$!)6Nщuknw~wumdhYhf5/CKA4*'),+'"$$ "!!W{yxsqlgaZTQRQKHQWgye. '\nL+#%4IPNTO<673X̀ ,iw[XY1&'*8uÿf*+4.$ASA+&%! "U_RV]`emppppd\gzyrcbeX&-wkxyxW%.EIFB=<<<953344587562/0156;Wl%>M`Z;]h$5*&#$$$&$$$$$$):JIJGC>1 %5/)'$$##&%%$',5=TuzxwwvuuuvtokpX|~~||{|}}|y{zyyyzywy{xxvxwx{zzvwzxxyxwuwwvvwvwxwuuvuuuxxtuttvwuuutttsqttrusrrrtuustxxxyz|z{{||||yhaacbcegihimljc[XZZUML_ddc]SOT]hs{{kgoy)iǺJ*BVWQNHC:53/)(&##$"!    
pƸdHoĿQ6f{tc54C>;7.($ (7OʁdzgghTge--AJC2('*..'#$$ "![{yusomg]YRONNHDIPZlz~wtra..YcH*!0RX]quO,35/Zǀ%PaDOr5#',8vc%+1,#L`A&%$ "" ]ovuqsqpribj}u^ZcQ&6||k|{umxN!0@D<;9:98532224687530.//247XȻZ+S`Xѥ>Tfվ`*2(&%%"#%""#""#(>MKIGF@5)?Q=0.+)''+-.,2:EOb|~vwvvrwvvsjfqP~}~|{~}yzz{yxwywzxwvwywwy{zxvwxyywwxxxxwxwwzyxuwwuvwxxuwusuwwutvvwvsvuuvuttruvutvutsqqrpqqsqqtssttsvwxwzz{}}|{{whcbdadggghjkkhe\YYZXKJ`dbaXPMT^hu}zigp|1lȱ}lQfF,CUWTPHB;62.*(%%$%#""!! )y±cDpS9f}yvd>6EA:7.)$ +:Tǃh{t~vY^XQK7hc,/BH?3)(*-/)#!"%"!""%\z{qgaaa_ZUOONNIKNP\dhdc_fovxe.+V]D)+Fec&-58+^zPpdt`-#",:zǾa'+6*!"Qb=%$# !$dy}zubW_qyjcqwc45~wh{ttwdgvH3<<667986332003677531/0/048]Һ_%ZuƴT-]cټU+5)$%###$#"!#$#+BLIFEDA65OJ531/./0688>HQTaqyxwwwxxywts{f}~}~}}}|||}zzyzxxxyywxyywzz{wwxwxxywwvxvuwyxvwxyyxvwxxvwxvvzyywvxvvxwvwwxvwuvvuwxwwvuustuvtttturquttpptstsrtrqstutuvsvvxzyz|y|{|}xdbdddeggijjkjhe[WZ[VMOaecaYOKR^it|xffq{/g½D+FUYSMFA<53.+(%%##""#"   +zʾ[9oĿ}P3f|wwo@5HC;5.'!)8O·]bqrrwe[cZS2,ka(0DI>4+'+..)! "(&##!$W{~rhe[TSUTUPGKMHLRPYeinn}p++UfB)#2JaA#+8AE3ew+[H2G-"&/>ȿº`!,3* $OQ7$"!! #\vt\Xalsgym54wrc~yxphm[djB09:546665100002666430-..057aܸi/fdD@RXغN/5*%#$"""#"$&'%1KOLIIDC:1356=@EJJNTUX]chlyz~}|{~~}{~|{{}}{}{wxyxxzvxxxzyxzxzyy{wwwxyxwywwywxxywywvwwwwzvvwwxwvvvuwxvwvwwwxwvwwxxuwwuvvvuuvuxvtuvuruvvvtutstuvurtsrprrstsppqqqpstqqpqrtuutwuwwx{zy{z||~vhddccdfhiihiihg\XY\XJRbca`XMNS`lu|wdioy,gοysohh~E%DVXTNIC;53,*)($"" !! !!!# )o˼Ƕb9nľ~K/e~y54E?;7/(#+8VхG_jY3-h],2FJ?4+)*.+'" $&$## %Ux|vnid^VWZXXVSUVW\hxn)0VaE+!(:;=>>IJNX`iX2lr$BPJ8(#'/?ǾýW".2(!#MN/! " !*p|~vlhiqty~j- ;prfvnqk\{_QX`<#099546322120236665441,..035`ٯ{QBPK^zfG7RسK39-/)+'&)()*-1:L^^\`eceghnptuzyz~|~~}{|zxxzwyzwwyuwyxz{y{{zzzyyvxxwyxwxwxwwvwyyxyyxxwwyxxyvyxwzzwvvuvvwxxyvuwvvvvwvuvvwwwxwwwwxvvwwwxwvtwwvwuvwuttuutvsuutuuttsuusrqrrqsrrrrooqpqrqqpprrsssstuxxwyyyyz||teccdcdghhhgjkid[[[^WLPbab`WONU^jt}xlkp|.mˆcr}Ŀ}C'CSYTPKD=73.+($#$%#$!!## !"## )xŻ½ǯg>oľI0jm*3D<72*%""*7WȀ@OmRQ6+m[,0CK@5,(+-+(! &&""$!#Pqz{}}|xulinuuy{xwyxs-3_`@+4_plmggmcderlU4pо_1qkeU7'&.?ǽT02)"#?C*!  
9~}yv~zupruv|l*%Eo{{oplhghocPRN%"098533100/20248763212/--./.PyRFBF`j@,CiեMMIGEE@EHMPSTZdmw~}ywvvrsssrtsqtpvrsusvuvuxwtvwvyxwxwvvwywwxxwywyxxyywyyxxxvwuuxuvvvxwvvtwyyyyz{xxxvxyxxwxzyvuuvtvvwxwxxxvwvwxuuwwxwwwvvwvvvwuuwvvttvuvxvvuuuqstussutusssrrsstsrprrqpqoporqnoppqppqqsrrrstuuwwxxxyz{~qca_abdfhjiihjic][[[ULU`ba]YNKU_ju}vhit-kξĿ¼z;+ETXUPIC<42.+($%$#%$! !! !"## )²Ĭf70+& &.=Vɔ|utf]c\[SECC@FXW.0CJB7,),//)%"$%"$&#!Lrvwz|stw,7^\>+7.+,1.)%"%$"#%"$Hlrsv|~{zo&7]W9.>>483.*(&'##"#!! !"#$%%!%eҳ~}zwbfn{tX;73,'$ (.5Y¼T*3DD<70,-1.*$ $#!#$ 'Uqzyy||xyvlyc#8\Y>*).!%(+(&&',184=ιSDyN$*0FƻN%34& +<7(!  "=u}{~r]JCO_s{|{g-BV_x[J[oq;,AC@>97>>;@A?;DJKKPQNQRVUO^_mxyveWVK>Rhv{z~~}{~}||zxxvrrvrvusprooomgZWMLGEJJF@CBCB=BDGGECGHHEKNPPRSUWX[\[ZY[^``bdcfhilmnppqssttutvwvxvvwvvwtwwtwvtwwxyvyuvvxxyxwxuvvvvutwxwvvwxuxyxxwvvxwvvxwzwuwvvwyxvsuvwxvwvssruvuutstvvvvvuusttuttusuvuuswtrprsqqqqpqqrssrqqopqpoooqqronmlmmnmmppomlonppqspprsuvvwwwvy{|k_``^]aggihjjkkd\Z\\QMYbaa^TKNWblxy{nfmv.i¿ſx:,FTWUOKD>62,+)%'&$"""! !##$&"%hDZshjr|wY4qýI?tplpssj`H 28851,*#!(0=\T+3EE=92,+0/)#"%%!#"(^qxxxyysnf[pmk|{xw}W;XV<'"  $&&&%)-375AϴPH}B$+0JƺF#41$ )<5) ! C{zy|ywuoVB?BLVfrpnkL )COguqUY_`b^`chiiklorpsux{~gUSRXt||~~~~}||{||yyy|~}uwrtxrwvrojknnkmokiomkollnmknmllljmlih`ZRJIJJJFCACA@@=AFFGDBB=BJMNPPRSWVVVYYWZ\\]`bbghhijlnlmspttttuuwvwvvvwvuwvvvsvvwvwwwwwxwwxwwwrututtuwxvvvwwtvuvwwwuvutvuvwuvxutuwwuuvuwxtusrutsusustvuvwvvtuttusrtuvstssssorpppoporqoprsrqppopqnoopooonnlmmmqmnkmmljomopooopqrssuvtutvzykb`_]Z]efhjhjmkg^\\YPMXbba]SLMXdmw~vvlgox.dÿu8+DTZVLIC;61,*'$$$"##"!! 
!#& *rηôz}{b:vÿM=uoqrmjhbJ!18863+*FM(9G).<320ALG$)0<\V,4DG@:2,,--+"!$%!%&!'Uquvz}{ywtzy{wyltz~|W;[W:$!##%'&(.473=̰H@i1%*2OĹB%3.$+C4& "$ !"#!&y{qopmkfVI=BLTakk]deUIOW^mw]S@`~|z{zzzvuuvvvvvvvzz}yyyxywwywpvwusuportompnmlmqnnnkjkkeiljlllkmlllkmmkljjlljkje`ZSNLMKIFDCA?=>>DGJHA=;BGKOQTSSSSUUUTXVXY\]`cdeghkkklnrqqpsuvutuvwvvvvxvvuutvvxuuvwwvwwvwvvwwttutuvuvtvwvvwvwvuuuttqsttvuvxwuvvttsuvwwtuutuvuvvttstuttsuusurqtqssrtspssssrsnnpqoosqptprqponpoppppppnllkllklmkklkikllmmlnonoqrqprsuttvy~~xjdc`[WZcgiihjlkg]\\ZPN[dba[QKOYcnw}sm{~ngmx+gr0/EUYUOI@953,*'&!!"#"  ! ""!*ûyO4wK;~|wrmfV$.87735xB]dK\|Zi]&(0>^}I-4DIB:3,+-2+$"$$!$& .czyz~\9XR3""$!!#&'+274EҰB(nutuy@!$*3[·z>%60$0?8&!%"! !""$'#%%*Suifbieed]aintuxwwxz~}w`Fp~|}}{yuuyyxtopmkqtnqstupqw~}}~zuzyswswwupstqsrknnnopprtsrsqppqoopoolmnllqnjmjhlllkkjjjjlmklmklmklmlknkilgggaZTMMLKIDCC@=>@CCFFC>>ADKQTSSSRRTSTUXSWZ^[[`begehgjklmopoststsuwuwvutvvttuutuvsutwutwxyuvutvrsurvvuvsuwssutvutvuttstuuvuuxtvuxusqstwusurtutwvssrsutrsrtqrsqstpspsrropsqrsrpqqqmproonorqpnnqnmmnnomljkkklljijijkhhijkkkmnopqrprrsusuyy|vfcb]WSWbghfiijje_^^]KP[b`^ZOJRYdmv~}pp|mdky,i¿s3.IW[UMH?:72/*)(##!"!"!"" #""!'¼}sS;xJ@}|{wvsmO! 
-7783Jhtn̖W~YF%)0=@EORQPQQQSRSTRUVWYZ\^^_bedhhhkkmonqqrrqrtstuttuttutsssuutwvvttwwyxuttssstttvutvvtuutuuuwwtstuuuuussrtxutrtuusuutsstsruruvssssrstsptoqtqsrrsprqrqrprrsonpnooonoopqpmmmmnnmklmkkkkkjkikihhjiihlkklkopnmpptqrtstwx|}ticb^WRXejhihkllc^\]XNMZ`__[SMQYeqx}v~|kjpz-f¾n+-HV[SLF@9422-))&%"$"#"$ "!#$!&wÿhBwzK;yztwroolhmmP!.9:94Mji:O?T9_T8$ '-6g{H-9FLB;3+*-,(#"%$!$% $Mjmty|L4H>) %/523DЪ> !#*0_ƽtF+AA2((('((2Mkyutyk]\cdVSTUWfk{~yywwwttrqnjkprssqpopnpolmmnonpopqnpptssrtsuuqrtqprllpllpolllnpqnorppsoppqoponoplomlnnlmpmnrqrrnorpopolnlgmlikllkkhjigijikjhjmijjikmkklhhjhihjhjffhaVPMHHFHGGHHGHD>=@@@DLPQSRQSQQRRTUSTVYX[\[]^acdegiilmgnpnqpqrrsprttsutststutuutuutwvuttusptuuutvutvvtstsvtsstrqsutuvvtttuvprqutrssttqsrnqtssssppststssqqrrprqoooqsqorqppppnolmnnkpnmnnmkklkmlllkjkjigghgiiihhhiihhijkmnmmnopprqrtsv|}{rebcb\Z`dhghhmnkd[[ZVNJZ```[OIOXcnwzfkqz.h¿s+,GWYSMDA940/,)*()'%#" " !!"#%$ )yùe7w}D7p~{zuqruprpjkrlN 07:9313&=1-$"#!(-3iſwH.5GJ@80+*.+'"$&$""#!1^uu||~~{y~}|wzv;7H;* "#(177:RϨC!"$(12dųût[X\WGCDEHOWforwy~{}||~xutnppqrrronnopqqnooprqqqsqopsqroqpqooonoptporspnrqpoonmlmnnjjlmoponppqrqopnoqpmoonnnjlpmmnnnqpqtqnrsppnmmmllllljillkkjghhghlhhkjijhjjgijjghkhhfghfdfibWSNJJGHHKLJNQH?==<>GNSRQQQPQPRSSSSTUUXYXY\^\abdghhjllmoopqppqsqsustsprrqutturrtstrptusssststutuuuuvvurrussrprrsustvtsssttrsrrqrsstsqrqrsruutsqrsrsrtspqqsqpqqpqppoppnonnnonpnnmlmnmmlkkljijjjjihhigfghkjhffhgihfhiklljmnnmqppnqsvw{~zoedba^_eegigiklke\[XRKJY``^YPKQXcnx~xflp|,eĿn./LZ[UME=750-*&()('# !! !#&$#f>z½wD=o}zyz}zyytvtoJ.7860+'$ #(-6hxC)6BE@71,+/+($#%$"%%/h|{|{ws|~}t;+EK=/*%%$$))+.189@>::BEINQOQRRPPRSTSUWXVXWXZZ\a^dddhhklllmlnoopqqrrprrrrrrtsttustustrustsqrssusstttssrrqstvurrttssssussrqqqsssrrrrrtrssstqpqssropqrpprqopppnponppnonooopmmmmmlkmnmklkjjillikliikiiihfhhfhiffhhfggffiikjijmlmonomnnrv{}~~{xohea_a_efeghhikld[[YRJLV^^\UNIQYdnywhjq|,fĿn6.GWZWNC;742.)'))&'$!! ! 
!$'%&ʹruxc<{½y>=sysfD.795/''#!(07kuE*0BE>82,*,*&#!"##$#8hsrvz{}u;!2GWUOMGBBCBABFIT[`i{{qnqmwuuwz}}ztwxtuusqsrpsrpturpqrpprqnnpoqqkpqnnqppppppqpqsmorrpmrpnpqoopmlmlklmljkikmnmomlpnonmppnmnmlnmilnkkklnqpqpnqpnqqllmmknkkijklkhiiffeeghdeffhhghhhhihgedggedgebd_[WVVRLJHHIIHLOQXTMJ?;>@DJNPRSTSPSUVUVTVXVWXVW]][abcdfihikjkmnopnprpppqqoqssssrqsrsrtusrtsrrrssqtttsrssqqrsssssstsrsstssqqqppprrrqqrtrrqqooqrqqopnpqopqqoppnmnmllopoonmookllmllklnljlllkjkjjjklkjdiigggggeedgigdgffcghjhimmmmnnnlmnou{~~zunfabaa_dgeggijklc[ZZSIN[]\[ULJOXeoyvgjs}+h¿ÿu9/GWYULD=842.,()'%&$## #$&%*½{sr`@z{4+)*-&!#%%"$$2ZkoquznD>FMXY_^\acegkow}vrvvsuupprsrtsrrqoorqnoqqqqonpnmtonnonppppnoppolooplnpomqqoqolkkjlllmlkllkmnnmnmmonllnnmmklmkikkijgkmnnpnlpkmpqlnkjjljjjijlkihfceddgffeedbhghgghgdfgefebdeca`ZYXWTQNKHGHGHHKPVV\RDA>>AEJORPRTRSTRUTRTSVVVUVXY\^\cabffiiikklmllmopqnqpoqsprsrqtrqqtsrurrspqqoqrrrqssopqstrrsrrrprropoqqrroqqtrrqpsrooppmmpopommnoooooonmnnmmmnlmnmpnmlkjkjmkkhjjjjihgjkjiifhjijghgededeededgeededceegghikmlllmmmmouz}}{zwlc````addeffhhjkc[XXRJMZ^\ZSLJPXdox~pdjs}-f¿ÿx<-FVWNJD?853-,)(('&%%!"$ !"%$(|vX@{v?CykG2;=:3,($! +-1mľwG9OelZL;0.11)&()(),/0&'9Zgd`kt{{z|uot|wruttqloqqokmrytturtrosqprrqrqppppnnqpnonmnoknqmmmnnnonmlmnonmnnnmpnoooooommllkkkkljgmkknnikomnomlloqoljjlklmljjknpnonmnnmpnmlnlkkkkjghjjiifdbceffddfeeedefdfgeddfgeaceea^\ZYZVTRQKGFFEFFGKKQTJHDA@>>CJNRTUTUUSTUVVTTTRTVVZZXX__adceeehilmllnnpqnooopqrrpqrtsrsurqsqqqnooqrprqoqolqoqtppooqpnpplnnpponmppprqpppqnlonmlmmnommppoommnonnmllnojmmmnlmjmkjjjkmjiiihghjiihigfjihhgeegecccacacfccbdaffefgfgkmmlkkjjlptz|zyytjbbbbabddegfgijkcZWWRIN[]ZYRJKQYdoy~mfks*e¾ÿvA#/DUVPH@>851-*)'(&$%%!"" ! 
#'%,UCyüxB?~~se@6=;94,(# "()*d½v_QVif\QC=7:=87:77@IQQX`hih`efkrtty~|wronklicbc^WY]TT_]\[_a`de\abbk{tsqrrorqopqomolmqpomnnlnlllmpmpommmmmlmnnkmonnonkijnmnmlmlmlmjjkjjijhikklnmnlllqokllnpkkkkklkjjjjlmllmjmnooqnjknnlklkjihkihgcdddfecdcbcddeddddccca`dcbdca]XYXVVUWUOJGKGDDCEFHMJGIEEEA?BFKPTRVWWXTSXSSSSPRSSXUYXZ_^addcfhhhjkmkmnnponpoprpppprqqqttrpqrpqqoqqpqqrppppnpnnomlllmnppoommomnmmmppoooomnommnmnmnononmqnlknnnljkmmkmlmlklkkijjjlkhghiijiffgfigffhhgeedfedcbbc`abcbbcdedfffgfifikkjihinqx|{yuohb`cbaabedgehiihbZVUODKX[ZZRKJNXfov|phkt.c¿ĿwB$1GYURJA;750,**''%%%$!!#" !#'%!5~Y@so2:x}yupeT3)7<<<4.*$!!!#$)6[|usnqhba]`bb`abdlpqx|~}|xvqryvvmjklhfebaX]]\_^Z\_ZX\VWZ[VW\ZX]`aaaaaadbnusropqmqpnqqmqqmprollmmnmlkmmooomnkkmkkmnjmnmjnmjjklkloppnllllkijijkikkkkmmmjkkmklljjnkkllkkmkjjjikmmnnmmmmnmlkkjmlklihgffhgedcabed`dabdcacdcdcabccca`_``\[YYTRVUWRKMLNFBCDFEHKKJHGGFDA@AFMQSWWXXUVVPPRSQUTTXUXZZ^_^cdcceihikkkllmmmppppprsssrorrsrpqrpproooopoqpooponnoqonmjmnmmoonkmmnnmonnmmnnmomlmlkkllmnmnlmnmjlmlllikkjjjjlljikhiiijigfefhhgefggggfefggfeececbbddcbabbabcebadeeffihiijkhgjntyz{xuofb`c`a`bdedffije]VTRIDMVXZYQJIOUcox|~nfjt-aľzH+0FTUQH@<95-+**(&%$%# ""! 
!!$%%3ýR6kh;'Lu}zvoi[QLC?DJMH:2.79@DHE;.*+('"!%)&"#&%%&$$*1:Aev|}}}{~wogee]M=K[[a_]^ZU[kkgdge`]_c^][\[^\ZY\ZXXZXY[XTWWVX]`_`_^^dc_rvrrpoosolmnmnnojkmjjjjjljijkkkjklnjijilnmjkmlmnjkmmlinooomlmljjjfghhfjjjilkmliknkklkjmkkmkilmjjiiihjmmlklikmljkjjljhjgfdffidbdc\cc``aadedadcdcabbaac``a^YVYYSMQVUVQLLLHDEDDCBDHGFFHJGFDC?BHMRWYZWWWUTSRRPVSQUUUXWZ^\_`aa`dffiikjjlllnnpmmnppspmpoqonollopornqpoooonpmnnpnkmllmnmnmkkmnkonlolmnnlmmmijkkjmmmnmmjjklljllkkkjjkjijjhhhggaihgiigfcdfigeefggeefeeefdbbbccbbbab^_``ba_aaccccghhihijfejiqwwwutnc`__]_`bcdddfhhe]UQQJBLUYYXOHIPVanz|~gfju2fžzM)0DRUNGB>:50/-,*'&%$"##" !"""#$<ÿS/PxlbXMGA@IVekuwwfNROE:C>FFLLRZZZagjjjff][\YRMOLKKJLNIKQPQVY^hlu}}yyyyxuvzrd\XUTP;4DT\\[WZXRbnieehd]bc_\[\\]\WZZXXWVUUVUSVXSTX[[\]Z^aa`cpnmqmknrmjnmlmlmkmjjllkkkjkkklmjkllkklkmmkkkkmnkklmmjinnnmkkmkiiiijkhhighjijmkjnnlokilljkmjjlmiihhkikllkjkjmlljjgilihhheddefbbcb`b```_bdcacccbbabbaba_`^YUVTPJMRTPPNNOKFFHGGFAADCDFGHFGFDDCDJNSWXXZUSUTRQQSSRSTUUUVY[Z^aa_beegijjjjjkkknjnmmpqlnonrpnommomlonollmmmnnnomonmmkkllklljklkjmlkmlmmkllklhhjijikllnnlklkjkjjkjijhjjhiighhjhgihhhhfeedehfdeeeffefeddcdcabb_`]^_`_`_^^__`bbbbaeggghihggjinstvttlc^\]\\^`adcbeeffZTRRF@KTXXWODHOXbowz~}fclw3hſ{L23ARXPGB@:641.,,++('%''$$%&$%"##'@dNS[[XSJPJABABGMIHFHU_dly~ztstyxz|}}~~{yssqtvvtwxpf^PQUP<0BRWZYXYVWdnfdfea]c\Y[[[[^YUXXVWVUVTSSTVYXWZ[[\^]``[^`qompknokkmmkmmkllkjkiklkjjkiinmkmkjlnlkmmlkkimmjmnljjikklmkmnkjkjjjihhihkjijlijljjljjlkkljjkijjhfihgijijigjlikihhjigfdfgeddeca`^^^^b]_aa]_`_`__a_`ab_]^ZWVTKEGNRRMNNNPHFJHIIHCA@@BBCDDEEGFDEEGNUXXZTVUSSROTRPORTVVTVX[]^_^`cccgiiihijjknknnmnnknnoqoooonmmllnnlnpmkllnlkmmmmlkkjkllkmmjiilkkllkjjjijkiiiiijiklkkjijjkihjjiihiihghigfgfggfgefdddehfegfdbbcbcbbbbcb`_a__\^`__]__^__ab``adefgfhhfeehkorutsojb[[]][\_abacdcecZOPNDALSXVTMDGKWbmuy}}kgmw5i¿|P85@SXPIB@;86431212/,)%'(()(*19IV_nþ~|{wvorruvwuvvlcZQQSN;-BQYZYZXVXcjgfebb__\[\ZX[ZYXXXXXWUWTQSVUSVWXY\]\]^\Z]cuonmjnnklpljlkkmlkjjhjljgikjjmfimjikjijlkjjjkliknmjiijjkmkkmihihhhghfeghkihjkilkjkkjkilmlikmlhjgfhjhihhihgjjhjifiihgfdggddccb
__^]]^`_`_^_a]_a`^_\```]\YXWTMEENPPNMNPLIHIIJKJJFC?>BBABEFEEFECBCGKQWXXZWTSSOPPNPQPRTUVUZ\[[_bbccfgiihhhkkkkmkimllkmononmnmlmmmokkmnolklmnnkkllljjklkkkllkkjlmjkiljjjljkjjjhjihjjjhfhllkhhhiighhghffjgfeggfdfcfedfgieeffca`addbbbbcab`^^_^_]^_]^^]]^`_]]bbbdededceegjkosrqnib[[\ZZ]^^_abbddcXNMMEAOUWVQKFHLW`ku{||eeoy2o¿~W;7GXWSSFEBFHFF=:>EFFHJORW]fnst~~}{wssssssuvrtujaYROOI6/CSWZXYVUXaeefdabb]]][YZ[WY\ZWXWVTUTQSVUUXTTWWWW\ZX\^bx~pmkjmjjjkkllkkkijjjikjhgjiklihklihigghhhjihjkijljhhghkijhmkhihdhgfhkggihfhhijhhkjiijjlmjhkjiihfggghigfhjjiiihgfffghfccbcb__^]]][[_`]_^]]\Y]^_][Y]\[XXVXSMFHLQOMHLMLIELKIMLNMIECA@@@ACEEEGEBAAAFKPSYVVWSSSRSSOQOPURUUUWWY\_`_bcdfdffghjijjjkmmomonnqnllllkklljklkmllkklljhhjjhkijkijiijjijjjhhjjikhhhfjigjehihihggiiggjhijfhgffejhdffgfededdcccdeefdccbbdfd`a`bc`aa^]_]\[^]]\]]\\\^]_^`aabcdabdffgioqpomf\XZXTY[\][`_`bd`VPMMCAQSUURJGILWaisyz|ybhp{+súd\`blnokrroolkijpuw}~~~|}~}~|zxtttsrsuusstk`YQNLH72BT[\YXWSV\ffeaac`\[ZZZ\ZWWYZXXVWVSRPSSRTYSVZZZ\\Y]]]`w~qkklkjkmjjkjlkiiihihhegihhjighihhifdghgfhffihghhggggghhhjkijjeehgfgefgheeggghggkiijhjjihijjikhhhfghgfhjjghjjfegdceddc_ab_^]Y[a^\[^[]^]]\\[]]\\\\]ZYTTTRICELPOKJLRKICGLPKONOMLMHE><@LQTSPHFINX`jquy|wdir|5v{s}µobWaku~}~}}~~~~~}~}{{xuussrqtttutlaXQPMF60CS[[YYXVX_jjeccb^]ZX\]]XVWXY[YVWWTRTVSTUVUXWWZ[ZZ\[Zcv~nikjjjjiikifjifghgffeehggfgfhhgfggedgggggehhgigfdggfghggkgehfbffafgdfhfegfggghhihjkijjghlkighffggggffgjjefihedbcedbcb`ba^_]X\a_]\[Y\]^_[Z[Z[ZZZ\ZWUTTPLDEINNKJINKGGILLNNNPPMMPMGB=;<>@@CCEABCB@ABFLPTUTUSRQQNOKNPLRTORUVYXZ^Z_``aacecfgggijllknkjlllnlllklijiikkjkiihikigggiihgieghfffgiijghgghgfihigfdfgefhihilihcfgeeefgfedcadgcccacdacacaadccddcba``a`^^`ba`a]]^\Z[^]\[XY[[[Z\]]]]]bccacdcccbfjnqplhaYRSSTVY[][ZY\__ZSMKE?>IPRQOGEEKV`jqvxz{}~seks}r|qllmnx{yyrgacjkjry}|~}~~}~~~}|~}}z}}|wvussrsuvsusj`WURPH50BTXYVXWUWckgdcca^]]\^[WXXWWXYWUVUSQRTRUTUTUUVWXXZZXYaz|lhjhiijghhhgjgfhgfeecfjfgeeegeeghfhegfgddehhgheefgffgfgighggffgfcgedffeedeffhigggijihhghhijiffccfhhfegjhegdcedbbcaac`_a^__[Z^]Z]\WYZ[[[ZYZYZZZ[YXVUTULDCJKMKJKKKEFHLOMOOPPOKPOM
LID?=>==?@BCDCBA?==BJPTTUTUQSQQMOPNPQRPRUWWYYZ__```cbaddegijkjjjijjimljjhkkjliijijighhhiiffgjgghhdhffgghghgfgggigfgggefggfeehhfefhgfgddbcfedccccdcacbbccabbba`aabbcb```_```^^___^\]\ZXZ\[Z[ZYXYZY[[\^\]^aaabdbcbbbjnpolg`XTSQSTUWW[YX[Y\VNKJD<=GNPOMFCEJT^hpuvxy||~nfku|Ǖ~}yssvw{~~}}}}}}}|~}}|}~}~z}}}wvurrrtttrtsk`YWTQF3/EUYZVWWUWegecb`^]][^^\XXYYWXWTUURRRQRRRSTVVUWWVXZYX]_|yljihkijghghgedfgeeebdgfeedcdecffbcgdbffbbefegdegghgefdgiggffddeeeddefdeecfgghhefiiiigfiihihgggdgggggfefffgdcdcbbbab`^\\[_]X\^ZXZZ[[Y\]YYZXZ[YYYWUQTRNFCHNOJIILJFEIKLNPONOMKNQMMNOKE@=:;==>ACAA@=;9?@??=:;>@GPSTTURQNMMOOMOQLPPSUUUUWXXZ\\^`cdedfggiffffkigigighggfgihffegfhifhhgifhfeeefgfdfdeeedbcfddfefccebeaceddfdcdfedba`cbbaabbbdbc``bbdaac``_^``^^^b`^`_^\_]\]]]\][WYYZZXYYXVVWZXZ\ZZ\]]_`]`^\^aeklljie^TRPMOPRTVVVVWWWRKFG@8;DHLKJECEIP\elqruvvwz{}~~|jais~Ɣ}}}}~}}{|~}z|~}|~~~~}}~}~~~}zv~~~|{zz~{yxwusrsttutvtlc[UTRG31HUVVUVWSXgfdfb`_^[[[YW[XUXYWTUTSRQOOOPPQRQRUTUVWVUXZY~ueeghieefdddedfcccdddc`accbcddcbbbeabcabcabed`ceccadeddcdghbgfdceecdefgfeeeefhgghegihehjijecdeceeccddddeedceda`a^_`[]_][\\\\ZVYYXYZXXZYWVWWYXVUTRQRQJDDIIHDFIIEAHMQOOPOMPOMPPNOOOPPPKF?978:;<>??>==<=>AIPSSURQOOKIMNNNNPNQRRVUVYWYZY]__babfeedbggfighjfhggfhfhghfeegdghfhhfiffedddeecedbcdec`edeeeecadcabbcecefedbedcda`abca^`aa`ba^a_`a`ad`___^^`]^__^\[Z[_]][[\Y][YZYY[WWWVWWWWVXX[Y[^^^^\][\a`dhmnlhc[QOMKNOQTUSUUVWUQIDB<6:DHIIHCBDGOYeknorsuuwyyz{z}~~~{lckvƒ~~}|~~|{|{~}{{}~|{~~{~{}|{~~}~~{z{}}zw}z|~wz||y{{ytstttsttusi`[UVSH53DUXWXUUPVcegc^_^][ZZ[[]XXYXTVUNRTSOPPOOSRRTVTUWWWWYZ_wg^egefgdbdddddbc``bda`cc^_a`bda`acabb``c`acbbeedeedaabbdgfcededcceedddeffdefffgffhfgghgihfgebdeccdcbccdec`aba`_\^_\\\[\[]^ZY\XWYXYXZXXZXZYXUSRSRPNIADHHEGGHHFDDINNKRONPOMOQNONMQQRTRMD@;78:<==><>><>>=BGLOQSPNKLJNLKMOOOOPOUSSTVVUX[Z\`]_ccecdddfgghgegedeeffdgefgffgffffdgedbbdccbbcaacddcaddeccdbcb^a_b^^cdccccdcggfebbbb``ba_ba_``_`_ba`]^`\\]^^]]^Z\ZZ]\\[[\XXY\WVWVVWVWUUVXXUYYW\]]\\_^[Z\`afjjjhbYQNLJLNPSSTSRUSSNDBA<6:EIIIFBADGNYdjkmpqrrtuvwwwx{z|{|}}{ifkwǐ~~~|||z~{|~z{{y|}zy|}{z|}zz~|{~~|{}~}
}wzyyzz{{|zy{~{{}|yvtsqtursupha[UUQF11FTXXUUSMTbgfa_`^\[XY]\YXYYWUVTQTSQOSONRQQRUTSTUQUVVXbpbceededb`baacbac__bcabdbaaabcb``aabbcbac`aabcbbcgcabbbbefddfceecdgcaddedbbbddehfhecggfghedfdaddccbabbbbbaa``a_^]]Z]]Z[\Z\\WXZVXXXWVUWXVWXWXPNPQPNGDDFHGDFHHEBFILOKMROMPONMLKNNNPPRQTSJGA<9998:<<====>=>AFKOOONNPMLLKKMMMNPOQQRSTVWZZY\^^^acb``cdedefgfeddededegghfddcceeefdb`ccbbbaba`bcb_cacdcbbca`a`beihbdbcabefioromjgfcaaa`aa^_^a]\a_]]\\[ZXZa\\\][]WWZZX[[ZXWWXWWVVVWWWUWZXUWZYX[[][]^\\Y[^aehjie`VNKJHIKNRSSRSSPPKDA>849CHIHD??CHOX`ehjkmoqrssutuuvxyxxyzz}}~~~}ycclwǓ~|}{~~~|}~||}~{{}{|}zyz{zy{|{|||y|}|}|}~{y|xxz{z{}}uz|zx|~zwtsqrttrqsohaYUTPD.0GRXVSSROXffbab]]][UX\TWYZUTSSTSTROPOMNPQQRSSSSRRVUUX`k_cdacdabbabb`]b`^`ba`ca`b_^a`_`a`_b``ab]^bacccbceb`bb_cebdeabdcbceaacbccaab`efffgefgedhfdbecbcbgf^aababaa````^]\ZY[[YYXZ\ZZZWVZVSVVUWVTWWUTRPPNJF@DHJGFCFEBAFHKMOLOQQONNMLLPNNOOPQRSQPNHA<:779999:;;>=;717ADGEB?@CFNU^cehilnpoqqqqqrruusuuuvxwxzzy{|}zfdmwƗ~{~~}~}~~{~|{~~{y~|}{}zxy||yz|zx{|{y|{wy|z|~|{}}|~}vuzyx||yx{zyz}{xutsrrsssosrh`[UTO@02FRXUTRQRZheaca]^`YVX[YYYXVWUSSROMPQOMNOORQRQSRORTRUVahdaabdbaa`aba_]b_]_``^_^``]___`a_[`c`_`__a``eaabccbab`abaacb`ba_aca_bcccbceddffefddefefgdfbb`dbaeb]`adba_b``^^\]\Z[ZZYYWYZY[[UXYUX[WXWVVWUSRONMIDBCGHHDDEFBACLJMNNMMONOLLKJJNOQPPQRRSTTTQF?<7778787::=><;:?DGKPONNMLMKKLNLLNOMQPPQTUYYXWYZZZ]^^```cebbccdcebcdedddbdcccccbcb`ba`cb`baad`__`b`bb``]\````co}vnd_agmuz|{ywtqnje_R:8?HNQW\XO9 
$+4>IOPRL>26=CHNPRSVWUUTUVVUUUUTUVUTUUTWZYZY[ZWXZ]`cfgea\QKKGECDHKMLNNLMMF@=:5/9ACFEA>=?FNV\bfhjklmlmmllikmoqrqqrstsuwwvwvwy}~wfglvƗ}{vvnmjiggnnnuz}z{}vty{yyz}|y}~~~~~}|{v~~{z~}{|}zxx}{zz{wvxyvwyyww{|xx{ywz~{{}}{{}|{}~{wxzuz~{z{yv|~{wwusrqpqrrqsqf_ZSRN@15FRVUTSOPXgdad]_a\YZ[YZZYUVYTSTRPOPMLMPONOOPRSPPSQRVX`i_]_a_a``]_a``^_\]a__a^\`__a]\^_^]_`^\\^__^^ba`]``_a``aaabbbccaab_aabcdcafgdddeedaceedgcbc_`cfab``_`_ba]__]a_]]]Z[\Z[YXWXXYZXUVUVYVSWVWUTQNONKHGAADFGEEFGE@BHMLNNLMNONKKMIJKKONONPQTVUSSTNE?;88766688;<:;:<=BGKKLNNNNLLLKJJLNNONNOSTVUVYWZZZ][]__`cdbccbcba_eccdcabbcbcdc`abab``baa``ab^[_``^_^_`]]`]_\cp{vicbgjryy{~|zwpfI4/,/4;BMOG.!)2:?<2.045:>BDILPVSSTTTURTQSUUVTUVVXWWYZZYYY[\^deda_ZOIHEA:?DILIJKKMKC<:83/8@DEC@><>CLT\bdfggijkkkjiiikjmnnmoppoqrrrrqqswy{}}~~|vcfnwǖxwthkjnpnncb^W[ZUXUVbowy{~}z{}yxyy{y|{{|{}~~}~~~|~|~}}}{~~{{|zy{{z{zxxz{z{zyvy{wwyzxx{zxx|zxyzyz}{z{}|{{|zwxzxv{{xxzuw{|ywxurpnoorrssnd^YSQN?04FRUURSPQYdcca^_^Y[[YYWYVTUWQTVROONMMOPPOONOSQORSQWWW_g_^`a]`a_]___`_\^_[]`_^^a[]^][]]\^]\\^[^^]^````^__^^^_a``_aba]_``^b`_`a_acbbccccb`cbedcacb`cddbb^bbaaaa]]\]]\[\[ZZYWZZYXZZ[YUYVTVYUUUVWTQNOMKGD@?CCFFEFFDAAGLLNNMNMMOONMMKMMMQNPROOTVSSSSPLE>;886656789:<;<<>@FIKKOONOLJKJJKKNOMOPQQSRSSTXXY]YY[^_`a```_cdadeb`dbbb^bbaaaa_aba_^_aa_``^_]^^___]^]^^]][[aq{rjebjotz}|}qdY@1++/2:3*'$  $'*.1444457:=AFIRPPSSQUTSRUVUVWVYXZZ[WVXWW\_bdcb^WMFGB=8:BHGGHHHJH?:850,6?BCC?<:?AHS\`abdefhjikosrqrppnljhillmnmllkkmortuuvwz~}x{sbfpyǔ|uokaagjkjh`YZZY[ZXZ\doxy||{vx{|{yzyyz|}|{~~~~||~|||{}~{}~||~}z{{z{z|zy{|yyzywuyxuz{yuxyxx{zwwzzvz}ywz}{xz{zwyzwx{yxxxvy||zzyupnoonrqssnd]WRQN@05ERTTRQOMVacd__]ZY\\[ZYYWXVURTROOONOQOLNOMNQPNRRNOTTTa~b[_b[[`_]__\]_^\^^^_\[_`]Z\][[\[\_\\^^[c^^^_^^^^^`\\]_``__]aa^`___d_]__`bcbddddddfdbdcaaca^ca`___c_]``]_^^_^\\[ZZZXZXXYWX[ZUVZVSWVUWUSTPOLLIFD@>BFEEDDEB?@FLMLOONMLLLJIJKLKHKKNMNOPRQQRPRQNLE?;9767644688:::;>?GHGOMLNKJLJIMKKLMNOPRQQRPSVWXYYWY\]]__``^`b`ccaacddb`aa```a^aa__^``^^__[]]^^]^[]]]^\[][^aq~wqkfeisy{}ueTC3+)('))&'&$  
'-245245678:86..6>AEGEEACCGQX\^^acdehhjs{}|zvtoljggijggfccegikmnmosvx}wpuoaeqzƑ}ytplgeiiolf^ZXWWYWYZ^ftwxz{{zwz|xwxyxvz|||~~~|}~~}~}~~}}}~||||z||zy{{yyzyyzz|yy{zwyzuuwzwvyxwxxwwyzyxwxyz|zxw{}|zxzxyyywwyuxxwxzzw|{wupnopqrqqrlc\YSPO@/3ESUTRPNNVbeba_]\^ZZ[ZYXY[WTSSPNPMKMOLJMMLOOOMONMPQRPc|c[^_\`_^^_]\^_^]]___[Z\\Z[^[Z[Y\\[[\\[Z\\[^^\_^[]a]\^`_^_^_b^\``__`^```cfbbccdfbbdb_a`bdba_c`a__a`]_`__]]]^[\]XY[ZYZXZ\WWXXUVVRTVUUUTSPOMLHGD@=?DFEDCB@>AHNONNMMPNJLIHJJGKKILMONNOQRQPPPQPMSNF@=767742245689;=BHMNKNOOLLKJKIJIKKLJJLOPLNQPOOMNPPOOMMNF@:767533234789;;:;0"##%'('$" &-2588786:;>CCGMOSPRSRRSVWWTUTSTXY\cb`[TKED@>;ACHKMOPRSTUTSTTSUWW\bb]MIIGIEA=>B=56?>2(*-02347?FMX_ehijlhf^\\WPPRTV[\`cglosx~xrhc\YWPUVVVWZ_chimqtv{|~xw}~{|ncjr}Ɩ~{wskgcaa[^^cdegccee`[VS]izxz}|zw~zy{zxwzzzzy{{z|~~zsmfbegjlme_\YWVVVVX\bmy|yux|yuwyxuwxyy{~}|~}~}|~z{}|y|~|{|}{zyz{yz|{xyyyyzywxyxutywuuusquvttxyvtxvrsxqrxxvtsttuvwwuxxvwxuuuwxutqsuvsuvvuwyvsrqnjjmnnoqpjaZVPNH5,5HNROMLNPX`cda^_^ZYYYWWXYUXTMPPLMKIIIKLLKNIJLMILMOPSUe|b[[\^\[Z[]^[[[^ZZZYXYXYYXWXYXW[ZVWWXYYWYZXZYZWXYWWZ[Y[^YY\[\\]]^\[\]]]_`__^`a^_b`_`^_ca^^`]_\\]`\]][]\Y\\Z[]YWYXWWVWWWUVUWVUVVTTUVRMKKGECB?:GLPONPQOMMLKIHIIGIJJLKLLNONMLMLPNMMMMNMNNIF@85766433456753/.39?FIIJJKIJKJJLLKKKKMLJKMQPMPSQUTWWU[ZX]^_bgongec^`_aa`__]][^^]\\]_o{}qica`[YY[\Z[[]es{}|~uhihjnz}lV6)'! 
"%'%# ")/7;>;;999:=ACFILQQTSRSRQUUUZZWF16>BEGGFGGEFPWL:2.,*+,04:?HLRX[_dillkjhdb^URTSRZZ]ceinpv|wslgXVSQRSVX\_bfglmpsvx{~~~~zk`is~ŕ{tgaXXWUVVUWZ]bcdedegda][\lt~~}}~}}}}|xy{zzxzzwt|~~z}zrnibgghnpk`\XWWVUUY\bmv||wwyzttx{wtxxwx|~|}~|}}|z|}y}~{w{z{{|zxvz{xvywxwzxvxzyvvyuvwvwvwurruvrtvutvutsuusuxvttsstwutuvutvxvuuvwuvtouututvuuvuutsolghlnnnookbYSOOH4)5GPSONLMMV]bc^^_XVXZVXZWVWVSQTOJJKGJLIHILJILKLLMLOQPPjye][\\]\[Z]\Y[Z\W[XVXYWWYWVYWVXYVUWVWXXXYZX]YXVZYXWZYXXYY\\Z\\Z\\ZZ\Z\\^]_`]\a^ab`__]```^`^]]\\^\]]\Z\ZZ\ZYYYWWYWYWUVUVWXVUUTVVSUSQOLHIBCAB<=@CA??@A?AJLNQPNQOMLKKJIHIHGJJKKHJLKKMKKKIMMKLLKLLJNJIE>:745532433554-./07>EIJILKKMJKMKKKJLMKJLMNMOQPPRSTSSYWV[]_izzvlfda`a``^]]\][\\[]ay|tjc^\[ZXYZ[bjoponrwstspnmont{\972,'# !%&(%! $08<;<99887;<@HJMPQQPOSTUWUN5 (*-6=BGIKMQSTPIB<93/-),016;AFKRX\_gjmpookf`\[YUVY\]]begnrv|}}tpg\UQQSVZ[]`begimquwy{~|{{hbltǑ~ti`ZWVPSSTTX\`bdefggfb`[Wbq~~z|z{}~{w{{ywvy{tqw|{{w~z|wqmccghilmj`ZZWWUUXY_iuy{{zwwyxuwyxwwzyx{~~}}~}}z||zvy|ww||xvzxwyyxwv{xvwzxy}zvtwwwvuvtuvvvvuttutvsutstuusuustttsqrsqtvvsuwurvwuutstuutrttsuvtuvvttusojabkmnnoni`XPLKI808HQRNNLKMVacb`__XUYYVXXXWXVRSSOMKJIJJIGJKIKLJJNLJKLNMiw_Y\ZZ[YY[[UX[VYYZUW\UUXXXUWXVWTUVVWWXUWYXVXVWZYWYWXUXWVZ[Y\[XX\ZZZ\Y\]]]\\^\]^_^^`_]^^^_]\][[^^[\[Z\[Z\\[[XXXWWWVWVVTVWVVVUTVVSSQOLLGDAA@;<>@A>?>>?CLPOQPQQONNIKKIHHDFIJGKJIIJJJJLJJJLJHJKLLKLMJJJD>:7255443244320.,/7?FGMJHHJIHJIIIJKJIJJKKLMLKPPRRQTUVYX[bqxpkcbc^[ZZ]\ZZ\\ai{{ri`Y[ZZY]aeedhjosvwvutttsppr{}x{yL5KM>41.,% !$')(%$ 
!)19;=<<9879>?AHJKOOPRTUSK996/*&,.4>HJNQSSRPNKB>800.++-49>CIMT\achkqsqqlhfa^[WXY]_^ehlprv{yof]SOSSUXY\^_ehjnptuyyz}}yzyfeksƏypc\WSQRSSORW^acdfhheec\Wbly~|~}~~~|z{zwuy{xtwxzz{|~{z}tmmgbfhimnjd[[\XWVW^hmuz||xyyzyvuyysv|zxy|{|~||~~}~}}}{zyzyy||zx{|ywxxxyyxwwxyuvwvvxzwtvvstvtttustuvrptsrstssqstrotrorsrturstuuruvutwxwvvtsuutuwvpsuusuvvuuuuwn]aknnnnne\VPKIVXHEJRPMKLLLYed`a`\XYWUVWSTVURSSONMKHJKHGHIHIJIHILKIKMNNix^Z[[[\[W[\YZ[WYYUUVWTWYUVWVTUUSSTTUVVVWWVUVTVYXWZYVU[WXYXXXUVY[YYZZ[[[\]YY\ZY\][_^\]\Z^\Y\^]^`]\ZXZZXXYYZXTVXVVXWWYXUUUUVUVWWRSROKMJBACC>;>@?>>???CLPORPPQRPNLIJIIEHFIJJIHIJMKKJIIHKJJILKLLJJKKJJJHD?;665562121220-)*08@EHIFEEFGJIJIIGHHHHJMLLKKMNPQRSPRUWZatuljea\\]ZZ\`eks{}ojd[YZZ[^]chjnpsuwxywutrppsw~i;*35@HJC?4-(" "&*(%!! *17<<<<;;<;=>AFJLOURPMJQMHC;50-+.4@HLMOQSSRRQJA:430-.269=CIRU\behmnpqqnkgc\\ZZY[`cgkllrw||reOIJORPUXY\_adgjkpstwvz}|{vdeiqœ}oi_XRSQPPRPV\_`cegefebb\Zev~~}||{{ywywtwyyuxz|{uz}{~slifdeehlmhca]ZVSWZ`itwxz{ytwywuttvvuy|{x|~}z~}{~~~~{}~}|}}}}~yyzyuvxz{xyyywvxtruwvvxwvuvvruxutttstqttsttssvtqqtsossqqrropsrprsrrttpptutrutrtwutttqrssprtopsqqttutstx|kdjmmnljd[VNJ\~obVRQLIIHLYda^__\YZVWXWSVURRSQNMLJHJJGGIHGIKIJMJIJLMNTdycXV[^\ZYYXXZYVXVUVVTUWUTVUQTTRTVTTTUTSWTVVTUWWWXUUUVXVYXTSTVWYYVWXYYZYZZZ\]\\]\]^]\][\]YX\[[\[ZZWXXXUVYZZWVXYUTXTUXWVVTTSQSUQRQLNJJEBCCA>??>AJQOOQOPQQMMIHHFGFHGHDGEEGJJGIHHIGGIHIKIKIIJJJIIGFFE?7444433301431-+-17=AD?:=@BGFGJHGHGHHHJJJKMLNLMOOPSRWXZsyod`_[[^clptz|~th^YT[\^`bghknqstvyyxutupprvlOA423-4=KH?72)"$'(('#!!,5;>??>><;<=@EGJMNMMNOPNLOF@:1,+*6@DLLNRVWURNHC>923/--5:AFMUZ]bgjoqqrqnhgea^XZ[\bdgilqux}mC9=CJMPQRVYYZ^acgijmqtw|}~zz}uffkuėwi^XRRRRSUUVZ[^adefeffg`Zbq}}}~|wvyxuruywtvyzxwxyzz}~ymljcdgggknme`]\YWZ_dmt{{yzzywyzustvvux|{z|~~|}}~~}}~~{z|z{z{zxxzxtvyxvwxxvsuvrtuttvvstvwttwvsutrpsrrrttrsrtrqssprspprqnnqsppsrnpsroprrprusqqrrpusqqtsouurrtqqvuuuuuzynjnnljc[UKQh{lZSKHHIMYb_`_^]ZZXYXWWYXRTQOPNKIHHFGHJHHGHIKKKKIHIMP[tvv_YY\[YZ[YXXZXYWSUWUTVUTTXUTSSSUURRUTRQRRTRRTUUVVURRVWWWUUXWXYXXXYXZYYWYVY\Z\]Z\]Z\ZYXZYZ\[[[[XYZWXYYZXX[XSVXVTUVTUVVVSTVSRSSPPL
LIGC?AA@<<====>>@BJQQOQQOOOLJJHHHFGGGFFCGFFGIIIGEFGFHJIJLLKHIJIHGFFEEEC<96242431120.,,.015;;325:CHKRUTWWX\_aadilotx|}xuxrgflvƚznaYVTRPUTTVWX[_fffiffccbbmu~~{{uv{xsuuvvvyutvwxyzz}y|~|rmkggggijklle_^_ZZ\dgmxz|zzyxyzzvsuwwvx~{vy}}{}}{}|{~|{}{yyzzzyzxwxxvsvvtuvuvvwusvxutvutrsstqsstqrsrqrpoqrpmrrqnppklssoqsrmoppqqropqokmpppossqrqpqssqqtuusspqtrrrvttwtsslllj`YRNRh}mWHIHJOW_^a]\][XVXVUVTSQROMNJIIHGFGFDGHGGHIJHJHGKMQ[ibpt[XX\[Z[ZYXXWWVUTVVSUUSRTUTROQSOOONNRPMPSPNQSSSSSSQRSVUTVWWVUWVVUVTUUUVSW\[W[YVWVVURQQRWZZWX[XWVVWYVUWWWXTSVUTTUVVWWTTRTVRRSOMNKJDB@CA>99><;=>?@CMNRPORQQOLLLIHHGEFFGHFEFHGGGHHGEGFFIIIJKKKIIIIHGEEFCCDB>95302220,+*,-/012574877679>BEEFFFEFHJIHHHHMJJKKNPRT]o{qkgcbfklpsvx|~ukb`a_acegiloqtvyyzy{zuxwyx}{o^QIB82.4??>>AEGJOPQQRTTSSPKG@;4,./:742./21,(&((,./14;FJEB@;899=??ACDEDGHHGGIKJJLIKLNRZk{rkgcgihnrvx{~vlfdadddhknortuvx|}|}|{yy|}k]UHC6658:6,&#!$*./-+)%!"!#*8?AEFC@AAA?@DHMNPRUUUVXXSQHC<5.-4@JUXYZ[^^YRNHA>;7535;?BHPZbgnosuvzz{xupmhfdedegiloqtvvp`RG<3+(&&-37>CHMRPTSTTY^biu~}rhfn||k_em{Ŗtj_\VSQQQRPNQTX\_cefegeeegku~yy{|wtvwwrtyvpoyurtvx{wxy}|zplkhgiijkknjdcb`_cfjkpw|{xy}|wvw{xuuywuw||y{~~~}~~~~|z{}||}}{{{|z}~{uwxtuzyvuuutrttqpwuqsutlstrpqsroprpssqpppooponnnpporpomomlmommoqlkmnjlsoklmnkhmnlnqpnqvomppoorroonlnpppprquutqv{phaVMKS`hqyq\MIL[`a^]^ZXWWWUVSTTROPRPKJJFFGFEEGFDDFDGIEEFGHM]nrz}mYXWVVWYXXWVUVQTUSRTSRQROOPQQPONPOPONNQPOPPQQTTUWWVVZYWYWSQQSURQNJIHDDC>;99876325421248AFJQVPTWVWTTSUSUSOTJBDORSUTUVURRSRTQNLKJD=<==:79;;;<==?GOQUSSRRSSNNKHHILIHHFEHEDFJFFGFFFFFFEFIIGHJHIIGEGGEFCABCBA@>;73..//+($%&'),06>EILNMGC?;997;>ACCFEDEFGIHKJHJLMNUhzzsjdhjkprsw{}{nghfeeijkmortwxy|~~|||xw|~tdVLF=66333/*)*,)&*31//*'%! 
',6AFGHFC?<<>AEHMPQVYWWYYZWSMG>74/1:GSW[^^``[WSKD?;6578:?GOVZchmpvyzz|zxutpjjhiefggkoomjfd]UK>4+$%*/4:ADGMMPPVW\erzwfXXbt~}wjcckwĘxog[WUTQPQQNNPTY^_cegjjhebm|}}~zx{{wuwxyuuwtqsxsrvzwts{z{y|}|xplihihghlnnidcacbciknq|}}{{}|wuwxxvvwwtx{{z{}{}~}~{z{{z|~|z|~|zz|{ywxwvyzvuwwtsutopuvtsstrovsrrsrnrrrqrplnpplmnonmoomnnlmnohhimmklnkjpnjkpjkmqmkoomnpqmoqolnrppqsqpolmpnloqonsrpr{nQNLNSW]enyudOQ]b__`_ZXXVUVWSTSSQQQNKKGDDFDDDDDDDEFFFFIIIIMdmZYZWXXT[XVWTSPTSRQRORSOOPMNNNMMNMNOQQSTSTRWSUXYYUTWPKGCA?:999864444110/...-...--+*+--/18?BBHNPRUQRUTRTSPI0'1>DKNOQRTQORSROMPKE;==>;9;=<::<=EJOOMLJE?;;99::@ABBEFEEHHGIJLOU`s~wrkjlknruy{~rjjiiiilnnqtsuxz}~}}}}{{uc[SG>999:@IPLB%&.6620.)%!!#,7BGJHE@>>?@BGMPSZ`\\]\\Z\ZRJC;856>LSX_bddeb^TMJC>:466=BHNV^clpux{{|~zvrofjhhikjjnnlmjgaRKA:3-*,16;@HLLQRVamu~t]OFShns~xi^alwŕyl`[URQRRMLNMOSXZ]_adfggeioz~z|}{yxvvuuwtqvysqrwxtrv}uuz~~~|~~~wlhhghggjkllkd`aa`fimoty~|zz|{ytwwvuwwxvvz{yx{|{z|~~|}zyzz{x|}|yz|vwzzzxxxwxyytuwuuuusptrqttrqqqromopnmpomnpnloolklmmnnnnmlmiinmkjlpmmmmjlnjkomlmmlikkjlnmllookmnppppoonmjmkllrqmpsnnu|~`QKLMPT[alvudX^a\^^[Y[YUWVUTVSQPQOMLKEDDDDFFDFGDDDEEEGEEIHjnZZWUXWVXTTWTRSRRRQNNRPNNNJLOJFHIHMMKINJJDCA?;;:9758654241/0//0.,+,+)*(('(*)(')'''&))(++,.14;CINQNQSQPQQPB)!.9@GLQQPKKLKKMLB;:>;:9:=<:7:=AHOQRRSTRRPQOLIHEEETQMMNMGGJPONMJKIHFGDEGGHGEDFGFFDDBBCCDABA@??AA953/,*%""$##''(+-17;7299>GPXajqtwy}}xuojffheijlnqqomjaYRB:/('-:FOVKMT^iqx{}~t_JEO]dmx}wc`eowŗxnf^XWWTSQPPOOMSRVZ]bdgfffpw|~{xz{vuwxxqowysopvusouwsvz{|y{~|smjhiifhjnmmjb^_cbgknpvy}~yzzyvwuttvyxwxz|}{y||z}~|~~|y}{xxyz{|}{wx{ywzzxwxuttxwvuwvsturqqrolorpoooommmnqomlmppmnnnknppmonmlmnllnmlkmliknmkjjkjlmikmljinjjmnjjmnnlmnnmommonljlkjprinvpoqu~lTLKMPTY_hr}{oh`^`ZX[[VTXWTTVSRPOLKJHECEFDDEEEEEDDCDEDCGIDmlYWWVVVTSTTVTUURQQQNNOMKLMLK:14328754412/00010,-++***,,('((('''(&'&$%&%$%%&%%%'&$$$$$&'''(*.05AED@87;<98:;9:8:@@==82/( !!!#%'*(,-26:@FKPSVXOJFB?>;::;:<@DFFHFHMVblosy{vsoprtx{yronorwwsrrorrtwz{}~½~tlqrofd^XhneJ +  
,6=835.%!#&+4=CNPMMG@?AJSVYZbcbabdceafb[YLC<7>HSbhjjkkjhc\SKI?:9767@GPYbjqry||yqkgcedhnsuuwxuqoh\QPMV]uw]MOQWdoux{~~~s]GBIQYcpyxfbepzǘqgc]ZUSTPQONMNOOSVY\_bfgdfu~}z}xwwxutxzvsvvtpqssquqtssuwxzxz{{~~zrihfihhijnkib_]\^bflntx{|}{z~yvwyvwxyyyz|}{z{}~}~~}|}}{y|~zxwzzx|zvsxyttywtuxtrsxututpoqqpqppopopmnpollonoqonooolknnnlmlljlhglmihkkkiljggijhgkiijkkgllhikjfjljijnonmklnnmnnmklmjfiolinolmqr|s^QLMPTY^fnxvja_YY]YUVVURRRRTSLKKIFEFCCDBCDDDEEDDFHECFJSpiSVVWVSTTTUURSQPRRNOOLIMLIE. ##%***(''()'**)('%%%'&%%%%$#$$##%%&#!" !"$#%$#%%%#$%"##%&&%)**-017=@DGEDFE@;96/)#" #,41%"-1423::85:>;778;AHOQQPUVTUQNLKJHGGHDXhh^USPRSQURRPPNQPPOLKLHGECCEDDDCCCBCAABBA@>@>??><72* !""$&(&)+-.138=AJMWZ]VRJHC@<;89;@A@BDGJPX`ehnrx~~oopsuxz}zvqwxstsrtwx{|~ŸY + + +  %071($#'*+15>GLVQIFDFPRLHV[]bffgidghghfcZUK;=:;FQTVditz~~}yqnhd`fov|YKPPXalswyz}{lUF@INOXfv~sccgp|ęxrfa[YYSRPNNNLMOQRTV[fcgdgp}~~zyxvsw{yuwxvrsurqswtoquwvywz|xxz}~}|}wqnigggejmmif^]YWY^ckmt||}~{{{}xwz}zuxzzyyz{zz|~}}~~}{}|zz}zwyyyxxzwquwvuvwuvxxuuwvttsqnqusorropssnmppmjnmmmommmnijkllkkkkiikijjighjjjjhffiljhjlihjijjlighkejojhilkhijghkjjkmkjmljglljkmlloqq}eQINRUZ_ekxxhXX\ZTTWWUUTPRSNLLJGGGDCCCBDFEDEECDEEDGFGQrhVTVWTTVVUTQPPPQQPOOMKJKLJA& !#$""""##"$"""#"""#""# !! "##""##"!!"#"!%#$#"""%##%$%''(*,,-148:9:=>@??A@<5/)$" #'-2224:9788;@HMSRPRSSSSQOLIIHDHMRj~{qibXSPTUUTVUUQVTSRPMLIFEDEFDCBAABBA@@@@??B>?A?>;5,  #%$&(*,-../4=AFGKSY^_dhmrx{{uuxyy|~}|yyzxxz|}I +  + &(#!"!#%(+07@MNLKKNNA23=KX]bdijlmmllmmlmjd\SJBAMS`gmptx{wrfYPIE?636@EPYbovuvusuppoljfkmt~}QIJNXbktvwuzzjR@ALQMR`nxmaajs|Ɩ|pjd`]XSQRNMNMMLMNRVa^ddbdrv{}{}|wvwzzuvzvtrtuoostvoltwtstwwvvy~||{~~|vojffiiijnmhcb[YWXZ`ipz||~|xx{zwxyyutwzyyyzyyz}~{}}}~|}~|z{{xy|zwy{yvwxxuswwywyvwxxtswwssssnmpqnpqqmnpojlnnkmollmmllklhjkmikkjkjiihjjhgijghigegjgiijhgggfhihfgkhhihgghjhhkgfjkhhkliilhhjmijjjknlnpoYSRSVW^dluzle^TQVVTVXSRRPKJMKHFDAACDCCCDCCBBBEDEGDHMsfQWWWVUTWVRRQORQPMNNLKMMKH<&  !"!!!  !"  !! "!""!!##"!!"! 
!"$""$"%%%&()('(,./158:=@ACDDDA:5.&!$&*29667:BHLOTPPRPQRPLMJJMKKTZckqwzypjbYWLVUWYWUXVVUSOOLJHEDDBBCAACAB?@@?>??>>?==<8- "" #%()(*+,.247:AHQV[_]ZURKEA?=<=ACGLQVX\adinsvz}yww{}{zz|}tG + + + + +  %(-1@FPfoonmklkhnkjgilw}NGJLV`kotxy|ziNA>HMLJScs}lc`gp}Ė|wqid\YSRQNNMLJKLNMMV_acihfp}}~}~}zwyzxtvyuutvvqqtrpjrvttssssuxy}||}|sjjggfhjkkkic_[ZZX^afp{}~~}{zzzzyzxxvy{}|z{{z|||}|~~}|{yy}{wx|{wv{vtvyvtvyxtttsqrvrqrssnoqonprmmonjknpkooljmnjillkjkliikjgiifehifgggheehaehigfihehhedihgfhgfgihfgghghjiimjiklkikiefgegihgjmjlqu`TPRWW\elv}qaVVXWXWTRUROMLLJIIDCCDCDDBBCEFABBDDDAIQxiXWWWVSTVTRSOMOOLNMJJLKKJE;& ! !    !!"#!  !"" !"  """##&&%%(*+,0248=?BDDEFFHD:3+%!&-.27?OMLOPOQSPPPNNMNNPTZ^`chkoqsyzuld^XSUZZ[ZZZXVTTRQONLIFDCABB??>>?>=>;>?;=<=9+!!!#%'&)*+-.02339>CKSY_b`aWOKG@>?>BFLPTVZ\_eijnsy~|~~}o; + + + $)-1CEFGJJHD=6.$"*4>GKNOMMNOQNPQRRSTTZZ\^adgjjkqvxxrmgaUSUZ]]]]^[WWWVVTSMIFDB?>>>><=<;;=<<<;<8*""%%%))(--+./2359026DZipwz~~}{{ywuurk`OC98=SeG9/4569;;:79GbmjhhhijhfhhhimxuCFGMUaiouwy|w`OFGLIGHUhvyg`^gq~ƕ}vlf`XROOLLKMJMMLONTZ^cffcfr{~|}}~}xx~|wuyzvrrutpnqstpponnntwwwz~xsmjggjklljjfa[WZ[]afns{~|~~}|}~{y|}zy||{z|{wz~~|}}|}{yy{{vuxywwzztsvuttuuttwursusrruqqqpmqppmnqnlkokkloljllkjklkkkjhjihfioigihfdfgbdgedgge`cdededbbefd_bdebbcdefedegebdecdddfihdfhgdefcfhjijjhin{q\TVY\^els~vg[UWTSUSKNOIHGFEDB?>>=?>@A?@BABB@ACIQ{U>GMWPUVWUSSRQQNPNJIJHFHIE6#    "!! !" 
!"$&((()),-26;?CGHIJKKHC=4,%&07>DHIIJMMMNQQSUTVZYZ^_`bdhghjnqrytnjd[STY]^`b^^\\\^]WQOLGCA??=<=<===IMLLJQONISZ^ch_WG=633I^ozyurpolne^YE5/+/83/-144898747HfjgfikhhihgghlozmEDCJUahowvv{|l[PKGEEFMan}xf`_gr×yph`XSQLLKLKLJJLNPSX[dibdglrv~~~}y||tv|ytswxurrtrommqvsppnnrsuy~~xlihjigjllgec_\VVXWbhmry||zz~}|~{{~{{|}}{|y|}~~}~|~~}}|yxx{yvxywwyywuuuttutrstvrsssnsrpmsspnnnlnonjklmkmmklmmkillgikkgehgdcfgffgcbbefab`acfb^baadbcccbdb``bddbabedccefdecccefdbeedehebefdbefbafgegn|ybYXZ^bdis{wj[WTTUPMPOJIGEECA@??>>?@?@?A@@B@ADHKz{H3>?@>FKNUTRQOLMNKIIGGFFBA7%    !"##"%&(&+.27;ACGJJMOLLIE80'"$,3:AEHKLMNONORPTWWY\]]_bdefehikpsusmhc`URY^bcdfda_\^][VMLHDA?;:;<<<;><;97'""$%&&**)*--.0115:=BJRY\dkff`[TIGCGHHIPUY]`dgikpsw|}w{ztmjklljY4&*/KQB-$&04?KONNNSSMINZfs~xrgWJ>>@@@?>CJPU^eijjkc[VQKDFILOTVZ`cdhlpty|»pZav}~~}|yvsronlkkige[?;;ATXRD><90*#!(.3AMUZXSWZOHM[iwuhYRWit}sL%"#$$$%)-10-+.36578846Ebkhghjgeb_]]ckpiBCCHT`hoqru{}qf^WMGDJWeusc`bju|sg]XLKIHIJLNONQOMPTY^bbbbepvw|~}}x|}ww~xssuuspssrtysjlqrvv}zrkffjihjkfc_]Z\YVUZ^hnswz{|{{}zz~}x}~|}}{{}{x}~|~{\gxx]dt{z|||zuy{yvvxvtsxxsrstsssrqstqqusqpqrnopqnopmkkoljklifijheikh`hgfdgheeececdecfb`dfdcbda`ca`]``_^adb_bb^_a`_`e_\`b^^bc`]bcbabcaeeddf`_efcbac[bgcahjkvc_]acfkq{rdYPQRPNKIEFD@<5,.479:>?>>@?>???GQ|^LC:/($#/9>DFKNNJIIGEGFBB=-    !!!#"#%&().4:AGJLNQTSVSOF<5( "'/5@FLLQPQSSUVXXZY\\]^`cdgghikoqrvvtoidb``cfhiiheddccaZVRNJD><:9:9893$"""$%%%'*,..00567<>BFOX^cilqmia\TQNJGIKQVY]`fhlorw{¼vu|y|wjY?6\tz|~yutsrpmnnnmlkihhgfee`Y^KFP_YQZ^\[YYVN2!&.4CSdnzwxrPEN\hy}B! #)-01,+-27797;46IdigfggedcaX`glq}d?BCHSahmqsu{|uqocTJFKVcp}|pe`aju¹uj_QNKHKKNPRRRMMMNQT[^acdfjqz~||y{zy~~y{|wsttursupqszslmtyy{zpkgchhijiha`^YWZVTT^emstx}~wwz~~yy}|{~~~~~}|||~~wiej{mU]ny~~}}{z|}}|}zvvzzywwutsuxuqtttstqqsvupoqpooqnnppmmonjkjlkkkjijjjghkhhfeffigdcecbcddcceb`dca`a`__a__]^][^a``_b___`__ba^\__^``aaabdcaaabccddb`adbbb`_\dcbcgfkľ}gacdfkmtzydSUSPOLGEDB>:+&.369;;?A?>@?BGPw|YNQMC8-$ '17>DIHEEECDFDD;+ ! 
!!!""#"#$&)+/29?DHOTUWYY[WLB90($$#!"'/8:89:71" "!##%''(,-..04458<=@CGTZ`glqqlhf^UPLIGJMSW[\cilqsw{ƺyhbdjknpc\TKF=@:434Snx|urpmmkkijkjhfdbbbdeeindGAYiiouyY-&,3DWqkKIM[ixy? ! $*/20+,/39:<<:58Ldheefghge]\fflr]CQwwHCFMNMA8.%&/5>ADFB??CCC:)  !!!!"$''&&'-6GNPSUUXWXX\]]]^aabeefijllnpstx|xvqkhddfjlnnnmkiiihg]VSJD<<:6. "%&)),,+-13459<=?@CEMT_gmrswrhd]VOHHHILOV^bhkorxy~ѵkagaWUQNA?98<431,1/0('-//4Mkv{{tpnkklihihgca```beegms]GWn/! &-4CTn|siVGFLWhwúw6 !&)-1.+-28CKJK@69Peifghghg`X_efnu[:>@GV^fmqqsy{xzyri[SQXclu}ymc]bmwºzl_RLJKMPRRRSQROMPPSUY_adabhovz}}|}~}~~~|zyxvvytrqvzvppx~{rkhefhhjkhc_]]XWVTX^bhovsrxxxwxzzyx~~|~~|~}liledflppnuxvxzyxx}yxwsfce^]dr}}}}z{}|yy{{zwwyxwvvrquwrpstoprrnsvpjnonlopmmomklnlhjkiihifefgeceeebce_bdc`acb`cbb`_`^_ba_^^___][[\]\[_^^_a^\[_]^\]]Z__]W^^][__^`^[\``_aa^\]^_a_^]^b`aeccfxżkgglknszp]UNJJFB?<3"").38=@CQwrA6;AFNRKB80$#)49:416>@=3 "" !##""$&),4@@AFKS\dintwwtkcYWMG=AIPV]agmqqv{ҪG711-*(($'(&)*)(*.--*&)-,1Sovxzxrmmkjhhghhfba`__defeeX=Ijt0! %,6EQZUP\VNFCJUbt¹t4$%&&&)/1,*,5E[de[@8>PglgfhhgbY]bdgnuV9>@IU_fnqqtx|z{ytmd^\Zbir|tja\dnxyj_WOJLLOORRRSSPQNPOSW[^abfioow}z|}{}z{{yvuvrquxwuty|xpiedffhkiea]ZYYUTTX^cglstpntvtttxxvv}~}}y}~~}~wiklie[XSNJRPSY[Z]a`_aacdccWW]mw|}|y|~zy{|yz{|{xxyxwvvurttrqstqopqmotqlkonmmnmmmnjjkmjfifghhgdefffffcbbccbddcaacaaba`__]]]]\]^\YZ^X[]^[[[]Z]][ZY\]\[[[Z[\\[\_^\]]Z\`[[]^]`a_\V_^[_^\Z`][`^Y[i}İpkklprv{saTLID@@=2!(07<<@CFUwm4(,28@FMOHD6) "% +273#  !! "###(,5>FLU[]accehfaSI?60-,(''),2;DJOV[]]^_caccddffiijlmprrstwxz{|{wrnnijosvvwwvrpnnljc]WP@+!#"%')*+-123768;<>>@??FFRXagnuyxyrnb[SJ@CJOSYafkqvy|ӦC3*''*)&$$%$%&%&)+++'&*,+4Upsuwvpnkkihhfhfecb`_`deeaW17Ppn,! 
%,7EOPJTZTIFHFU^n~ſd+ ,6880))/.+(.;Snrn^@9AQaggdghf]W_cdjnuK;=@IU`ilmlmt~~||{tpkea_chpyqja_dmz|shYNHLMNRRRSUTUOPNPPSZ]^cbchp{}zz}~~{}~|{xvttz|wuuw|yogdcfhhjib]Z[XWWVVY`dfknrmntvtpsuwvvy}nijilie`WQNQRQPPRUWX[ZY[_a^ZYX_pu~~{z|{z{|{y}}{yyyxuuwusssrostpmopnlqqlkllmlnoklnlknnjgijggihfefdcdebbbaaaaaaab__aa_^^]]]__^]^\[]]]UY[]Z[[`^\[YY[\]]]ZXZZYY^_^YYZYYZZYY]\]^`[Z\]\\Z]^]^Z]`\Z`hzƻ~onprv{wdSHBAA<1#*05;AFQ|k)%-4:BJLSE9-#%+(   !""%'(*29EOUZadgjkihg^QC<40/+(()*19DKU[\^_bbeeefhgjhjnnnnrstvwxz{~yuqmqux{{zyywtrtqokfXE5( "%&'*+/02467:;=\qyw^B7@Wbhfdjhcca`acjmvK??BJT^hkliir~~}wrolgadhlnlf_\am{žzgbSLJMPRPPRQVUQONNOSTW[^cdfnuw{~}{{{}z||xvw|yusxzvjhbchgihic]\YZUUWX\behjmnonqroprqttvx{}~jgikmihc\UPNOPRSRTVWWXWY^]]ZWTPgny}|{||yy|{xyyxuuyvttwwsoqnnprqmmqpmmpnlmonmoqnjmmjlnlhfhfcghgbedbabcb````_^]__a_Z_e_^_^\]_^]\^\[\ZYUXVWXYXTV[YW[\ZXYZZZ[YYY`YWVVUWYYWY[ZZ]_]YZ[\\[[\[\ZZ\_\_ad¯xqtvx{}{dODB@<6,*((& &.5@Uxi"!*1930.+)')/6BLQ]`a`dehhgijklmlmmqqsuvwwy{|}{zzywy{}}||{zxtuttohZK?5# ! !#$'(,003568<<=>>BCFHHKLO_nummnmg^SIFHH8.-6AMYaiowzЛ=-&$%&%$"$###$%&)))'"#)++7Ynqtxuqnlljhfhjhdbbb``dfe^MB=Ixc&"" #-C\^b^P38MW`mroont_+'Elvqa<).1/-*0>ay|[@5>Ycfe`igb`^__ahmy{UHEGKU^gmlecqysqplljjiec`[\en{Ž}teWMNMNOTTRVVSQPNNJNQV[a``emsy|||y{~yy}|xvzzvtxxtjedcgjjjfb^\ZXWVVZ^ckhhkmmloqplnqqqsyz|z}~|onqsqrqstxvxzy|}{{}~ulhlmlkhc]TRPOPRTUUVWWXZ[\`^WUQ\gsxz{zzx{zwuzxvuwywuvyvqprpptqqpqqoonmmkonlmpomllkjkmhghhdehfdeebaccabbabb^_]`b_]^_`\^_\Z^\Z[[\XWXWUWXWWWYXZYXYWWWYYYYYZZXXZ^YWWVXZZYWZ[ZXZ[YXZYX\[ZYZYXX[[Z`acȸxyy{~jUIA?@C<764+ '3Ntd#*6>BFLIC80)    " !$$$(.8EQYafmoqqtssj_TI=62..,*+/8@MT[`ddhhhjlmmlmnnpsruwvxz{{}}z{z|~~}|zwvvslaVJ<.  ! !""$',/.24669==<=?CDHHIIKZlnbba]VRNIFDB940.,38GU`jou|Δ:+&#"$%#!""$$#%&)**%!$)+*6Ynsuvsonlljhhihfccb``bcfgaQ=<8, + + + + "Foc) *6;LHKIB=1'     ! 
"%'*4BOW`hrststtrmk[OD;:60*(+.5?IT_bcgjkmlnnoprstttuuxz{{}}xy{~|x|{uk^TE6%""#$%$$()+.125779<;=>ADFFGMO[kh`_^TQNJGEA?A@A;62016BO[enu~ʎ6*&##$&$"""#$%''*))% #)*)7[qrsuqnmmkihhhggeca``bdff`X:=NWu[  $ )M~rR.=L_|rq|Z&/Vzh9-/31-)0F^y{V?:CXeggdffdbQY_dhnzecbaa_cfiidkx~xwwwwtsrj_^\]fq}ú{q^POKOSUVVYYXUROMQKOSX]accjlu{z{z}|}zz}xwvuljedeehheb`]Z\YYYWY`fjgggjjikoponostpt{xy~~jTU\bgihijknopuyzxzyz~~ysmjijlld`[WRPRSTVUWVZYZ\]^`XRNWamvyuuxzwwzzvuvvtsvvqoqpnnpomqpnjlnmmkonmmnlmolkjlkhijgdgfcbbb__`a`_^^^]`^`]Z[]__[Y^^[ZZ[WXZ[XWWVUWVUTWWVWWVUUUVVWXSUVUOSVWSSXUQWXSVZYUVVXWWYWWYYWYYWUWYXX\^_pȯpYGFHGEB?@7(  + + :og6*#+7?IOOMG<3(     !#"%',6@RZbjpsuvxwvvpdVK>:641.,/4@KW]fiklknprstuuuuvwy|{{~}{yxwqdL:)# "#%&%()+.//3689<<>@AEFGHLM[kja^[UQNKIF@;>DGJFC?9423:HYdnw|DŽ.*'&&&%#"%#!#&).1-)%!$))':`qrttrpmmljjiifeb````adee]H-22N}T! #! )T}{mP-=OYj`QQi|S&-W~|\:0012*+4Gb}oS<9E_hhfdfeaYV_dgkq|oikmlklmmmpsz~{yzyywvtlda\^etùxqh[NLNQUYZZYZWURQPPOQUZ_bbdmttx}}z|zxwvsuqmfgcggggca[XZYXSWWY_chkggjliilqpnnrrqpuwxx}}~~[KJR[aeiihimnpsvxvxx{{{~~zpjihjjjgb]VQRUVVWVXYY[\]\`ZTQPXerurx{ywxyxstvvsuwroppmoppnornmllmmmmnklljilkhhjhhggheeeabddaaa``^^^]\__]^][Y]^_YZ]XVXWWUYXUUVUSSTTPTUSSRTURRRSVWUSUTQQVSSRUVRQTUTXWTSVVVWXVUVWUUXVTSWVVXZ[]vû{bPKIHEBB?3#  + 9k}mH;.$*8BIOPOI>1(    !   !"#$'))6DSahjpuwyz{wxpg[K?751./.+2>LX\flmpqrssuvvvxyz{||~uhg_baP?4:;/" #$%*,-0102479;;JMOUKEA<45=KU`mw~2,&$&'%$#&&$$'-440)%#%)*-;^prttrpnlljkhgd_[\``_bdge[E5>G_ýP!!" !,[zqhM.>PZhokkm}Q$/V{~tY5/121+.5FevylR=;E]ffdedc`YXYZcjr|qpsssvvzywxx{~{zyxxxyvpgb[^dt¹}zdZQPRUZZY]ZZXVRRQNOPTZ\^bbfioy~|z}|urnppnieceghhgd[ZXZYYXUUW^eimkggjihhoqnmnrrnotwxz|~yUIJMQYaikhhjklpqstvwxxxz{}|tkiijkkigb[USVTYYZYZZZ\][\YWQLQ\jrvyyxyxwtsvwuuxtoopmkqsoosrmlmkkjllmkljijljjklhhhggfffbaec`ca]\]]\XZ\[[^]XY[ZYXYYVWYVTTXVTSVVRSTVSTTSRSTSUTRSVURTUTRQUQPSSRQRSQSVTPTUTTVURTWUSTWSTWVTXYXajx˿}iNIHGFC>2! + 4h~lNF<3*! 0;FNSVUJB5) !  
#$$%*002267;AGSceablsvvwvtxpkaL>327:76524@BEHKIN\if]ZZURNKFB>87;=?EKXZ_]WIC=<9@Obnt|Ƿr6)%"$$$$$%&&%&-34/*$!$))(?dusuvrqmlljiigc^[^`_^acfe[J>>FfûM"#!#.]~lrmJ1@Scrr}M 0Zx}x[4.///,-4Fdy|sN:=J[dffgdc_YWW[cmq}v|~~~~~|zxz{yyyy{|yqhaX^it|{|ri`UQTWY[\ZZYZWUSPPOMPSW_b`dhqzz|~{wnhhmkfddeehfec_WXYXXXWWW[ahlnmihijiknnkmoqpqrtvz~xQJJKNSeuypihiilmoqqttuuuwwz}}wojfhhkkid`\VWWXX[[Z]\]]\]\[VROTdlvusvywsswvtrtuolomknrqknroklnlillilnmjkmlkjkjccfffdedabc`]`]XZ[XVV\\WXZZTVXWSXZWWXWVSUVTUTSRRURSQQPPSPSRRQRSRQQQSOPPQPPQOPSSROOSPRURQSTRRUWURVUSTSTVYZez}ƿMJLHGHD=. +5byiIGBA;6,!/>IQV]XN@6." +  !!#$((&%&),,-*,22269LJA91/=L\fkostxyz{|}~wxyp[8#&&'())L{|m_R>- "!&-189<=<=:<>>=?EFJKP^mf[YWSQOJFC?AKWKE=@LXagjdXOJEEL\efdeddd^XZ^dmr~}y~}}|{{xsnjlv}z{yz{|zofaW_jt~}z{||{md^TSW[YZ[\ZZYWVTPOPOQV\]_bipv|}tldcjic_`deehe_YVVVXWWVW[`dkommnnkiiknljmoonnsutu{}|{~~~pNJJKM[~~oiiijkknprrrtsuuvy{zsmhgfhimkf_\\[XZ\[\\]]\\\^\ZTPLXcoorwysqtvtrrtqqqpmnpqmlqoklnmjionjlnkhllihjlcVV]aceea_`b`]^YWZYWX\\XYZXWUZ\VUYYWVWWUTSSSSPQOQSOOOLOPPMMRQNPOQSRRSNQQONPRQSTQPOORQSQPQRPQUTUWbaSTTQSY\k}y~ͼ{HIIJKKIF=+  +4ev}hAEFEDC@3+#"2@MUZ^_PG:/%  !  !#%')*/5ALNRI=ACEE@@AGLLHOSZ\\ZUY\dnkjgW11AQI>@FNHCAB=3,&&+@Yd^RF;4:PYaixyzz{|}{wwslZ7! !"&',P|nXB$#"$&(/58:=?AA@AA>BDEIMP[ke\YURPOKHFFXkzkfdhmz}~whgjo}\('$%'%&$$#%%#(.0/-("$)+.@isswwsqommkjigd^Z\^__`bde]G@Oe|t= !"%7h~rX?5BP]d[\bkF6_xQ1,/1.+,5Li|{mK9=L\fdeffdaZR[_gmty}yssmklljgd]VVn||{|y{}}zngaYakvĿ|~~~~}}~xui]WRVWY]^^\[[XXXWSQOMQW\^agnvrk`ab__\bdacec`ZXTVVWWWY[agkookkmnkhilmiipommlsysu{}|z}cGIJLOarmjjkjkorqqsstuuwz|wpjfdgfhjkd`[VUVY[[^`]\]\]][UQLPZilsyxtuwustusqqsonopjimniimljijlkjjjihjhhgggR=EQ[be__c`]^_^WX\YUWXVTUUVWVWVUWXUUTTSTUPPSRNQSRNMOOOORPSSSTQQSUTWVOUWVUSPNNPPLMNLNPPNMQQPRWTYhxs_ZYW]eowsqpvʯnBIHJMMLHF:'  +6ivzeDFEFFEB@>4-"!4DOXaab\MA6("$*'""&&%$%#$&&%'),.244977:>AERhw{xgXWVYULRNY_^ajp{wvmjmqm`^WE$,5+$(&#$%$''%%7Mhf\UNIJOLTn}xwunZ5!%%)Mywd8"#''(,/36;@DEEEDDBCHJMQ\ke\ZURRMIHIRnQ"$#$&'$$$##$%(+./.'! 
$()*Geqsutqpmlkjihfd_\\]\^`cee\FHVl~ĺp; "#'9k}mf_=5ET^ikuck|A!!:cwO4.13.+.5Ml~dI;;L_e_dffc^U[aeinuv~t_\\XUYYYVROJOm~|}}}|{ph^X`juþ~~}~{}~|{ypf\XWX[]]]\\]]]ZXWSRQQTY^adfc]^XTQV]^acbb^[XVVWVWVY^diknooljkmmilonhlonmmnuury|z|~~wWKIJMRnqmkjijnpoqrtuwvwy{|{qfbdeghigc^ZUPTY[`__]^\ZZ[[RNMM_jsvstvussurorqolnoeVWbiijljijkihmolkknfidXP@17FV]`^ba]Y\\ZY\[XWWWVUVVUXVUSUUSRSURSTRQSSPOQPOQNMLOPMSa^Z[^^^_accbffcaZPMNOMKNNMNMMNNSS[fd`h~xkcioxvpprxȩeEJIHLMMLJE6# :erw}dIIGHHIHGED@5,&(7HT\diidVH:,   "7QF433688;;:8:;>@BFIJJHHIILLKWs|lefkleehflnllounfadca_\ZZUP<"+1*" #'& )I`e[XWVRRSc}zyxm\3#'%)Krh7'" "$%)-0127@CA?ADEHKRbnf[YVQSRMDP_LLe{6"%%$#$$$$&).0-+% %((-IgqrsuqpmkkjjjgaZXX\\]_cccSA>Egi2!!)FW_bhhddflhedghgf]RE5'""%0ADDDCCAABGKQflc[XURUYWPYj¼ùbSL==D4)($"!$'$"%$$$%+00,'" $'(2Oprquuppmllkihc_XVZ\\]addbU3;WuȾc)!"'GswqjQ38IR_opllsm0%Fnp>+-//,,-4Rm|qYA6=L_fdedb_XTRW`jmwre[\[XUVTRQPHAWz}}}~wme\Wblxº~~|~}{}|{~~{y~zuz}{vruytnkf`ZZ]`_^^^___`]\[XVQOLQRQOJCC@BDGIOSTQUVX^ny|wtwywtswwtqnopppmlnrtomqspptyxxz}}|}rTJIJORs|mihhgijkmnoprqtwzzvmfbdegfhif^]XZ]]\ZUQWZ]\^^YOMMT`cdgkopnqqollmdS=889;DOV]]_bdadaa`cdc^YK=-% &0@PX\]][ZW[XUaj^MCGOQQNQSPNRQOMONLJLKKLMJJIKJJOIKM[q|wuvuvvvxzywoi`TLHJMKJJLJPfxuy}̱nAFFFGJLOPRTTQI8#?jszeOVUVVVWSWWXXYZYWY]YRC92* "$5PbjmicYI7-+(%#"!!$+0.+($! %$%)38Bfřzuplihcdggfl__acaba_]ghehikkjggggcba_`b^\Z[YXVXWRG3,-% $-4=5( ;RYXTW[XUVl¹}~}uR**7;?BB>G\kmponlgYOJJJIHFE?<6-('''*+146:>BCEEC@@BCFJQinbYYUTV[XRYk¼¹mgcUH:530( #$$#$%%&'*//,&  $(*3Uwuqttppmkjjkhc]WX[\[\`ded\USOqǽ\' "*Iu{snkU37L[qwmul+&Hrm?,,.0++/7TlrjWA6=>BGJKPPT\cn{}yvvwvtxyvsstpnpsrlfkplilqonpruvuy{z{}fMHIJO[ynjgiijklkmnoprsswwqidddghhjidc`abcd`[X\^a^_^]UOJKSRTYbkoqrommlh_L96676;CLRWX[]]^__^^_`a^TF9) !+7IT[^_^^]^\]bd^RHGMQMLQPMNPNJKNJFHKHJKMKHJJIKOILQ\qzxwwwx{{zzwtm`SMJGHHJJINbw|zz}ʦ`BEEEHJLOQTVXUTK7$Cks{}~cTWYWZZYY]\\^^_a`abffb]UG8,"#/FU\YOD=910,,,,34(# %,..+)*'"$%')+=fw{{xststpmhgfca`dfbbabcbbc__`deghljgce_^`^^\\^\YUSROSRSWRF0./& $/>ED. 
";V\[gUXVSTkû~|wiN0;JPRZO>E`jnppnlgZPLLNLONMMHD;762-+-148<>CGDC?>ACFGMUfl`ZXWVWVQLOk¼wUdbUE943/' "#$##$%(+00,$$&)*2Ypqqtspomkjhigc\WVZ\\]_def`J/DwǼT$""-M}{zmT0:M^s{abct^-%Js}_=.-//+*/7VlqqX>4=Q`hhfb`bb]^adinx}w{WUWUVVTRPPIBAVy~{}~~wkaU[en~~|z~{{|v{~~}~}z{~{uu{}ywxxuttvvqomgda___^]`_^\_`]\Y\XUOQRQMIA?<=@BFKLZeny~{wxxuttvvsrutomqspkknnkmnpnlortuwyzx|[KIJMQe|nhjjiikmlnpqrqruvrkfcdhhgeiieddefkihefggfdbc^UOJONNS\ejonlmlie^E653355;BJSWZ[^]^^_^]a__^OE3%#/AOX^aaa__a_`bd^OHIKMOQOLMMMLNNJIKKJKLKLLKJKJIFLPYn~~zxzz|{{||{zocYOGFGGGGN[jx}{ǝUBDFHJILPRVX[ZZVK60Kjrz{|~bT[ZZ\^]_``bbbbfghkmpqsqiXF9-$ ':COLE836.***+:NNB4'$'*-./10.+%!  (,/:720+%#$##%#%&(,//& !()*3^oqrvsqpnmkiigcZTUXZ[]_cbbP&,]}ǿO! $,Qx{xmP4[r|qW;5?Rdeffda_^Z[\`hnwxy|YXYZYVURSQG?A^{||~ylbR[gpĿ~||}~}~yz}}}||y{|vpx{wttxwrrstsrstokfc`]__`_^_b_^^\_[[]]YQMLGD?;:-"!(1CQ\abba_a``accZNELPQLKLOLJLMLHJHGHIIGGFIIKIGDIMXmy}|{{|||}|}{rdZPJGDFHMS^hxϿF@BEGJKKMSXZ_b``\H=Ukryyz~c\]_`abbddfhfjknmnpqstsqqj[J:-$$%*08A;0.,'%*+(1EMQA.()'(0345640,*$ !  ! #'/Fisvtrpppomnkkkihgfffdedbcaa`_`dehiib]a\[\XY[Y\]YYUUZ___bbZI/!0/$%.2770'#=[tnv{ukaK),-$?B17I`nommlmi[TQQQQRTTOSSUSTQPKJD=:@BCEHEAADFHJMVchb`bc_^^VT^ve2CA<96.))$#$$#(#$$&*+*%  (**5aqpturqomljiigc[XZXX[]_ab`TBPbtƽO !!"+Uz|wqfH.>NUblnfbxY *Orz^3*,0/**.>YrzsX<6>Raedecc`ZSUY_hoz|yuWTW]\[[YXQC>Fa||z}~~ulaV\fp}ſ~~y{|{w{|{z{}{zy|~yttywusuwvstvvttuvsqokcX]]^_ac_^``_a^ada_YSONLG?<Q\q~be~T#(Pwx]/(-1.()/>\tuR:5AUeedcab]SRX^bio{zwsWY\ca^^\XNA?Hc~}~wmaZ]cs}ÿ|~}{z}}|zz|z{}}{{}}{wvxwvvwxustvutrurrotxri\a`]]``^]^aa_afeda[USMOKE@@O`wuy}{y|}yst|{vsvvslmrromnhigjlmlonmloqtruvwtv||}}aIILOSaxnnnllmmlnqpqrtvxuoha`cddhkmnmqtssrrrstppnljfZRMNPPQXagjica_D540..,,+/4;GPX^`a_bba`b`bd_TC4'#1?NZbdcedfdacge\RLNMLLLJLJJIJJGIJKIIHHIJIHHIKHNZjuz~~~~~~}wm_RKHFHILSZftԽʱm>ACDGIKNRSW\bgpw{{yyyqp}uONX__cbcfhdbb_\WQOLH@<=700122-+( &11+)('&.9'!!"%&%".579=;>?<71-+% )2Gfovtonlijmjhgddbba`^\XWXZ[[^`_cdkz}uxzxzzw~yzwnyx~vrs\K- 
3/#%&*.344+%!@[y}ua~vskL2DMLNKA8DgopqqoldWSUUUWYZY[\[\]_^__`aaab^ZVOLEDEHHIPYfi]^]YYYUMFUtR6IB;84-('#"$##$$$$'*,+("&)-;cqqstrqpmjjjigc\YWYYY]`bcifTKh~ŹyE !#1\}zvujD7@Q\{nOMdO(Uu~zU/*-.-'&-C_w~nN84@Udebca`^XX\`ejozzwqYX[Z\][YTJC>Fg{|~sjaV[er~ÿ~~~~}}zz|zy{|}zy{|{ruyyutvwvqqvvsrrrpprvxtpnof_^_^``__^`cflkdZXTQROKGHX`jsy|yu~~uoy{xtsxwsqqqpnoqjhjkklnqlilspmnuusrvzxwz}~~uYHNUXZlwqommmnnooppqruuvsle_bceijmu}vtsuvussrqmibZPNNOOT\dgXFJD4..-)**)',16@IRZaabca`bbaccc`N=.%!")7GT_dfghfebcdhf[TNOMMLILLKKJJHGFHDIGGHFGGGHGGM[jstz{rdYNGFFIPU[huڼ˩dCBCFHIIKPRY_ekt~qzn46JSQOPOMBB>:454/22----.,-*+)''%&./*+(&1Wc, !#$'& &3988BB8-!"E\YǿƿynH*8@AKC00NgnoonprfYNTYZ\acefdeghhjlmmlppqqoljf`WSOPPNZii`[XUSPMHHYxǿK7D><62+)'#!%'#"$$$&,-.% %%+Aiussvspnkjkkjg`ZWWZZ[]`a`V:5Hbs?!#"%1e~}s_?1ASZ`hmlg½wE.Xv~nN,(+.,&%-A`tthI57EYdeb`^`\YUQYdjp~w|jS_^_]_d`\RCJz~qsqmeigcgadbcbefd`aenjmpqrurjemtvvyyvtprnfUA''42%! .51.023-%"C_xeeTļĿymE0?C@F>(,PjorrqqpcWNTX[^fllppsqrrqtrruvuuvwvtsplje][X_jh`[WSQMMGG\yǾF>E=864,)'"#$%##!%%'-.,! %&+Cmxrsusppolkkjg`XTWYY[_ab_WE67\ʿo=!""(:jxn]92CR^ry|bgn@4cx|qM/),.+$#.AavzgI87H\bccdb_[UQV_ekpzwm^a[][\^YNH>-706M@..UlptttskaUNSX\bkqy}{wtxz{{|~}||~}|{{soijlgb`[WVSPICZ~ƾB=C7875-*' !%%&#$$##'+-& &$)Fmvsvvronmljiie]SPVX[[^ab_W9/Txk9!#$*:qykY:3FPWpsWWiu<6a}nC-(+,(#&/GbvybF48G[bbbcb^VPQ\aaho{x}h[XUVWVTOIB<=Op|}{~|si\Y_js»}}}~|yx|}uuvvtw{{yuvurqqvyrruxvusqrrsxwtty}|qeW\__]\X[^\_b__cttTKXZ\\Z[]`\Y[]monqxzyz|wtstsmjihbbekfceihfcjlhfjljimrrqoostqopppqtxzzpjebbcflmmnorrrsvwxyzy|}}}~yvssssstsrrsqqrsrme`adjxĤ}zvrmcXSPRRNH>3+'"!!',5BN\chghihhhfdaa\L=+!$/=N^jlnonljlnoqk_TKMLJJJGHJHEFHGGFGFGFFFFGIP^gjlprx|tgYNGGMQSY]eqηuGJJIJGILMQTY]dn{d)0)"!# "$$ !)+$%*&$*CcVG, #"")(%A`kz}hD;;5348:932HY_p}wvuhfqzy{|uyridh`YL<%'1,"! :[XL,)''EasZúzuh?16=XP9/3XpruwwsnbPPSZ]dkumda`_[\ZTJG`G@E8873,.*  &&%#"##$&,*"#&(1Kovsuuronmlkjhc]TSVY\^_`ab_[ki2 !"')*&)K`lQĽ|wtxx|{vob8-6HN:7/4WrqvwpbRNUX\bkvgcbdhe`^YTVlſ¼><>744354+! 
&'$"!##$&+' "'(*Ipspuvsonlkjjic\VTWYY]^`belwýf, !")DrwlS32EYjzglp69`nnaB()-.(#%-I^jkU@3:J]ghdefg`Z_\cqy|r`TVTT[UNIE:9?Oq~{z{{}~vnbUR]lxº}}{{}~|}||z{|{|z|}xvyy{wwyvqrtttqrwvrntzqlotwx~}sh_Z`a`c`[USVY\__^ZXWWZ[_]\]\Z^]\^__^Zdjkntxvonrpkhhgffdiicbghefljfglkghmqonmorttqpppppljkrtrojddb`_`ehkmklppssvxvwzz{}|{zwyxxwxvvtttrqjc`boϸ~ywnjicTIA5+%%.7J[bijjkmklkhfhdUF1% "+:J]ipssrqpppqupaSNMMIGIFFCECDFEEEDGEDEGGIP_gjlknruzp`NEGJNRVZbk{ʢZNOPNNNONOPV\`fq}ƽW".$'3!")Dc:##!$&(%!')'(.LdX?3"#%(**Ku`DOg[GC=845307Tw~||||wuwwvslahoptwvuptskjcdhd`Zda]`][WVTK=+4,""!Ff#*+&(LatM~ztomeec\Xbt{wtk[6,Us~||}~~vl^SSam{w|{|}~}}{z~|{wx|vqu{wttxxuqouropuxtpqwuklpuz~{rg^\^_`cd[WSTSTUX]][ZWSUX[]_\XXWWVXZ\^\]`]fquqprrlihhhgghgcfhhddggdfjjfdhnkhjlqsrrqpqnnnligknnoigedb`^aehknmmnrttutxz|{}}~~||y||ywwvvvuoebcu˭~~yu[PLE=3)!"'/9K]hkmppooomkiebR>,#"'4CTcnsuusqssrvvk^SKJIGGGFFFFFEEBCEFCDHFEJU`fjjknqsv}saSGFJOQWYakvœSPPRRQSQRQRV\bgr~ϸ{U#) .Wsnv}6###$()% '*(&0Pla[3"%'',bQTwjZ[UF?94/3Ruzw}{}{{|xtnlpqnjmmjihgfgdehmeoc__`\ZXUQJ9+1+"! XsU-*+&+LbiQt`]VNIHA=884846;WvzqmiU4,=@EJ;*:_suxo`NMTV[blvŷceelnjomib\pŽz9;<789:83+ !''%##$$#))'!$'*1Olrpturplkkjihd]VVYZ[\]acgorx\&"" 'DwvlN.7EOTJJ]bp}^*!Ck{xd=''--( ",LgzuW?2:Tiw~q_VWVY[XRKB98AWy~z{}~}~{ti_NS`mz~{~~~y{~zv{~zru{zvsvzrmtxwvrssrpsyvpqvwskpvy|}mb]\abcgaZVTSSPWWUWY\[VSRRRRVQSROMNPSV[]^\]belkqtnjkkgbafgdeggeeggfeghfdhjlfgnostrrrrpmmnljihijmljihfdbbdgkonnoprrtvyz{z{{~~˺~}}}{yxvusmiir־~zshUROKE:/&!(1>Scjprrpqsqnmlj_P=+$#(;P`lsvwvutsutxumUJKJFGFGEDDGFDADFCBCECDIVaefhkmmqty}sbRFFKNSX]dmyIOQRTVXVVVTV[ajsػzQ#%"I^,"#&&*)$"(*'(6^plU# "%./2k~|{y}xKJibTLG=32-*+9Sn~|||zx{vpjinqomonmljggfhqti``^[YWUTJ8,2*#!#J|?*.'-Qh\isiQyB&(/,)--,.222059Vz}xnjdS06@EIH7-<^sxvk[JKMT[dmwǼghdggdff^TRgļv8?=877972(#)(%$#%%&*)%!#&(0SnqqtusonmjjifbYTUWXY\^_cfihtĿZ"$#!)GzyoQ-7DNROP]^ve+ Gnzzd8#&--)"$/LmvmV&4>5+&&$!"!&8Wz~zztspmmfea_chklmnkjjkgghh]ab\\YXVTH5-2)"!(xz;%+) ,RhxmX{<))()+-/./.-.13;V{tnjaN-&.=AD7#@bpwthUFGJPW`lyǼÿehb_][^^WNOiûq9?=565642(")&##$%$%((##$'0Vlpqtuqpmlkjhe`YTTVWYZZ]`debuüX# 
%H}}xeH.8GSYbcegzg*"Hl}z^5#'-+%"%1PmurV91>Sox~xp}bZX_bbZSG;68>]z}{yz{}~|sg]SYbl||~~}xx~{x{}wwz{wuvwurpuxsnotspnprpnrtrmps{xod]W\\Z_b_WTPPPOQRPOV]bgaXTMHGEDDGDCB@?CFNTW\[ZXY^ekihjiihlhdgjfbfjkeakhgcfhigglquvwuronnomlkljihgfhjmlkijjgehlnoqrsuvvxxyyz||}~~Ӻ}yrfsë|vtj\VUVSQI=0'" %-4>I]gpsvxwuusrpojXB/&"'4Haovz{{||ywxu]AFLMJGDGFDEFCBDEABCCBCEKVbeghjjkpstv{u`PGHLRWY^aju˯hDINW[]\^_`a^^_hrյ|L##"857X;83,$$"'))"$++'*7^jR4##/\bK?fr{|ywwrd=+6;6,&"! !#>A?FLTWZYYXX\aeghhhigechhdchhgchjjgegighkmnvuttroopmnnmlkjkigjiigkolnkiikmpsssuvxzx{{{{{{}ќ{}øuiqҸ}{ysfWWXUVVME<+%  $,4;BQ_mtwxwvxxusrrhR:0%%0?Whsz|}|zwoT8>JPOJGEDEFC@BEDDEDCBDCIUcdghijlnpsvz}vbNHINSUZ^dluþƦcMFJS[_`ceeedbain~ղK!"!#5_PCLZe1$##&)("&+,'*9TjZ:!&@P4.7Zyw~|{yxrc;+6:5+%"$&&"%>\irwxwsospmfdofdfhheddddebaa``dhlnkllhhhgicY``\ZWWTQH4-/&!"9g|DnK(5Rqziaz6%((()-=CD?3./4?Xvvlha\I/8=3582+Egrz|sk[OLIJLU_p÷úïgga^X^hid`dtº˿h4B=55884.&#&'$!"!#&(6_onqtqmlmigffb`ZVWVVY[^^_a`hzſQ (TtuhheE.;GZyl^a{¼\!&KoynW/!&,*$&2VmvjR63@Zq~{vy{gflklmmZC96:Cay~zz|||~{pd[SXcl{ÿ~||}}vu{~|xwzvtvzyuruxwsrsxvtrutqqqrpnttrswyrg[[TU\\^_ZUPNNLMPQPSY`gptnjie_QE>>>>=?>>=>>BFHNRVY^]Y[_cceggffhiggiiiiijjhhjlfgkniktutsrqonmmoonljkkjjhechnnnnjiilnprrsuvyx{zxz||~āvnzWYʶ}yxunaVWYY\ZSND4*#$,3:?L]krvxy{z{zwuurbM;.'+7Kbpx~~zwlN;wdzB*U}k.$$$&()"-,*$)=orV/#+@?13[dbH3124:Ztsnie_H42037?3*Fhuz|sn^WWXYVV\dvödfc_W[fhe`ao»g5?;45895-&#((%! 
"  #$%5]mnnqqnmjhgffe`YUUUVYY\Z\__l|z7(Z|woa=0=JU[^LTcŻZ (RruuX.!&+*# '6UqwmM73D\tuxiinnomcR>87:Fg|~yyz}~~~zph[SVdq}ſ{~}{{|{|zvv|}zx{{tuy{tsuxvnrwwrostronqqllntstuxrbVWWY[\]]ZVRPONNKNOTX`gntrjjmlfVNGA=:;=<<<;;=?AFJNSVVYYY[^cefehjihiligiolhegjlciomhptuvusqonmnnnmlkmmkjhecgmnnmlijknpqrsrswwxyyy{||}~ń||{*]ǭ~||vsj\X[\\\[YVM@3(!"+5IYlyf\gúT)VszuR* &+*#'6\qxsL44A]wuz|ioomkf^I:65:Kh}|yyz}|}|qfZPWeq~Ŀ~|}~|{||zwyzwrxyxrrwwrotvqhntqmloonhmortvvq_SVSRZX\\ZVRROMLKIQTZ_fjoslcimlic^OH>;<<;::::<=<@BEJLOUWUVX]_afklfglicfjnjehkmlklrmnuwvtutrpnmmmnnoqwxrjgdcgllpnpmooopqstqruvwyyyyyz}~z|L%yst{xvsh[Z^^^^^Z[XK=/& "+59E^konnoqvxz}|||xukXE5-/4G_s{}vkVH;?MNLHDBBBB?@@BA@CBBDMYabcefijlnssuwz~ʹp`jqw~dPJKNSX]bhnuλnljdYPS[gpvy{|~}~ӫ|>! !(i?#1'%$$(**)"14,)&):bfY8! 1ISZQ=\~}xnY2)5:1&&&$ "3Yiorsrrppmlkkknuvggfebacdb_`dhijlmkkjifgffZ__]ZZXRNF*"10'" $cyH11.' 9\vojatʿm6'%&(,9??743659=`uuoki`G&6BJJ>1.Qmt{urkb^^bfhirsr}ý{jfa\WTVTMEPoĺĿ_2?955772-$!&&! !$&'Ahooqtrnjhfddda]YVUVUWZZZ[bhr{|sjxyqlcVG=, +_ztlcV9.?MbymUVf¹Q+RryiK+"&*)#':[u~kK15E]zwzxnqokgc[C754;Ki}}yy}~{}}reWOXgs~ü~}}{z}~zz{~ztvxxtuywtsuyvqstqnmttmmnqojirqrsujWRXUSZXZ\ZUPNOMKKLNV\fhjlooknnpmmsm_PA=;;;<;;:8::;;>?BEJMMNPST]cfgejmifinladlnnosurprwwvrqsronlmnnnnxzmkffjlmnorstsqrstuttuvxzxxxwx{}~ϩvpy|46ҷ}yzurtzz~~{yxrd^[\__``]^^WI:+& %,5846760*""'% ## &''@gnnpurlkjhgfda^YSSVUVXZ[_egnwzkPSK@<;20," .aypkhX30AO]de^bfJ/UppbE(#(+*#!(;Zv~fI15G_}|zt}rkmliecU@635=Jm|{xy|}|}zpbTOZgqº~{|}~zyz~}xz}~|xxzzwux{vss{xsrusonswpinopjiopqrqgYSTTSUVWYZWROMOMKMNV^fhilooihoollmqnf]TKD>====98;;9;=<;;>AABHMOW]`cglliflnhflpmnpwuqquuutssuroomnonmnnpnjknponqrtuuttuuxxvxyzyy{{z||}Śumfjxu'Xϸ}{vrsmtz}{yxnb]__aabaadb_WG6*" #(/7@Ukwznmlllfkpttrj^K8319oztdPBBKNNIDECBABA>@@?>?EQ\^addcegjnpqsvy{v&-,+)*(*09CSXUQOQSX^cjr{̿ͻȮ{uvxwvqk`\`huЪ|8!'Q^rhC!##$&*-+! &*)'*A_kP. 
)HVZXM95h~zvtpi_L0*460($")Ccnrsoqqronmllllqjdbeddda__aafjmlllkkhfgfgs|ha_^^[ZVSND+&4/'%#)08bJ*,,' !?_{zOqȹi13FNPPT`_e^gdeREizsmh\?,=LRLD21Wnt{{smbfhmqtv{ǿļxhga]XUVTJEWuıþU9>64882,'"#'$%!!%&*Biqorsqmllhfedb]VQSUVVWZY^bblw}zjSXTLKKB>2% -i~vskR42BN`u~ubh|D2ZmpjF)!'*(""+>=>AEPZ_``cbbgkmnptww{˹f&*'%'&%$"*AW\ZWRPSW]`ir}õ̳Ʀuvz|}}|{vk_YdtΦzw6!*wmJ`U@!"$#&*,(&'&$(1QeQ).=S[TO93oxvumnsok`O4-881*%!)Mlnqrqrpponnmmklxmhfeedba__`aejpnkkkjiggffhqhfc]__]\[YURN@(&3.&#!#,9lO%(+-'#A`|WwŶh3Qlplluvvphc`ZHAl{wpkeW;+?HBB<+2Xltxqpknsx{~¼sjgb^WSRRIFVtžQ:>8443/,' #'" "  $'.Fhmnqtqmjjgfedb]TQSVWVXYY^bdn|zlbehkgbXK4' 3bwttjT0/BLbq_NlſzB5atulF&#(*& )?`oq`C36Jhyz{scXY_jptkYI95;<;:9996779679:;?BHKOW`eehnmkkmolpt{xtvxwuuttttrqonnllnvpjklmrtrrrrstvwz|xuxy}zy||z{}}~ŐtoNJdx56ˮzx|zxxxthcggfhihhjjied_R?0$&-6:Dat|wtqnnkjhfdc\QDEKYƶlVD?KTPJCA>@?>?=>=?AFQ[^``ccegklnnrwxzɱY/-)%&'&##*DY_^]ZYXW]^hq}ʨzĭš|{|~ukcep̣zv0%V_sqrF !" %+,)&('&+BmbG+'=PM[L28r}yyxyyyvqdN.1=>5,&#"6arlppqqoommonkjlugfgfcc`a`_]ahklmjihgfgfecbcc__^^\^]YWSQNB''2-%$! 
'('$&*..'"C_rIzǴf8Ue_RTXRXQHB=:7AfvungaY:1?;;B8,3Vktytt|rjfb\USSQHGXwµþF9=7673/-'"& !!$&,Idmmprpmkkfffca\TQRUUWXY[`chuxkis|xrc=0* 5j{tlaR-3BKSRE@Vqžy?9`wygD&#'*& *@ctra?25IkrXQXiorviO;33;Ts{zxzz}}wl_QQZhw{}y|}}~~}}}}{yz{ywx~{xwwwxwxvvttxxoouuroqrolnplfjopneVLTTMPVTTVYTPMKLLRSZZ^enkgiligipnmkpqnlmoolijgZLE>=;;;;8855559:89=>@CHKT\bkllknnmlsvwttvwtrstuutrqolnnln|ljijlquutqqrsuwz}|zzz{{z}}|}~̽xkzaFAOg{|}i&Tx}|wyxupffllmklkmlmkkjf^M;+#%069Ogx}voponkjjhge[QLMQW}hgcZPB?>??>?>>>??ERY\^aa`chjjmnrvv|ˣI41,'%&%#$-JY`caa`^X[_fo|ΐ\˲umiq|ˣ~t/ %_f"  %+*( &('%*=OJ:( ,5EN=2:w}skaL/4EC7-)($"(Eptnrqnpnpnlmljjlqgghfeddba_]aghimikkjhfgfccccb`__]]ZZWSPL@%)2+%%""%$#'*.10(#G^wQUzŰ`0897,).+1378758Eetslg`T:2;?CI<*3Ylu}{~}uswystwxxqiea[UVSOFFVxƻF;:56972-& % !!#'+Hfmmrupljifedc`\VQRVTVWZ[_`kwxmn||vP73-"5ivkdaK-3BKMLJGXnþt;=dtxe=%"%)% )>]miR;19Im{r{TOPZ`oeI=401;Vt}{wzy{{~uj^PQ\hv}{{~~}~~}}|~}}{z{|zxw~{y{}ywwywsrsuvsoruqlnqpmhlliinqofUHRQMPSRSWVRRPLMPRWY\]iolfcikgilqliknljhlnkjlqoe]TJ@=;::8;977>@FKS[ccfhnnnnruutuuutuutuutqqonnnmqxmmkjjosuusrrttvy~~~}~~Ѷk]fooiVHA55>M[cMpR'|ٹxzxwyyy{{zyvtohlsqrqrrsqrpprol^J5("%-08Uj~ypkmokkkigec[TOLT\ø{OC?>=?==<>>>>HU[]]__`afhimprvy|ʐ231,'%#"!%5S^dfeefd_^_dmzڿmjʸξvsp̡zo* !1ybG9>9 '+*&"()($(,-0,'$4>FD09w|{zvngfaN07TG1,,+''*Syssqnoppnllkjknxfhgeedcbb`^`fhhklkkjifhgeeba_^^__\ZYYUOJ@$+3,&&! 
"##(+/21)%GbsliW{]1/,.1/,+538AB8;IhurlhbT83?@@UbQLAK<8ԭupnptmv|ywspjnz{yzyyyxwwvvxusm\I1) &+,?>=<=>>?><@KV[[[^_``cfhmoqtx{z"./,(%#""&=[cghiijhebbdjzɥئT~ҽλ}Ś|m)"".\`aa~D  %))&$''%#&'%(,'"2GUF.=|{vuuonnnfL/8D3&%'%%'*gsspnoqnomllklo~iggfccca_aa`cghikjiiieeeefb_^^^_^\YXZTPM>!,5-(%  !(+0:<2$%FeWZ0.1;L?8;KPX`Q7;FhtqlhaV4.4664.)=apwx~٪p`VZYIEAIRWUQYk}lif`[UXUOGF]}ÿ>9;76895.'#&!"$-Qlmotsomjggdcca_ZWUSTUXXV[aft}|uklynI=<7# "Brweh^F-6GSi{tt}üp1 Aiwxc9""(($"*>Yc]N;/8Mjqj~yUMN\m\DA<5.0C^xzwzxz|~}yqf\OU]hw¾z||}~}|~}{}~}~~~}|~}}|}y{xuxywwvyywuw|yusvxwoortqpsutqppomlmlnonkZOMSRMNPSRVWQNNLKNTX^Z^cijhhjjhfhomjlopkhknlkimogimqh^XND>;<;;;=AACEIFGDBA?=@DKTY`glpsssuxussuussrrpqqpnomnxpzunkijmqturstwwx|̙|wpgTBMenaDCFt[)R£{vuxur}{wuphaxxz}|~|}~||{{{{wn[B0% $'-A\oyqnmmljihfca_\XSQQWξc@@?====<>?==CMWZZ[\_acegilnpsw{;d!)('(&#"")AafhjmmnmlkjilyѫvȊYӯηŧǰ~o$ , -x{0!%*)&"''%&'#$),&%5U[B1<~}}|}yxnJ+54) !#-{zpsqpqqnmonnmlpigfdddcb[_^^fkjhhjijhgfddcab`^_]]]YWVQQJ= 0<-'%  !$(0:GJ6$*Jf{WY*.9Vc[YalokoR:;HjsojgaT3)5?CF;*=_ox{uآ_TRTPGDCENOIIYj¼jic`ZUVQNGG[¶{?=;66763/'#&$'2Qhlossomjggfdda_^\WUSUWWW\`fu}rbXgprk\JB<1  EqunlYB(7GTp{vngy»j.!GfzrY6##((#'?[_ZL7.:Ni`hvTNPidE;=;403Ca}{xyyz{~~}og[QW_gvÿyz||y{~}{yz}~}z~~~z}~}~{|yy~|vwyyutwzwvxz|xrtzxsptuporsqmqtqllnomnmgXMORLMNQPSWWSONNOQWYX\cehnhehmjfdiliglolhhjlkhhljbenngce^SHA=::<>@ACGIGGFGFB;<@DKQV]fmqqpuyyusutrsqqqqqpppop}{yqljhkmrsrttxvxzϽgQ^h`OHEM\QUO3,vٮx}~~zzwnbex`U[`fnuyzmV<.' 
#*@Zsvqmlljjgfed``^VPLOeɵL@@<<=<<>>><>@MWYZ[[\`ddfhjnpsx}ɱJ"(&%%%'$%/Mgklnqqtuutrnmwĕ}ɳueΡͽ|ʨ~h$$*-h\G>WfA !#'+)&"''&&%#$(*&"7OSF4D|xnH+12( $4{ttssrqnnonnlln|gefedcfcbaa`afjlkkkhjgeefdcba`^^_^\ZWVRQK: /7,&$ #"&0;DE<( +QdyS`,0;\kgcclieYA:=Kntpjh_L."*:HR_joUazf*!HkzuY1"#'&"(B\]^I7-:PljMg~uQNVgG<657/.3Ba{~zxyz|}~}zkeTOXahw¾tw}~|z{}|yz~~~~}zy}|}{~~~~||~z~~|~zz~}yy~{xutz{vx||yvsxxsnstpjnrsmlqtnjkppnmaRJKQOINPQTWWTPMNQUUUX^bghhifejlhegmhhiklkijkkllmmjinnlhjrj^VOG<9:9<>BDFGJHGEE@=<=AHKN\ejntwvwwsssrsqrsppooonqx}}yommhijlmostuuwz̯wX_\frj\TNHDD56⹣{~wtrlgprI??<=>Oetz}|iK6,'"!+@Xwxrnmkkjigfecaa]WPMTuŬyE@?>>?<<;<==>?MXYZ[[]^acfhimqtx|ʤ5%'&$#&)''7_nsttvwz|}~~yxyggȺѻjpx}循{c!$%%.$nw3$#$%),(&&+)()'%%),'#:KPD/K~{tjE+05)!%;vutrrrpnonmkmlknjdgeeebcaaba`beiljjkiihhgfdbaba_]]]^\YZTPJ9 /2*'$"!&,4>AA;0&*NflY`,1:KOBA=>B>?=:8Smuoif\K15DA:/!'Gcsz{Ҕ]LMKGBCCEBBABOghkcaZQTSOHFbør6;835744,$"#"! &4Xlqrvtoljhfeeecgle[SSSUTW^emz~zoYMH@@GH=75+#Ny{kfb>*9JS]pNa~e&%MozzY0#'&!,FZYVD4-=On|FJg|sRNSXH:300--4Ei||ywy|}}~|vldUMUbmx¾w}ww{|xw|}{~~{}~}~xw|y{}~~~~}}}}~zz~{zz}xxz{yssuxvsvywtstvtpqsqmlpqqnpqonnoplj`RGKSOHKRQSUVTQONPUXVTciggiihdfljggkmihllklknlhmnqijmpkhlpohgf`VK?<::9=?BFIHFFFDA?;9AFIP[diwtrrtsutrspqqopppopyu}zvrqpkihjjmpqpuwzʤ`esq~trnhh]UP.S˦~||x{{xtqjiucA=<:868CJUg|]<0-& !-B_}~wxrllkiiheeea_ZTNK^ͿbE@=>>>;<::<=)L|vqlbC(33%&A{uuurqpoomljlighhhgedeedbbbb_bhjmljkmkhgeedcaaa_^^\][WVRPG5 44+(&$'2<<;630-%-OiwMbW'39F?46;@CHFGB?Rrxoii^K/-,&$$#)Gfu{ѐTLKHHGEDECCEBUk¿kjb`ZUVUOFKcµo6;735752)" $!! "&7UkqrvvqmjhffecejneYUSSUUX_emz{qXNJBDIIB;9) %Tuwii[;,=NXtxKO__"%Ss}yW.$'% !.EQSQD3/;Pn~vR?Ph{sWMOQI>40.-.4Eh~}xyz{|z||tjbSJUamz¿{~wqtz}ywv{|z{}~}{}}}}~~zzz}~}~}}~~~z~~{}|z~~z{|~{vx{ywuuvvwuwxvssssssrrnnrsqporqmmrplg^MMLPPILQURVVRTSQRTTSXdkiggjjhhmmhilokgjmkjkmoifnolfjlkjkookjmlhbYQF><9;>>BDFDFGICA=<=>AGLWaiklqstttsrrqoononnq}z{rosslkjiklmlnsvw}~zĞgivtynkk_YE,׶~|vrrsqx}~zytrjjtV>><988761.3\nv~Y<3/,& )?_{zsjhgeedb_ZTRHeǵKA==><::<:9;;=ERVY\YY]]``dfhkprv|u'&# ##! 
+HtʻqĿɽustwvtuurk\^ixvx~|⸧|_ "4LGMWV^{W%(%%))%#(-3:;89>90 2HE71Q}yuuurmjbA,1-" '@yvstsronllmljihhheggddcbbcdc``gjljjjkkiggfddaa``^^]\[ZWTQF5#24,)(&.:;5.*,.+#1QnzNYXTiR,4G[WRU[W^de_G@Pouojf_J*%(056.-Hds{ˊSKGIOXUJHGDEF[p¿ije`[VVVO@NeѾɿk5<70566.'!"$! "'8[loquupmjhffedfkngWUSSUUW^doz{mXQN=DMVZM@, %YvrkbQ8.@KWjRCQbW&TwvO+"$#!.FSPNA2.;Tp{fJ@ALi|kQKMQIEB=60.5Hlyuwxx{z{ysj`PLX`l|{}yttyzzvx{zzz|}}~~~}||}{zz|w|~}~{y}~|z||||~|}||{~|wu{{xtuwxyuwxvtuusrtuspjnsqnlpqllmnkg\SJMOMJMOPTUTUUTSSRTOQbjgdeikggkmlgjljffkkjkmnmimommmommmrrnlnnlkjgaRIC=;:9>AABDGADB@=;:;:7433+$"('/<==;:;<9:;<>FSVXZZW\]^aceilnqx}ɹb!%$""%%$"*:ZvӦsнò~cbgnmfdc_]JRahjosuy߿~^!!/jv/#%#$),%#*.6B>;8:8($/AE?6SwzzzxutqhA*-*  'Ewussrsrpmlmljgjkifffdcabcbacbcgkllkkijhfhdddcc``^][\[ZXTPG3$25.+)(0;/+-*+/*$3Ogsh\XTm¿K.8]sjnmkhnegT>?WqtqjibJ13>DRJ2,Lhs}}ɇUKIQivh`UJEHXpjjd^ZTVTLDPhûe2:82576.(!$%!  !&:^nmquwrmihgeedfmpfXRTUXWX`fozykYSK6>Ued`T4 '[sriaR60>KT\acbhS)RuyfG'#&$ !.JVNL?0.;To}w|sTB<;@Oj|mOKIJIOVPC1/9Oq}ytvxyyz{ysh]PNXbn|{wwvxywvvyzwwz~~{yz}|zy{zww}}zxy|y}~}|~~~{|~}~~}}}{}~}|{~|z}}|xsx|xttywutuytpmvtonssppopomklonjnojaWPNQQLIMOPPVTQTUUSSTQUaegeegmkginljgkkhfhkllmolkkqolmpojmptokmomjkllga[PF<98;<=@@BADDC>=<<82-,*Fl~xqcfgfeba_WPLY̾j@???<;;:98==<>ITWVYYZZ]]_`dilosw|ȯN$###%).+()4GX^^Zddowyz{١u˧þ{sYR\fdkheoྲ}]!" 7odUI=0$ !$'+"$*,1401482"#5TQE5V{uc@).+! 
%Mststrrrqnmmnkkikggeeecbccbaabdijklkjiiiifdddab____Z]\YYSOH0$36.-+*9_kqf/+/+"6TpuiWkxG+9RZYYQKIHJH::B[qsqmlgN16GTTA-+Mhs}zƃUJJ\z\IHJ\pdia^YWVUMGNhùȾc2=624881) %'# "&=\onruvqljhfdddflleYTVUVU[elu~{jVI?59C[^d`7 ,awsohP3.AO^t|thiO,TpueH'$'$ /IRNN>11>Tiywqs}]B?<;?AAA>>=::;=BGTcpsrssrrqomllllp{~oqwumnroqqomjknoonpt~~ƣuakovgH(~֨zxz|{xtqhhrQ@FD=70,-) "9O^cYCBD@5Tv|fffgdc`]\TOMeȻO<@@>;=:88;<::?LVXVWYZZ[[^`dgjnrwɣB#%&)4IM=0.6GSX\X\UUZ^_bhf֒vұĹżiZenptstxา|V!",?%!$)("$)+**+283,#$;TQB1W{u_8'5/ #'Sxvssrppomnkhjlhjgdeeedcdcca]_chmkmmnljkigedb`__^_^Z]ZXVQOD/$56/.)3ve/,.+!9TqfoºvG(47553/054445:@XqvppolK'5OD;7.1Qluz|UHIazWMKIZp¾|cia^YXUVMFOiͺǾ]4=62484+& $'$ #(;^mnqwtokjhgda`dkkdXUTTUS]jt|ziPC>:84@O]J( 2a{vsmQ00@M]wrb^l}L1ZqyjA'#&$#/KQPP>/1>Wht|ysoox~dD@<:9So~cQQQYjfK6.*-5Qq}xswxyyyzxoe[MLUap{~{vxwuuuxxwvx{zwx{}{xxzz{|~|{{|~yy|~zxz|~zyz|}yz~{y{~zyy{{yz|wx{~}xtxzytuwxutvx{uovwrmowrlottplmrqooqi[RKKPNMNNOOQUUSQUTSPNOZdfgdegklfgjjhgknifdhgfholljjnjimnlllosmlnnljiomlkqma]WQF;52579;=>?>=;9;89=CQgqsqsqqqommlmlkqykxyuopmkppkkiikmoopt~wJ~t_l}ujC5~~{xrqbjiFTgRB7,(+&$=c~ymopj_avggefec`][SLIqٿij?:>>=;<:99::9;@OWVVXXXZX[^bdhknrw}Ǖ5)*.>VcS@329IVYb{v]VZ]eĶ̆vźþ`Wiorwx|ߴ{N! !! #''"%)+*09:1,)%&@LO=/\zrlX57_I2*',014\qvvrsoomlmjhijifgeedddddcb_`_dkonmmlkjkiffc`___][\[YVUSRLC,$66.+*;~I+./) 8YvaxŽ{D,//-,,,.02368:@\syonnfD,@?+47/2Vmv}|xPFJfeOPMJZp|dgb_ZYWRKGQlʻǾU8<4354,#"%(!  !'=]omsusoljggebbelndWTSSUW]my{kPFGFF@99A5  1fwvrcG.1AKQOJU^n|G/Yuvc<$#&%"2ITTL<,1>Yiqty}|zqeaepuy_>>:7;Tn|]PTYdR8-+**.5Tv|wuvvwxyxvmdZNMXcq}û~uqstywtvwyzyxx{yxv{yvv{~|vuz{zwxzyww|~{wxzyxz|~|y||{||zzz{zvy|}xuw{{vrvywpqvxuoowuokrskipvqlkoqomlcYOMJMNLNONOQRVTRSVRQLU[`cggacgjgfijkhjkkhhijjjkomlnpmklnnkkpsnjlokkmonkilqkfhg]RG=864568:;;:99<:98AQjrtrrrspommmljmufj|yriffjmlkjhijjmmoy}\iuO2Bewz}m8JȬ}|xslbmehrb^Q?3.'"&Ehjsthgfgfda\ZSLQ|Ͽ˿i<=><;;:99999:>BMSVWWXY[[[`aagkotx}Ą,*)09=9/&'->P\gƷ]Z`puq§RSaagmvܯ~K!  
%&& %).4AB4***$ $9HI>4]~{|xvqlfUDfxPIEHL>5ctuursnnmnnlkjijfgfccecbcdb`_`glommlkliiicgfc``]XSUSSQRSRL?*&73+,)0^_+./* 9YtaYxw>,.//..,1469:;EJauyrpmeG6;89?>25Sow}pNGNvdTPOM[rzgid_ZWWSMFQpʽŽU5;545/'#!&(""!"(B_onsutqkhhgecchqn`UTSTUXanzzhSOV\ZRFD@6" 6hxteZJ-4EKMKKX[qw@.YjbV<##&#$4IVRJ9,2AYmsvyyxrh\diikk~X6845>RtyZTWWG2240)+/;Vt{wvuuvxyyvmcYMLWcp}¿xopttrrsvvtstxwuyzzwsv{}{v{yxvz}{wwx|{yy{yvw|}|xy}zz~xvwyywtx}zrquzwqqvuqqottrotvtooqoimtumkoqpmjeSKJNNJHLKLORSSRRUUTNPX`ccefdcfijjjlkjklnkjklijmnnlknokknplilnokinmiknqolmpogknja\VL@:75654799:;;<:;?Xmssrrrqnmlklllnzp]ktrkhkkjiikjhijilkr~zhwzadh^Q@[yaee|wC<6968:7>=>BHORUczyusuiK3>JPOC44YovkJDWz`USOI_uÿuehb]WTSRLCRo¼M/:632.%! '*% #+BbooqsuqjhifdbdhmjaXSTTVXap|zgRQ^kqaUSN;" 4kxsYbF(4EMU[a`fvv=2Xa_R6#!$##3JTQL:+2C[oy|}|yqecjfcjoQ7534=Ps~u]TSKEHJGG;/1=Xvzvuvuvyz{ukdXKNWbo}Ⱦþ{rqssrtttttuuwwx{|wvtz|zyy{yuw|~xuvxyttwzvuv{{vty|{ywx{wutxxuvy{wrsvyvtvxtqtqsqtvxuqrspmmqqnilqqmh`VMKKMLIKLNOOQSSTOTTRTafcefhgdfiiikmmhhkmkfjplgjnlkknonhknqmknmojnnkknonklnkjimkhgkc[LB96422445688;<@Eeqpstspomljjlmpt|}hdikkhimnniikgfjjjlmrhgw}|U/vw|uncchJSatu~xtgifT.&$ 8]A}~cgqrlgda[SKQn̺¯{=<<:;<<::8;:89IMD?.0VpxjKG\zjSRSNMau½refb^YUUSJCUrºüI29224/'#"'(" #)Gdmntwuokhgfdcchqn_UQTTVYcr}xbNL^qyqa\O6  ?o|mhdC*4GPcyojxn75Y`_Q5#!$#$4KQMI7*2G\q~{sidaagos|E4124?VusZSPR]]c[Q=01=ZtxvstvwyxzulcVKOWco}żyttrtwvuuvxwvtx|{usuyyutvwyvvzyvttwvrttvuvz|vtuzzwxwxwtuwywwy}zttwvvuvxspsuromuzvrrrrniknmkgkpnh\PHIMLJKLONPRQQTRUSPNRdnhcdghdbfiigkkkhhkmiejmljknkjnsoiknnmmqpkknpninqokimojillkgijhd]UG;52/1123569996;76846>=Caz{wtqjD,EA831/11359Pu~{siagR:?GLGMQMJZmtwsps5!.PzEhznu`RMI]ȸN?=;;9989865699@MVVVTTUVVXVY]`chorv{ɐ,&(%$"!  
(;M]mSalɩܹsֵĶgv~Ѵ{~}>"PjH:!QY& $(+-*&%%%$$.E]P23q~tX/13,($  $/vsrrspoppmlkjihjhebcc^[]]\ZUW\`acfikjjhjfaQSSUNILIJGJKIHFC7!*65..-OU*-0(#E^ykkT|Žv@120/.-.,/11389DdzzywrkD0<;;@6.=\rssuwxݬgPESltf^PKOMM\gwrjec_XUUTMDStø»@4725CJE<%&)" !.Lfkntvsplggedcdgjh^VQRTTXcp{u^IFSfqrj\Q@' Kv}td=*8EMZaOO^{k19W\VG1##"8JPHA3(3Far}vorpoorvk620/4@_unNE==@MMDF9-1@`xxuuwvvwxyri`PGPZdq~Źypnqsrqssqptvwsrttrqtvxvrsyyurvtoruvuut{ztrvyuprwwtnrwvrrvxwtstsrpsvtqttroostsqpqokhpmllmonfXLFGJIGJLJMPQORQTTRLMXbfkjjhghegihgglojhknkikmlgflnkglqnemrokmpoikoqnlmnmlllkkkmlgekjghije]VN>850/0239=Ffloqpnmljjiijm[^gjjhgdaejmpokihggjnu|̵harwxqG*w{ptpr{NH^wzqfddH;DO]q{lcXTG@FPa="#5_nId}v[QJImþ±@<;<=9878968::;BPVTUTVWVYYXX[^dgnqv}~%'(#$!!,BR`vYajƺuӦƾȱ}ewӹx}9 $Qz`UPh[.! !&,.2.'&&%#!,KY?23t~}qU465+%"#3tqqmlkkgjjjif`ffc_YRSTZY[TOTY\_^cfjjkidbSQVSMEGKKLKNOMHF>1,;60.+4Xu?'/0)!$G]k^]|z3../.-,,-/1269BIey}yxvmB4EEED7(;]opmjnv۪eRGRklfYGGPHP`i|ľkgfc_XUVSMGXwǽŽ~B4614@IG4!$)##/Mfmotvrokgfebbdilh]TRSTTWbq|w`IHL_enkd_K%#Kyyb;(8FQ`qqgj{i.8TVPD0#$ #9NOIA2)2Ias~|wutrqsu]./.-5DathKC;B\]MSJ3.3Aawxtqstuwxxrh]OHO[euözttrqptvsrtwusrsurprvwsorvwtrrtrnovvsqv|uqsutrrrtvtuwvrrwyxutvxrpqurrsvspnqupmpstlkoqmlmpndVMIJIJIJKMLNPRRSQPMJPZfkljfhkga`jhfdllkhjonhinnjekmljlqnmlpmlmqpjflomkmmnkkmkjjlnjehkjigghfhh[LC;42//18>QjmnpoojggjghijmjZ]hijhfgfadjloonnjiilpx~¢}~`fwz}v~uX-Fvqtxx[]dix}}~xq^f^D>K_hffup[6+,&'IoQ^{yldXPIP{ʽiAB=A??>?><<<@=>ITVVUUUWXY[ZZ\`bglqvi%*&$# $9JT^㻔ea`xپzx½ȰvlyϮy}9-+3\E 
(-02-('&'%#0JK>25zyw|pU045(!!"/|pkllha]^abdeZXac_UJUYYZUQQORZ^[ZZ^eihidYPTTNEFKOONPWTPIE=/1A6..+E^:,/0&%Kc{n\v6132/,.02:<7AIP\p}xzzm@,;:>A9-AbnpnpuzڦhVNTjkgSBHJL^ck|ngeb`ZXVSLH[zŻ{:7733:DA+$'!!%1Lioosurnkgecabdilh[RPTTTXcq}~w\HFGTcnmg_>$!N}|sZ6'8GTs}ragf);URMC-#$%;RQNA1+5MaruoljnnsX./-/6E`tdH@=QV@DI5,,4Caywrqrtvwwwpg\LIOWctʿvrpooqsrnnsvqptusnnstrnmtusorwvrruwtqvzwvtxssptvuusvwsquwursywtoqrrnnsupnmmpljorqlmnnloih`SNIKJIGHKLMNOQQQONLNQ\dfjnhfjkdabifgiljhhmnkjmqmjkokikonkknogijnliipoggjnmjkkkhilmjjlkjijhiikjcZRH>71/17?Yjmmpnljgggfhiigc`cfghhhggcdgnoqqliilms}{~_ox}|}qF$]wcaacmxz~tm\eTA@ayV:MWa՝εlZ`Zȳشhuľ¶ĩljzɪtw4!+*wP8@@"!(,,*()+,+($4DJA4:y}|rP,33(! $3m_chnnb^^YW\\XY[[N?ISUVSKGJOSSUUTTZbgfgXLUURIGKMOOOTWURLD?..:2-0-@`jke?.22''LdpTvA9?ABCENU[ablpqu}|{}|qA.45>H9*Eeqtvyyy~١gZQTag_ICIR\adjzljgb_YWWQJI]~Ƚz87725;D@+%'" "%2Okloturmjgdacbcflh\TQSTUXdt}~s^DAAJ\hheT8!Rx}shT1+:GN`YEFaa#=TPK=*$$(:SRO>0*4M_nz~~m_]aehrR/0--6HcvfB9?K4,1/)*-4Gf{wrqstvwwunf]JHOWdrɿtppopsqonrutoqwvpqstroquutrtvuoozyrquzurtwvpnpuuspsvsmotusqtxupnrpnnsvtplmmnlksplllmlklhZRKHIKIGIFKOPPOPQPMKIVdggilkiknhafffdgjifgjmjillihjkkhgklkhlmlgkkmmfnolhlmllmmljjllkknmijllkhiigc`aVLC<512?^imoonliedcbfghfd``eghiifjljlprrrjikkqy~xow~~sct|o;)yw`[]_cpuzzqc^cK=@kn`]L?@G+ 6]lK~h~nXWOJ]³[SOQPPRSSTTWSUYYXZYZ[\^^\^`_[__`dins|ͩ>,)*-& !#-8KXpӼ`S[_bͥԧ[jȿպưĢfm|羧tx33U…@OUkf" &+'&%'(-/."5L[E3:yynI,./&$! 
%9YWdloqkd_RKSXXYWRD;JY\VPICERVUTSPQX`fffXUWUQLSSLKOOVYWUMH=.0:1-.+Lwq-051()Lf{Wsusuyyx{~}|}s;#,;PH((Ggsv}՝d]XNQXSJO[_a`ce{ºkkfa]XVVQHF[ϿȾt13503( !"(AZZT>/)3O_llt{~k][[[bqW.0--5Fcw~c<5:70----+.5Ilyupprsuvwtlc[IGOXdq~Ⱦtpqrrqooqvurruxsopsrnosusoqutqnnvpopsuqntxtmouwrqswvonoqstvywrqpqolpztnnmooinqsjilpolj`ZNJFEIGEKMLSQPNQONMLLVfhgmkihikheeeefdfjhfgjhfimlgfikhgjnlllnmjkmmlllplhmpoiknlifjkjiknjdglljhffeefjcYQK>36Bflmlmlkfca^_bbddb\^abdgggippnnqtrnkmpv|}~o|diw~rEAw]WV\glix~wo```C<=KUgnrq|`@"$AhQa{~af\SMQfʽuSSQRRTVUVWXXVY\[ZZZZ\\]^_`abcddadfkox͘10XoJ*+0"!*=NXwĮi``f͘ʨКeƽ˿̿˜bt~Ѭus1$_m!3YzO" ')%$&*.83+"1CPE2?|wkD(0<)$#$#&.AOO_cffedZLGLPYa`WHDS^SQOJFGUXVYSPRU^ei`UTSRPT_VOPPQY[UQKG?+8>2..+Db6NO/& -PjzV¶}y{|j+;NJ8)+Fgv}Ҙc`\VVVVW[`^_`^d÷lkfa\VWTOGF_νȾo06535=IB) ('!$4UinpstqnjhfedbbjlaXTQRRRVgv~pSD@AKV]ZPB2$Xppd`M2.PPI='"!'BQPJ:,*6M^ddhruvlXRPPOWmU..-,5Jhxz^43888<;?;1.6KlvsnosrsuvrmbXGHQYeqŷrmoqoonotsrqrurlpqsomrqqpostqnoqsoqstqou|upoqxtqswuqlorqsuxwqkmqpmkquokopmjgrpjhmnnml^VOJHIGGIJLLNQPOLPNLNOWiheijjeeiifbdcbffhiggkkfhpqlihiigjnmjoqonhjljjjlmhcmoihlnlighjkjikjfgihhihgedhicf_YJA@Lhmlkjgfeeb\]`^ab`]^]^aeghopqqqrrqroosy~~x]mw{y3^~j]_nzxz}|ui[dU<:655-:K`k{jB -SvO}nn[XOMXvþǹfRTRVTVUUTUYXX\\[\[Z[[\_aacbddcfddgiovɄ%""(CLJH:-*8M\^X`ghfUIGIJLSk>-,,,5MkxyS631/JiwxˏdaZWVX__\Z\`beh½iicb]TVSOFFa̺Żg2:435AI;%!&&&7UkkorrqnieeecbejqfYRRQPQZkw~{oTFB?BLOJ;- /_vkd_F*2GWb`YKSk~K%ESNG8" !+BOKG:*+7M\]\\ZZTIB@CGMXmj2.,++5RkytE06Pd]YSL5*,4Oktplorttuuqh`RGGO[ftrppqoqtrrqqtsrusrpmqtsompsrropqrlnrqokourjmpproopsolosrqsutqnorrmloqoopnkkonnjlomkfUIHEDFEEHIKPMOMNONMJKQ[jjeghiifkklljjhdfiihfilhghlljhkliehllhilnlhgjkllnojknokimojhikiiknmifligekighgfdfgihdbabfjmlifeee`\\^\YWZaddb`a^_ejlrvvursuuwxzz}ϮPF~~jZH3@L`hkoorw|M8}yp^[\?630*(&#! & &CmýVk{stxnm]UHWuȿsDEPVXXXWYZ\^^]\__Z_^\ZW[aefghjiikkilou}˻IYu8ߜ[-BLT[k]\^̻ü}msyɜ˺ȵ{l{Ҵ{m'$>2#.$(+5>=.#$$ !!4,! 
+[yxzuwyxtiB:pbOQO>IUkxsppnllaTPZ_`\ZUZ[\[TXXPPSW\_^YUZdheXPMMQTUSGFHGHGJJID@7)$9;21/2Xwzl4+0.'!0TnggztqmeO'3FE?:/2QfwuɊe`[XX\__^_`_`ehehea]YWRNFIg;ùe1865:BM=!!'#';[llquurmhecebbflo_TQPQQR[kx~|mSC@@ABA70$ 3^skbYE-8SjnliablzH(EULF6"# *?NJA7*+GOZqb0+)+-7VkzqC.3B>31/,))+5Qptnmqstrstph^RGHMZetɽtsrnoqrrpostrpprqmknpnklnpppmoqolnrpmnpsrlorqmortqnprrppuvsqorplknoomnqnhinmlkmkhfYNICDDEGIJMMNMKKKNNJJS^hjgfggjiikjhfehdaddggfjkhgkmjiimlkilmljkkkjjljimnkfjomijlmlffhjgilnkiifegikjhiiedgfffdgjghjklhdeefb[Z[\ZYY\`bcca^\bfjouvuustvxx{~y~qKW|~[Y?&&$%+101=zn.+1-% 3Slkl¿xsqniQ483011.3Riy~xȂ_`[VW\^]\[^``ee}dfca\XTQLFLeͼ·^/763:EC-!& (<`nlsvuqlgcccacgllfVQONPSYlx|{hQ=B@;971*  2`phaYD(7b{t|nbVmwE(EULF6"*BKFC7(/>Taa_[ZZUH>=DGN[q\.,*+1;Um|i=,/51+)($%',7Sntpoqqqsttoe[PEIOYduȺtqoonpppompspnqrpnjlopnmnsqnoqnimprontsqopsrpostpmksunmpsrooopmikmomklnkgjllllkmaOJFBFHFFJHKLKMJLMKLKGM^fjgdbiiheimjhgliedfgghjjegkpmiiolijlmkijkjhkllkjlojfmokkklljikjkjnnmjhiefhjhdhjhbdhc`adhifgjkjgfgfc][\\Z[]\]^_`cda_aejoquuuvvuwy|~|il~ZV/$%"!#&')29M[k}g(m}~~vqfTZF93/$ ""7]}r]woy~vyvvpnj\XSOdź±2",=RV\^_^^abbbcccaccba]Y^ejlnpqrrsttuty~͙1_n{".@PZxЫl]V\]dמ׾kâ~ǼȾǧĢlr}|漜zj%!!VtL')-+'%" ""Em{j<;zt^H8,,('Mwrmgfhgijke^`b_YWZUQKQ^_X\UW_eh^T]ilg_VTLERTODFHIFBFFIGF@6&#;:.21;W*+.,$ 4Srwt~||wtP7464*-+/Snz|~ca]YT]a\YZ]aack}efb^[WSRNDKjʻ\0731442&!&"! 
&BclltvtrlhffedcgoqdUNNMOS]mv|ygMBE=9032.(8esj^U?,;XcqrYT]pv?)GSIA3 "-AHIG4'+0*-%.Rlz{xba]YY^`^[Y\abbj½z`gc_ZXUTNGKl˺S.841441,#"&  '?ckmsvspkgfeeecekfYSONNPS^mwynb`gZXZXZXO=;drgZV<*8SfumN]dso:*HLGB4" 0CHIF4',@Sa\Z[ZYWQQNMOU_yN,++-.?Zq~e3,02+('&%%(-;WnomkoqqtqpmbYMBDNXfuƺ}rqoprqnnrtpnpqspjmonlooomlnupljmommnqtrnmpnllpomkrrplmqrppqpojjlokllllifinnmgcZLFCBDDEDGFIIKKJNMGFGM]aekkffijhfhlmhfkcaehieccgjgfkiefikmjhhjhijmkkjjkijmkjknkijhjjhiiihhjjhghgebelghijhcdhgdeffecjhhgcdedb`\XXXYZ[^_^\[]_ccceilqrsstwwy}dZH  !""$*0;7 C~zvm[h^OYafihhggihhkhgiieb^^emrvyyz~|~~~n%f}yrP.@Q_Ψ_Z^|ɐʏ\´øмʾ_zs߷u\" "3XI;+&1 -/-*+,+)*,'Qmjzf;)43'(]bfecbZW^ehfdaaZUQPRY[]^VUYV[ad_\dllki^XQJOUPHDJKJIIMROMH>5$';8230L2#+-)" 7Wxox~tL$&'"$&'2Wn||{vca^ZZ^a][[]_\al|ijd`XWVTPEJoʵR683.4:?:#"% !!"*Eajntvsolheccbba`\VSNMMNU_nx~}Y!>ivgaT6(:Sd|odcbul7.IKKE2!! 2BOK?2(.@U^WQSTTTRTQOOTcyE)+*+1;Zr~f1--*''&%&&)-:Vmolklpoorsn`WMDFNXfvŻrmmppomosrmnqspmklqnmqonlorsmkjpommqromppomosqnjnsrkknpnlpqokhimjghlojihjjjheVE=CBDCEFGHHLJGJKHFHKU]bchjfbglfbekjhfgideehhdceiiemljehmkhijjihiljjjnlhgjjghlkjkijjghhjhikkiiigedfgighjhdbdeb_gieffjggebded^Z[XWXY[]^^_^]]^bfggiknnruvxyy}ZV7" !!Rxs~zytgWiO:CXcSTRPH?:1-(!3Yuntzwvz}ujd`ZRPaο´R6) )=U^gijkklkkllmmllmjdacjqx}ϺTRY"4IYbԫWXaϾɅrӲ¿ȽȾcw|wķ}\!!*J)$&!0/-1996770* 2Qz}yu_8(82&$^\gcg^\ZaedidbaYOHCHQPTVUZVX]_c`cijkibYWRRWWODEFIJKKMPNOJ>4"+?:330>jzI**-)#;\y|lxvS""!"*-,7\n|{xoba\YZ^`^Z_`_bfqwfifb[ZYTOEQqȸM07127DQ5!$ $  *Hennttpnkhfddba^\WTQLLLOVcnw~ľS ?mpg^S7&9YnwOSTWxg2.JOMB.!!1FMJ?/).BUZMHJLKPPNMJLScz|K1,*+1?^v}d3+*(%%%%(*+-=^oomikonnqri^VJGJQZgvķqkmonlkprmmorplmnmnqsommjqrollrtnlqrqllqqnjlpmkmqqojjlmklnpokhlkfhlmlkijihgaREABCBBDFHFHFEFGFFFDHZgf^bgifdijcfkniffhigjlkecfhifgjgefkki`gnjhhkkiimljikihflnjjkkkjiljhgkkihikgbeihgfiigbdjdbfkgdfhigeb`bed^ZZZYXZ^^^]\``^^aeghikllquuwy{wYM("#%-61#Twfjodlz~|zyq_[eC9GYR:@EJNPPKD<+$?fb|xytuldc^WSSqɼɻoG@4(!)?Tajnqqqqopqqrqqrmjf^doyϯ=6r{{j%-@L\mۿԤ[Y_ܨ̥ܿ˾ƺĿƶ}j{zzW  .8!*;a?%60.:C:563+#"Qmpz_1+62&'a`fgibbbc`hid_bZOH?BAEMUXWU_^agglnnmcYVXSX_ZOBHIKMLJLKKMJB2 
,><22/@wD,+,) !>[zf~Q#-68>;09[p}x~pb_[YZ^_[WZZ^^ctrkjdbZXVUPDUsǵºJ47237FG1# !!,Kjmpuurpljheca`][XTPNKKLRbmu|~||z<Cnpf\O2':LbU?EKX{h02LQMC-!!2IMH@/'0CUVIBABEFEFGFIQa{]5,,-2Hfx|\5))()(&''))->[lljjkmooqojaXJFIOYhvŶ}tnrnlnotomorrmkopkjmooklnrqkloqnknrqojlmnminolmoponnikjkppnkjlnidilkjkjjifZJDB@DBCEFGEDGDCFFEEEJWcedcfhhgkkgbilkgeklfhijd`bfdeeehhehljgglnfijmhgnlkijhghimlgdijjhfijccmkigijebhigfhhfdcdebbfebdgfdedb`dddb_]\ZY]_][[[[^```dfijkkpuvwux~eXG###7k],'i}vxwodXUbfs|}~yxxlZ^]<9KW>#"#-!Hoog[N/(8EJKIDQ\}c-5PRMB-"!4HKH>.'2FXUE;:>?=<=@DISc`3,++2Efv}Z1*+,,**+,+'/A^lklkljmopoi^XIEHKYiwĵ~uppomnpolnoplintqmkmnmjlprmimppklotpopponmnsqnoopommlikmqmiikmifhjlkhiigd[HACCCBADEEFFDDCCBDDFMZggdeigfdgihccjjieeihfgiheceffghjjhjkljijkheikjfhlihhjidehnhfhlkggkjhilkhhiifcgifdeffddcdcceecafecba`abdba_^__\[^__][[\\``acgkmnptvuvvy}~`V? $DνyQ!H~s~xrsobgXaqlwtwx{wtvdWcM39QS6#!'7ML%4WuoxzqladRJFB:.#$0Ibrvyz{{{{zzyzzyyvqiiq{ύ3*/CO_nU_s׎oƾmױƼþ¾жǪpp~uȻ{O! 7c*(/*(('(*.*)%!jmB//bv[1.60"!-w{fgiidf`^jhaQOUNPIFIQ][VUY][Z\gjgfdWOUWX[acaXPLPV\UPJHFIFB1/E9330:?QuJ)*,' %E`jxĿ|G-.015//7^or۪k^_ZY[`^[YZ\\]fyoiib`\XWSICVwŵɾ;05127@F2#$"-Nhlntvrnlifdcb^\YVRMJJKNS^lrvvumblrywn^TF6! 
"Mknf^L,(8CKPS[fm\'8TVMB(!"8ILF<-'4EXT@::=>;;=>BJTeT/,--4HhwW0+-24238=1)/B`lklkmmooprh^SIEHO_kyĵ~}vqkllmnnmommmptqmlmoolinqlijprnkoqqnpqnmlmmmlnpoljmmifilomhilkgdgijjhfdbVH36QO6"!8S?">ezcxrvԿʾnRQJHB8+!&9Wp~}~~~~~|ytnluu"44>jW/FS]|ܝWY[}Љ~ϧf̣ûƦȰ¡ou~sŴ{|O >tbM401/7S|qP,-'$CaZ{»{M!167<7.=_ss٥h]\YX[^^ZWWY\`bumihba\XWQJCWvŷʿ~>5711:ED2!$!,Qflnttpmigedb`^\ZVQNKIKNT^ksvwsiPBOUMBD:4/" #Qsqi_I+*8GNUi}oqY#:RSJ<%$8IID:-'3I[T=9<BA@CBEBBCCCFB@CEFKYdgd_adfeeghfggikhikmgijjhgfffdeihheegiefhjffikiiijjigjifhkmgeiihhijfbhjkhfgeccffdbceecbbcbcfdcfjjfca_`abb`ZUUWX^b``__^\Z]__bdehknquvvxz{yh@*!<_GEbT$%GtkinulRsmq|k[N97./;2221RF+,&%EarU~K1EFLG7-@[qw٥h`][[^_]XVWX[_dymigc`ZYVOHCWvij½z235118DE.!# &3Njnouurnifcd`^_^ZVRNIGIKR_jquwqhTHPRKD<733+ &Spme[D)*;GO^nbkT!:NKF:""9GHC7*(3I_U>9;=>>?ABEJUhY-**+.5Mjz{Q+,3;AMbN-(+2Hdmjhlklmopld[RBBGOYmzõ~{|}~}{~{nmlpmjkprpjiloollkkiglmlhfkmnloqnkknqmlkmlklmnlhmnoiikllljlmhfjlihhfc\K?==?@==EDDA@BCBB@BFIQ\aiib^dhgfgkiffhgfdjji^eigdeegdebgfecjjgfgjihjiihkkjghijfbikgcghgfeghfeghggggdbddddghdbbddcbdecahhfca]_abba_[VUWY]acda`_]\]b`bhggjmrstvxy||rS,% F_Q(0cU"(Z~tk[Vfgutoeb]]E(&0L@2,1FLH5&%;[G!2Vuryv°[ZUSPMG>4)! 
&7hy]DGT_jmx|||tr|7%Mj}jN)(7JVdŹ\YbװvUƴƪȽizkӯ{zI);R+ Ae&",*-8>4'&'!1xxrP+191(!"2h^hggffe\ONHEG`gc__\]``]XSX[^_`]Z[\[ZXZdffcWNMNOZ_XRMKGCB=,-;4226ii3'*%'GanUH3CKHB:(@asxҞd^^\^`]ZXXYYZ[eziigc_ZXTNBDX{õv7640/8F@'!#%7Tgkotspnjgcca_^\XUQKGGGJS_jptupeYShkeU=57?<&WrmcXA(+=HOha?EgQ =OKC8 $9:;=1+*LUo{WZiڠĺ¯suǪ˿ɽȷi{{jάzF4},BV)2,071(#""!'9az}}oQ-.4/$ 7gaihfbcbUMKFDYdc]\`^``\WX[]]^_`_^`a^[YYbcc_XROIR]_[QOOGCA=.2<4431Mn{K+(#'Ifn\A0RG@A2(Fcq{ԛd_[]]\\\[WW[\_dihfc^[WTOED[}žƺp654019L;%"!$5Vjlousomkhedba^[YUQJHIGJQ_hnsrmd_jiA57@5.]pjaY>(-MOG7&%6JZO:469::>A?BHTlK-+*+.9RnzrC-/7I\L50-*+7PiliiijkloojbYLBCIR^m|ö}{{|{yyzzz|{|zqmkmmmopojimnmlmlmmmppljimmmmppmjknonhhljjilpkijnlgijlmkijiegijigdbVE=:N:.(#(>LLOKPXVO0*Gj}nxzƷdc`^YZUTMH?4+&+5-$"!#$!$.0:HYitqr{fTzu'-?RZ^NVYsٓέnĤɼѺƱn|yfʨz}C!GE6a*7:-.+%#!"!!&?^p|nJ(.5-";mhgghgf`WOGN[gf^RW^^^[VX[YXZ][__cjh]WSU^aa_VQNNWZ\UNOKGB@<*;@3230el5*("+Jeq`ĺA9A9A>1-Deqyіf_\ZXXXZTQRYY_g¾cgec`ZUPNFF^Źm052-18?:$"# &5Xjmouspnkgeecb^[YTPLHFGJP]hnrqldbooH4170-[vm`T9&,=HOLEDOn}z|I#>LDB6&AOPH7&%4NaL712524;=>BHVmO,+((--/-+(+8Phlhghijmong_VJBCGP_p~õz{|zvwwyxxxxy|{spnlinpplijpollonkjkoljijnnnlmlkilmkikkjjloqnkjnmkjlmligjjhfijhdb`RB69:@A==BCCDBAAACBB>J\bfgjfffeeddeffghhhefjiieglheehfedeecbcfhhhggiffhjhhkgdfhgegiigdghfeffechjhcafeabdccedfeeffcbbceaacbbc_]Z[^]`a^]\][\\]^ZZ]``__`_acfhikllmqv{~~b0-!%K~t^8$"'V|to`VJ@HG5(!1P?6AEOL8)1Uus~vȿhecba_YZVPJ?3+++'#  %.=GRhtpqzvv~|xW>W[`lxw9,AVZjȻUY`}~͡lڽžʹï}n}xcȪ}? ;I0_l4!<8,+&$""! 1O:x~oG$2;+! 
=sjiffeb^ZRXdehd[PT[ZVV[[VUVZ[Y\cdbaXSPT\a`\URORTUUMJKKHD?5$=@1233jp6%+*&##+Mfngʿ|<=CALB0-Jgvv͐a_[XUWZXTTXYZ_fhkfda[VSLD@aǽĹk16523:E< !!(:Ylnpuspmjgdeb`^\WSOLIGHKS_iorqkbXjtL.0911`qgbT5&-?IQRKCUu}|sC%@JIF3&BSPH6'):LYH1*+-./68;@GWq[0))*.=Tp{n@05A<,*,-+(*7Rijeejjkmnmg^XKABHPaq~Ŀŷ|{xzxvywwxxy{ytqojionnkhlnnjlmmjkmonjjopnnoojkjnlkkqmjknplkjjmkiikmighligghid_XN?96;H: ""(;_klsvqonjged`^][WTNJHGHKRajpsqlaYj|oT;8<*4epf_Q5#-BLSZ[Ody}t=(CKKH1)ANNF3&*;QUG1'''(*046=9:>BBCEBCA>;8;Meeccdheabffb^cfdefhefghhfehjjfdiifhjhddgjddgjgceijhcghfbhhgghhdcegfeddceehhebcfgccee_^bdccceca`acbaeccc_]\\[]^_bb`\\\\\[\[Z[[\_db`bdfjigjloty~v?6* &e~I (@zyrUPT?/&NǹP`5'Glzq{zķnacilkjjikfc\O>2-# )>`|s]HrH7JUm˗ݚQ[\ѷr̢̼ʵɱğmmxv|qiǧwy7!L^I7I"<;,/13/,*-,$6q~|{oC:hv[E75789OsljghcgkkkhfhgbVLPUYZSRPSZ[YSU]\\XWXTQROOQRSTQOMKJGLPLHD;0 8:243;ke61...,%3Ohhbɽq:2,32201Okyyǃ`_YYVZ\XVTXZ^`kĴejea\XUSNFGeν¹c073/1`lkqtpnnigfc`^][WUPJGEGJPbjpsskddqzqqj[GF:$ 8cmd\M2$/=GWhgjr{r=)FPJ@-(>KI@0#*:RUE/&%$$'+/29ERsd4)(+0?\r~e7/2>PQTOG4*,:Wkjfehiihjhc]SD@BJTbqĽŻ}wwuwvsxtwvwvy}zriikjmlllnkklqnlloomjjiljfflnijlkffgmlgghlihegkkiiiigdghgfffe^QE?<;>=<:9>BA@ACCA=979322-!Fu}oKjvZJFG<4Trmf\^^_fijjdheaWOMRTMLLPY]\VOS]`_XZZWTMKOQQQOMLJKQOMLIFB9/ =9/64?MRhc;51/1-%5Tm{hjȿi.-468:32Qkxzvā^^XVUXYVPPTZ\_n°fjda\XVRMGMd̼º_+52..5:& $A_llpurmljfeda^\[WSNJHEFJTbkqtsnhmzuNPaOLM:%;inaZM/%1>EMchgdwo7*HMHC/$8?>1$ ):RYB.&$$#$'*.3?QuN.+*+0?_rd3.6EMNKE:,*.;Yifbdghgghgb[RB>CGQbp~Żuvuvuouuvtvwy{|rijnjhimnlggmpnklmllhjkkjimojjkokfinliglnkgiiiihilkeegihfeec[MD>;=?<:<;<@B@ABC>987ARddfdehihbdhdbcffdcijgefkgefggifhhgehhd`dhfeegjgehifcafhdbggdcihebcffdcdfeiggededebcedcdfbaaabb^bba^bdaba^ZZZZ[[^ab_\]]\Z[ZZX[][[_bdefgefjkllqw|uV88 '_t$$.h~zucHUG@yxwjv5"9^yevȽk(#%'*-qQ10!;7.5;?3-,,'J|lwiQudRE?;0+Wnjf^YWU[afheea]WOIUWVVRV[^\QKUeec``_aVKQQQPONMKHOPLNLID@;0$A817:BLRws?54313.&5Qq~_p_-8A??<13Phyu{{^\WRTVVWSSYY[`s~cjfa\YUQLBIgľ]161/.0, 
(Bbnmrvsnljgdca_][XRLJHFGKVcksvsolvq;6?BLG<*9;?<87;=>BA@@;845;G\bjgabefdcchg``efbcehieehfggghihhihhfhgcbec_adhgbehgddggbdficeeigccghedfhgehffddec`cccba]aa``baa`bab_ccaac^XZ[[Z[^b_[\]\]\\\^\\\\]_acfggghiknosw~v}C6-#E|ye%4uwv}xqWJY>Petκy4(Eh{o|zĹA#&;Yb_^mxk4#Iƿ,&G]9%0-F{P*CS]^WXwאsvXiŻŻſϼƶJ`polb_}ygq®sr.#".,$>8-11-%!#"!=n|zvve@7JK@6,# $*_gia]WSMU^bcda^[VJTa\\WT\]\THIWolfgiiaTPURQQMJLIJMIHNKHEB=3#;94>=<;<:789=@A@@>9445=L^beha[affb`gjd`ehgcggfffeiedgggddggddfifacdacbdeecdgfdehhbeihefffcbcfeacedcchfbceda^dcca_^``bcc`aa`aded_^`b[VXY[\^aa][\]^^]^\]\\[\]^`aehghjkknpty~ym2," 5fwU'I]]jjrvy~xz|xlIML8X{x՘z,.Qqn{~-#5MT/.RgT$ '\k|*Sl÷vX5CdhpA0HS[~؈NX\Ȼyycտ̼űzPampg_h~yav޾vq*"'@8,-*&$! ! /Zu~uf9%293)"(fiieZVUS[`defba]QS_a_[VY^[VSPK`mmmljj_TSWTOOLMROMJKNQMGEB:/%689IR[OC?:78:;<82'8Zxwiuy="#$-0/:VlxswoYYVSTVUXZYW[[`lwfke`[ZWSNDPkʽĻS-52../*"" +Hdmnsurnmigecb`[XTQNJFEDJWahprpmo^<86ESG$BknbYE*%5CNWai[^c+ .GNG=),CTUA.'""#"#&*3?\|X0-/02=Tg{Y0,:LUPPK:+)-@Ygb`_dfdhhe]UK@=CKTbpspqqqqpssrswzz|yrpojejnlhgknjfjmmkgjkifhijhfkmjghjjfegkhfeiifghifdehhje`\N=?>9:==85:>>?>?<5145CT_cefc^`cfdceha_bgfcekg`begeddeedcgigcfjiebfcabdfeeffffdgfdadf^_dcb`cddcacdceee``ddb`bbbabab__bca__a__aba\]a`XWV[[\_ba[XXY[[[]\ZZ\[\]\\_dijkklmnqu{~}~V/'D&".XyeXV]W\w}{wb=JB.]Ѝd~ 7\xj}uǼ\ "?XN!-?.!-r´ICd/dhzȩqN@|;5NSYdPTX]}жiչ˿˺˴ëw`bikebs}v`|ۼri' " !/=+E6,,*&"!! 
Q´k^~uc9%28/% ,hplhc\VWY]cfgb[NU`\Z^XRUWSVZTVbjhhiieTSRSSQOLT\SKKMUUNDA?8.&58AZjq_G?:9=@>=7.%SjvxxlYXVSRTVYWWXYY]jxdiea[\TSLEOmʺúN/52./00- !,Geinuuqomigeca]YVSQMHEDEIV_fmnjjleMHHO[E#GjmaWC)'6DP`neOba) 1ILC:'/CTT>,(&%# "(3?Za;46;@P_n}}T.(021//-+()0B\fc_bfffifcYRJ<>BJTcr½ºvopqpprpqruwww{~wtolihjjhehlkjiooljjmkggkjhejpnigmmgdejieeflg`cfgcejlhdcZJ?>=<:;<:66;>><=;6169*'88Eb||oUAACCA>>7,'=Yzz^oeh|q`ko))?C?>6,>Xl{uygYYVTXYXUSRTW[_lufjdc]ZURJCSoŶùJ151039C," ,Kjkottqolhfdc_^ZWTQLGDCFKS_fkkifhz\VTJTX=Okk`UC))7ELcdRLa{Z& 5KLB8$0DXW=,''%""(2A]v^ST\dift{M*((&%(('&&(1E]fa^ceeghgaXRJ?>CJTaqſÿürpqqnnorstsuxz|yrlijkjhgjlkhjmpoiinkhgjnjgfkmighkhcefiedfgge`hecdikhb^UC8>?;:<;86459=<:736:CN^bY]fd^[^`a``cdcbegdddghbgifddgfdcfjhfdjhdbdecb`cbabefcceedcdcddfggeeihccddc_ccaa`bb__`aa_`a^Z^b`_]^___dbab`a\YUUWYX`ed^[XWX[YY[Z[\ZX]_^_abdfkolkpty|kp5%;\M%Ayqy{vmLMM5-%%c~""-Pqqw|-$@sqlfa@ # >.%Et~S]Æb9EWA #$(&=]zxh\TSWXVUWZh٠ҶqǘʸǬǿVIcxromt||rWܺvg$"P{v,0B7-1/+%!!!#Zsfv|}w]2)581'".ousrm^Z[YOQZ\ULZ`STYZQNMOZ]]\_c_VRVUUURROCIJT^^USUYXTJDB?8*'45FYy|lRJMGA?>5*(>[{_i\{zpmfdx~anwkSk]$)32.//,@]l}vyܰcVZXUYXXSOPSX[duofiea[XUPJCSlĹøD37/-4<:%! !.Kkjouuqnjgfdc^]YVSPLGDBCIR^ejhfbl~lZSHCKA2Qqj]U?');ELY`]Zf{U 7IKE9! 1FVT:)&#"!"(2E_~vx}}mgwxK*&&&%'(&%'(0E_e`^bccdfe`WSG=>CLV_r¾¼½qlpoonpqrrrrux{{voiijkgffhihfikjielmhfilmgfhkjeegje`ihhfhggdfgidbejhd_U?<=>;9;=:53277:9647>DP`cb\`dea^`aabefdccfeceiiediedbaffcbefccfjgcaddc_aee^dgebeifedddacedcccecabcba\ca_b^b_bcbbbab```ca^]^_`_ad_^_`_[VTTVY\adea\XXXXWY\ZZ\ZY]_^_addhjopnrty}}zzuv^*! Nwg"-ZvqlWX~~zufEQI/)$A­oU[6]{luǺg=2CrO%*.+_oM +Pe,XzZ9MT;(&++DkrdZWTWVZuڑөpҼɻµĥŻO\o{{qar|yn^ۻyb (x}uoO1(/A0*,+&!! 
!#dx}vZ0&982&".mrtvhW\c^VVXTN[ZWUZ`UMIJS\Z[\_dc]]\]YSPRLFMPXXYRRUXWPKID<7)+68EVi~qh[PA?<4(&?Zxaÿjfrvd^]avtTYidd|m9%#135<50BYm|~v}٪eXXUTVYXSRTTV[dvnghda[YUPHEUtĵ¸=36.-4;8.1Pgipusomjhedc_^YTSPLFDBCJT_glligry]D7*!'J|Q1EZwqUYYYyς͚büķ¨źQZefe\b{}wh_ݴza!#@=apkN#+>/)&%#!!!$ka`|tW2*88-"!0uroiY[cdaXONQRURMS\\SJGLVVSTX^hlca^\WOONJKOQRRQLNPSROOJA95%-7:ITVl{hVC@<3'(AY^{ch|r[XX_nyaKR`ewgB.'-AABB3+A\ozrr}֤bWUQPVZYTTTWWYcwlgid`[XUOHGUvο736015AC.!!3Sikousnmkiebb_\YSQMIEAABKXdlppnmunE32;EQ8&Vmj`V:%-/(##" &s^:8_}uX/+75(#4qhYT[`ebYXWUSQMIT[\SNHLTJLRX^gkhdd]VRNMPRQSRNMJFJROORH>80!.9, #)5IUP5+'$"! #)6Je~lk}r=(&%'34)##"!/Lda^\_`acb_\WOAFYdgec_]`b`]abaabebacbbbabgebcfgddedccedda`fgecdabaaccbcfebdedbbcbaaef`adcba_`a__ba_^`_`^bb`\^``]\`a_]_b_\``ZZ]`___XUTUX[`cc_]]^_`][YXWWYYY\]`befgjijnruz}{z{}}|}}b2+!6XZ[O")L~|}{weCNF("!Jַy'1Zzr|yŹsW>8Udlgi\-') %+4@C2&>mȖoGAmC%9ONZ{ux\Z״ӿlȷǺŶw^mxWXjq{|s_fɽwU 'z@FXH+05,'$%" )tV!0g{vU+,75'&7{[SRX_bjd^[VX[XRSS[YURQQNJLTY\jrjfd]TOJNVWSTRMKHDJMLRLC=7.!/8=IPPhpea^JB>4(&G`lpO6C]dvtjbV;7KDBJNPJ=GR]YZvwyo_^XTEC??8'&AG@7//Gcx~oԜ^VSOOUXUSPRWXYf~ehgda\WSNGEVzʾǿx4/3/.6E;% %5Wlnqtrpnjhedb^]ZSPNIDADFN\gnrrmhjyibjrfN9+ ,\vwmW3#.=DIG@KYq|~qE &-,#',/8NYQ5/,,'#"$'/:Liymm}j9#!"")(#!!!2Ka]Z]__bb`]ZUOB=AELVap~zoonnoqqnppqsst{}yrkgddjlibcehggimliffhiffjhcbelgfefdgeeigccfeca^^RA727::748:50-+-/136;BLcdche`[bbbb`aabbcecdhebacgdcbddd`bcca^bccccgffgfdbbcbddefcceb__ab`_`db_`dbaabc_^ab^^^``ccc`]\`b_]]^\[]^_]Z_^X[^]\[^WSRUX]acb]ZX[_``[WXYY[YY]__bfghkhjmrvz~~|||}|}~}U3.,no %1ev`ots`r~wqWCL3$!E~ΪX"=e|w{y´`R61Ne{p\E# )IuĄe>@qnK'$APZ_ia^ڦҷwάȹƳŲjViiYnmo{{qZjϾzQ3f$Efm+.3-(()$" 1zN)6rxoP),50%'%"'5Xlnqtppnihgc_^[ZUPLGBADGQ[fouuoe_b`m|xL)#*_neTB%"0?DJMJMWr}|}o@  "0;619P[R:312.**,,1>Qoylo}j6!!! 
##!"5La]Z]]\^da_YSMB>BGLXduxpnopomnonqrtvzwqjgehkiddfgkjkihjgffffhjibbdikbdfeadbffb\`ca\ZYN<52287558953.,,.028=BPfeehea`efd`ace_`baa`b_^]_bc``cdbddbaddfcccihgehhedbbd\`feb_dca[^`ba`ceabcc_^cea]_a]Y]^_\^__\[[]\[\_]]]^_^]__\[]][\^[USRUZ_cca]ZXZ\\`^YVY]\[\]]`deggihjlqx{zz{{|||||{M2)#>xK!$?y{sfg{uoFB@+$!"OyGtD'Flow{ʻ˿NO)/>?J@#/TϹwZ2""%AfmnV)FQWtwWiӕЯuŝǸť£cVaah}hi~|nVp̺}{P2lJ4b~O%,3-,/-%")$1sZ(,;{ymO$(2/#$:dXadgkmkjcafighbYSRQQRRKKLQQJPdmjheXROOZdYSQPJEGKKMOJ@><;=)08=LUVu\GNLFB<3(-IcnZ>FgkforphcbXPXSD>>;FR\`\[VR\^YZ_`\XTUQP@21301511LdxzvϋXVPMKOSUONQUUZ^}ijieb\VRNGBZſo163.05C;!"'9Xjmpuromkgdb`]\XSNLIEAAEQ_lvyvp_OVan{qQ-" EI<*"1AGNWWOWtzxk; +  #3GE<19QYP:665431015@Urxkpf1" "!! #5P^\YY^`_ba_XSLDAAHS_m}{oonmonnnonqswwqgfkmifefhhhgihgedeeeeggd_chif^addacdeb^^ab^[VI;434895369701/--05:?CSdcfkhdabhfb_bcb^aca```__caffccccdgidacddaachfdegfbcccb`adbabeb__ac``bb_`bdb^^db^\c_XZ\]]\^_^\\_^\Z^^^^`a_]`aaZ[\\\_^WTRTX^aca^]\YZ[Z\^\[^`]]^__adeijhilouy{x{z||{|{}oD6&>u<"'Rzua9D<)" @ƣv]ye0Uug}tȹiKG  .D+$-!&,$!6^ͪuU,!@tn71MUW֯bf`R|ҋϤsջŹɾƾ[W_aorarzvlSzڴzyL&H5%I\[9/7-.3-&! 
&-%)@\g/ ;`|nN!&1.#"5dZajkpllhachkif`PIOUW[[VSNKIHRbc^^YVTOT\_ZTSMGCDIKMIDB>;D<&17;PTYs^JHIFD?4&*IelcM=T{~zuppk]V[S?=<>Xafgffghhhhihca_[YTO@/4<>D:(.Qdvyr͊ZUOLIOUVONPUU[`ehgea]XSNF@[ľj051--6B8!'9Yklournljgeb`_\ZUPMIDAAEQ^iorl`JHRbk{~x`2" "1AK^rYQUw|zc8 +  1EMF<4>R\SD?=<<:757;FXsyio_/"""!$4P_ZY[`bbbc`WXRGGSXcsulmnnnnmkpsuvmchkjfdcfhfcehfgefeeegjfb`gkha]deaadicabed`YRF9013676448960000159?FXeddfifcddedbcabccdcaaddc`ffec`cbadda__cca_bdeefhhcbfecdcba_acc`_`a`__a_\_c_\^_b^[]a][^^_]^a_[\]][[\]\\_^^]^b]\[Z\\]_[SQRVZ^aeb^[[ZZXY[[[_`bb`_``adfhjklnqvy}}{|||{{|{}Z53 !Errhj]!".ayWkwvtvxuV>J9#!"Di 9b|}tyw²TE6 "- +/!3hl^O>3!!%AlǺqF$012LXsijYWXSS˄͘pΫûǩĹbdbin_i{}viT~Ϣ|xI'7ms,4?+,,*'%(--$+U_Ic}xlL"+4-"#:XYaeimmie_kiiie^QGLQX_^[WRQLIL[YTXZZVRUXYXUPHECCFKKIEA@@F9 37>OUYpVGIIGE@7&,GejMFGFC?]~xrheYQOKCEFUcfmrtsrpnmnlha___]ZQ>*4AEA0)7SfysoʄWSONQWVTSRSTVZffhhd^YVQKEGaͿľg*33--8D8!%@_lkpuqmjieb_]]\ZTNLHC@@AHQV\YTH;:>EXvvls[+"!#$# #5S_[X^``chhc`gabjw}kimlllkoruvoiihfcbefgefghgffeefhlkdcfjhcafhcacfea`dc][PA8103432249;9621257;?L]gedegeeehedcce`bccba_bcc_^bcb_`cbbdbbdfgdbcfgbdghdcdfb^`c_]\_ca_`ca^__^^]`b`[`da^Z``]]a`_^a^[Y]\ZZ]^]\Y]\[]`^\]\ZY[^^WSQUY]^`dda^\ZYZYYWX\_dd`_]`cegiijlpuwz}{|||||z|~H.)'cg%;rul^j~ynHDG0!!%jJ&Hjqw|ƿɽ?>, +> ,&2zvQ*Jvpkpruyj?$:NSawcWvbXURcھćyÕĸe\ha[gov~xhXͲw~F$at4 ?A,)'&),0.&=ou~wkC(.3*!7N[c`glkd^eikiie_PFLQX__^\\XOKQ^WSWYWVVUY]]VQHABEFKLJD@DEB7 17?OW\t\HHHHD>4&.HXdOKJIECF`wplbYX[ahjkoouwyxwvurqqokhicbYSH:,18>=56?RhysuāTQPNNRUTRQTVXWgfjhe^ZWQLEIbνĽc+42,-8E7%@^jlqspnlieb_^][XSNKGCA?<9:<74>Mapm;!  
+ %6DLVbd_b}~{}c0 + %/(:KQMG;5CZ``\XUNHEEB<>GZssfqY*##%('" !';Webabdkpvx~umnmmnonuoukjgddefedefffefgedeghb\ehf``cdb`abea_a`ZVL=52145420159;97457;=?L^egedgfdcefdbabc_^ada]^cb`^_a```aabefcbdffb`deebdh```da^[aa^\]````ab`_`ba`bb_]_c`\Y^^]Z[^[\[\[XZ]\YX\][][[\]^][Z[ZXXZ\ZTQOVZ]``acb_]ZYXXWVVZ^ac```bdfghhgjntwy}|z||~|{}}m;.&#Ux@%I|gy{ya931268632`pxrd`S4'   +#5CMcdQIa||y]) + + +  46"1COQK@35FZeeeeb\SLE><=H^yrgrO;6:;AFGLNQ`izxsmlozsqhdbbedbdffeegihfbbefbcfhfabfg_\bdca`ZUOC50--02551/2789999866?Ocghigegjgghjfdeeebahe^]`aa^`ba^``b`^adc_bec_adebacedbeca\`db^[^^^\__a_^^]\[_`^]]]^]__`[\[_]Z[[[V^^[WY\\XYZ\ZY[^]XZ][X\[[SOPSX\^_`a`_]ZXZXYZZVVX[]acghfffhjllnuy}z|}|{}}~H0."Xx}5#5l^Rsvqcr}xlJ>K0%sY#Jpsy|ÿĻ~KgV  Gi(!==J!">jyo~urvryztQ5^0&Ii]-BOTSVkJUTTT[w؆͡tĦö˹nWb[Wa\r{sY^ⴟx|;! #/3)3YVg~~~~~~{{ywusqrsnhga[WSK3-;:133;EVgwy{޻qRRNJJPTSPNRTW[caiec^XQNJFNk̿ĺU-5.*1AE/)Meilqrmljgcc`][XVSPJFBA<5.'*.0.-0Cah\LFGA2( +  '8FOY_cbh}{wZ% + -I="3JUQE;17J[ghihe`TLF:6;JXzyvtnlps{xjkvrnbbfjfbbhjgdehid`^becadfcbaehbY\cd_\YWJ=4/+,/12410146669975->\hkihhhihhhggfdeed``cd]]]`^^^beb_`de\^cb`eie``ba`_`dcaaba^]aaZX]_][\aa````_^``_^_`^[\a^]W[\ZSYZY[]_]YXZ\WX[[[Z[\[XY]][Y[[XOMPUY\^_^a`^[XWYZZZWWWWZ\_dgjigfghklqx{}{||{~zA1'H}U'I{zm^t}{w`AEG'"NapI*Wy|x¸jFqN$U_!+F3.ǂ#Dwutnpvw}nI3aR.hS7MOSazjPTRRS[ͅǘ{Ӽ¶ýfgngaky~||rU]ժsw;!#2-?XN+M?/7<2& !=ĸnIgevh<$.5$ LO[c]\\WX][[]^_\V^^_^ZP?>>>38AYkrzpeX`ejd`\cc^[`ed_^bdcbfdbb^dgc][deg]XTI83.+,0131///257657545B[bdhhecgiebfifddee`aacd^`bc\^ab`\]`c`_ba^^eeb\`a`]]aca`ccbaaa`Z]``][^c^^`a`\\a_\Z]]ZZ]\Z]TZ\YZZZY\_^ZZZXXVWY\[YZYTWY[YXXYXSKMPW[\]_^a`\ZWVY[[YY[ZYZ]`dgilkggiimsw{Ƥ|y|||~f4( D~i1 +Z~m|sV8H= -QzM :_||~yɿPYy62cO3P,0a'O{om{r_kvgA>[UAt4,DPQdҤ{aUSQQ_ڽѿ԰Ǹ¿Ľir{|~|ynV`аuy; &Nyu[*D<.50' !!D|-3cpxd:"/5#TM_hggbUUYYW[ZXUW^^Z]UD:<:::@LVZYVVVUYXVQLHDFDHKDDDDGGEIF9*';>HSZc|ZQSQLHF?1)8d~rYZUi}}|~~|||zyyyyxvttsnnnlnjcb]ZXSF2->HGA<21%  + (8CKVVOSk|{sN + + + ,S]0 
2CIE;1)6J\hmlhfcYG:4:Rys~[P[cd`_[WNLSW]\\eeb`cgfdbcih__bfb\UPC6/-(*/110/--/34333206J]^cgieehkhfikjegkid`eda`afb^]_a^[Y_``a`a`\Yc`abcb]^aabaafecca`^\a`^\^`]\]`]\[]^XY[]\[[`^Z\W\Z[[ZZZ]\XZYZVWZXXZ\^[XV[ZXXZ[[XUONSX\[]^_a^[ZVVXYXY[]\\Z\^chkmmkjjgmtx|ȱ{yz{|P(+ #Yl:Xo-"6rcfyyyxlL=H- !""$#Jl|{{ɼRswObY&*%Cc,EP!IɿϹB!2W}}prlgqzy_1=gm77CReʢvURQSgݭ϶}ɛȾ˴fu}~{zgLmּzs6!Lt7'G=--*% #!Hoxy~wc8%-0!!ZQ^hqm[JU]WW_XPOW]SQXK<779?BHOVYVNPRSPMHEDCBECJNF>AEGDDFB9''6;HV]kwwhbf]QIE?1(?e]Zzc`Ok~|{{z{|{zxywwwvvsqomlllmic`^[UQD3(Yk~v|ܯiTQOLOTTTTRPRW]rmdfa_\WSNGFVlǽ¸M08/+088. /Ohjnrqomkie`a]XTNKJGB@>:1)$%!!,HUSK?@H7-   *:BFC@KTjzzpL + !3.+8K_moorqeYG95EsszQES^ed_UC;>CGKNPQUX]df__aiic\^ba[WP?2-*))+--,,*),/1///./9Pgaaggfdhjfciojfffie__dc^^__[[\`_\]\_``aa__cdc_``c`\abd_```c^c`]Z\`]]Y][Z]^_^]^a[X^ab\\_`\[\\ZYW[WYY^ZUZXXWWZZYXZZXWY[XUZYWWVSPQVYZ[_```_[VTTVVVY[\\\[]^`fknnnkjkpwz~xzzz{{||A63>XS}P&>zMNsuu|tdA?C)1ZJ,')Quv}~zŷ{jngy{gSP2.+YJ#cx#:`zvqps||yvS./dzPp@.6GUd¶vVRUqܟʪ|кżźƼky~}{ywcJrƿwq1$ JV:"09N+G7+*+&!!&' Tw}ua;#1-'dHZhkeJCTYS_]SLPUQFIMB757@ILLS\b_WVVSLFDECJKJHMOE@ACKHDDC;'&29L[\f~vneXKE>2'=epmjdaMk{zyyyyyyvuvuustuqponmlklgd`^^YSG0.=86=36C]m{o}ڭbRPMJLQRQMNPSYbtvaid`\VSMGDSsżD,4*,1?M;!/Rgimrqolkifb^\XSLHHEA><:1%)?I@@9;:-%  +  *;BDB?FLm}rxpI + 
0Vl^-!*...**7K]inspmbQG;54Q{ovR;JY\]ZG8477:;<>>EO[bbX\`ef^]_\XTN:2*$'(,/.-+*(*/0.***.>Vicbdfgfcggechjfdbeed`bb_\^`^]][^^[^`__`a^^`ba^]`a`\`b`__`c``^`^\\^][[]_^^_`_]^``[\^\ZYZ^[Z\]\Z[Z\VZ\YXY[YYWY]XWZ\YW[ZZXYYXXXWQOSVXZ\]`_`^[YUVYXVX[\[[[]]_eilorpllpw{|{{|{||n@4&/d~y8,\{~t{uX8E@#:}jH<'*$4^w|}z·qȺŵca8)|ƼŽo'DitusruwzpK&.\mcHGP6"7HSXy¸^RXzהŝmǯĶθlz}xvurv{xgKxpt/%%0AAWb';/-00.**+-."Z~lh~ua9$0*$gyN[c`QDIRPPXVMQXSMKMH@46.2),5JL-#1Rfjmpommjidb_^XRMIHEA?>;/$!'KZ^^^`aa`[[_XSL60+'$(,//,)'((*,-+*+2?\dd`acfgd`hhfikjdcefddbdcbcba[]^Z]\]^_[Z_b_\^a^\`da_^bb]^^da\^aa`^_`\Z[]Z\^a_^^\]\[]^]Z\_[Z^^^[\[\]Z[[WXZ[XXXYYVXYXWZYZXXYZZYYUPPUVWY]^^_^[YXVWXXWYX[]Z[ZZ\chloqrnpty|vy|z|{{|[0&E+!3q~v~|vnK=B1eй1;e{}z|ʾjδ+4¿o+Pruu|nswvyzqE1kK;MU|`yŸTNWΉ`ȭ¶¿˸zmuk\`[fu{t`H}ìxk1$8w,--,5:<8/.-+X|[Qjw}v_5&-)&he^XYIDRVQPTGKQTNMOPJ@<@GPD=N]loj\YXTMTXXXTUSPNNGACHLHNLA4$&29L]\azrRLG>1+?f}zHo}x|}|zxxxxxyxuuuqomjfhlkifdb`[WUD/2=BDA63A]y|t֢^TPLLQTUOQTWTZ`tmegd_ZURNJHUuźƿ{9/0+*3C?)$6Tghkomkkifa``]YSLHJGB==:1$#*?^|}{{}{wZ+ +  +->JQX^^eywsx~k= #IdlX)!1DN4,&*9P]c`_[VQE882*8Is¿mxF20?MTSJ;66687665>IV__ZYYZ]^]UOE0+)%%(,.-,'&%$'()*+.4Geleaadfggciihhlmhdghgcdge^^a`[X[`_\]`_]Y]_`^]`_]_bea^`a`]^a`^^ad^Z__][ZZ\]`___^_^^^[_][\]\[]_\VRYWWXYZYVVYYYYXYXXZXVY\YXYYZXXYWMOPTUWZ]^[ZZ[ZWXWVWXXY[]Y[]_aafjnpqqtvz}~gm~A#"$"Rq%@}ji{wd<;=)![|dž)Prw|}}{}Ⱥ}oĆBF!6_zrtphu{x{c:@{6%>P]pTzyHR_ÇξtǶŶºýʶƿwnqZR\]kuyr`N~Ĩzk'$ ]uQ;$/-,6?2+&&&$!Z{}t]1#.) 
(uk^UQLVZ[TVNGMPHGGLSKDJMRQGFS_krg]ZTX_b_WZVVUOOQJEKKNSLI?6#'6;O\^liRMI@0(DpiPr}|{z|zxuvvuronkjjjlkhc`^][VF,(7B@:42Hcv~ooם\UOGMQVTUSSTSXdxhcgc^[WRNHHWxźŽ{4/2,+3C?*#5Xjjmpnlkhedc`]YTLKIEA<<<3$$2Ya$ + +  0?JZuxnjx{x{}|m: #AXaM'#JfVF.$+;O[]QOOQK:450,;PwizF,-7BMNH<43212024:ELRVTONPYXSPC2,'#$(*+,+($$'''&(*-3Kaphcadigggikfbhkheeffedffc_^`a[_`aZ]_c_^\__^]ba_^`a_S^]\[]``_]aca_]_^Z\]ZZXa_\Z_]Y[]_^YZ[WXU^]YUWYXW[\YTXYZXVWZWWXYTVYYWUYVUVSTUNOQSU[Z[ZWWXZZZ[XUTWYZWXX\^`c`ehlppsvy{ocr0#?n('S}zynW><.$!"'7aü\#  !1?H\n_VUu~y{|h9 + %6HO?"=_RK-#(=NZSEFJF:1130-BeþiyI,+,4AFF>3.0/-+,14=IOTKBAHOOM<-(%%$'*+-+(&%%%'&'*.;N[cheaachegiieffkjgfhheddfd`_b``]cea[\^`[Z\`^\]_^[\``]]a]XZ]ab_a`b`_aaZZ[]ZXY]\\Y\ZWWZZZXZ\XW\_WXXZYWZ\[YWWYUSX[XUVWWWVVWTQZTYXWWTOQTSX[]]\YXVX[\[YYZYXXXXY]^`ddceimptvx{wcga)#/azgvz{y~}xmJ9B5!ZǪi43Ajzz|zwiѳ>#yƿ*Ig{n~{lpx{xtyU.Ja^I8^f2KUZʚÇISwࢠ˫ͥǻħøcnpihi]m|zjTRաyd& '!e}QqW9,.-*%#(h}eL\|tY+Em`@2++159x~nd[bj_UKJD;KMFGEABSWSKNRNIS[\bf`WTWaga[XSWWVPKE?GNRLJHH@5 -5AY^`k{{}xiTMH>..Dkgigbef}{zzyxxwvutromllookif_`b]RB31/$"(03MflkmmqyЖYROHIOSQNNQUUWd}khd`\ZVPKFGY|¹Ƽt312++5EB$'9Xkjnrpmmkhda^[YQMJHFB>><0%"%,2@lƾ_5"3AIQH9AQy|y{|e4 + $)*'-9*$16B<#(=PZP@BGB736850IwhvI&&&)5?HA3-/-)((*0:EJHD=9=@?6,(##!$)**+'%%#$%&%)3BV]cfgeddfgfjkhegkjfdfigcghgd[ac_\___[XZ]\YZ^_\\\]\\^a\Z]b_[\^^\_ba__`^[VX[[XX\ZZYY\YYW[[\^[[Z[\ZWUWWWVXZWWYXWYVYXVSXWUYWUWWY[WYXWVQPQSUWY\^_^[VUUY[[[\ZYXYYX\`cbdefikorvy}xpg_y¼L&"@~zume]~|ud?;D-SG&Lpw~{{}yǼmȵО#/u*Mlsvunw~vypH&Gs~$#9RUTxŕkvKX}ېơxȰŻʿ´ûesqholnz{ylMZ֛yc (&``lZagF+-(&&$#$##\b;)%*\|rYB|cI>;<96u}rjdmhaWKJEEIHLOF?CYYSKINIMV]\[^YUMSZ[WPTRWSSOJ@EKPQMMLH@1,3@[adqhimcUKE=-,E`gsbd~{xxxwuwvuuvtspmmrqgfd`_][Q?10:6(),4NcikmnqtˋUTOKKSTSOPQTUWg~fefb^ZUNIDH]}Ƚo111--;KA+'<]giorpmlkjhb_[WOLKHEB?=;0%%)05>ožiO#$5B(%4CLNZSXe}xzx`/ + + '5<-77.&&"$("$'=QZN@FGC=:>?<2,-.*%! 
%0?EC?6342.,&$ "$*)('#%%##$&,5G]aadedehfddgifedijecggggcegca^cb_\c`]\[]]Z[_`___`^]_`^Z]^]\Y]]Z[^a_^Z]]YS[\[Y\\ZX[\ZX\^\YY_[UVZYUWXXWUVZXWXXXUWWVUSTXVTZYXWWYXTVXUTRPQRVXZ[\^]\YWTUYYZZYYZZ[Y[_cdcfdfkprvz|{xwumddny7$$!%9EWj2'Owk{t]7<=#%FpU?_|;0Yyx{wøqyÔvz.ͺB1Uxnvypwzrs}e8!8qy");PWo̴_PXьÙxвý¶µ´irjelrqvxxfK`әuy]#'!Ox&+.++*'%&(*( 0,#)11/`}oWOaI>:858|zpggljd[ZXPHEDJQNLSc\TKEDFQX]\abYUOORSRQSPRSTQLGKMQTSNHB:0*4BYafxjei`SKC:++G`EZ|zzzvvwtvuvtwvvnlvysfhgb`]XO?-4=2)+)2PeimlmqvʊWSPNLQUSMMQVVWdcefc^YTNJGH\~Ǽk03.(-;HC/&>aiinrolkihfa_\UPLKHEB?;:1)),03]e<)"%6GP^pkln}{z[, !?Z\LH>3*$':2.-/05EwſxL (PsmRB''8DNjxdYc|{{xU' "[zY[PD?7))=R\WSPNMJIIFCAFZyhT/$ $)))(+&"3=BA<4.(%$!"%&'#"$*.;Sgd__ceddfhgefghghkifgikihlkhcdgeb`bc^YZ\]VU\YZ[_a^][_^]YZ\][^`^[]``[]bb\Z[\YWXYWWZZZWVX[ZZW\[Y\\[Y]ZXTVY[VTWWWUTYXVWXZUUWYVVXVWWXXVRQQPPSUZ[^__[USTUSSXXZ[Z[[\\^acedgikloux|yyxxyy{wwtijlnqx T0,Ch&"8v~{{t~zve>9=)a^%Hjl}xƻiƾ5#gm #Ac}~h{zvwsmqvQ'#&$)BVWu|~pMUi൓ɲyxöĺµ˼xlvra]cmvys^DhѷywU"''WXu|hE("01/8?8/((&!%0__9czlL*%3/"%Ax`kqnlbaeeWPRV^b^Y[^]YPGJQRX]`feZVWXWUVWTLKOPPJEGOTLC?=<7-,7H\chy}bYVNNKG<,2PZRYZg^`}~~}{wstusqw{~}}{xrje\YVO@)+9A@=3:Sgjmooru}PSNOPRQONNRTUZg´bgfd^ZUOMHMcƼ`&4.(.9G?&(Cdhkpsommjhc`^\WPLLIC@=<93../16D{ĿtI,Nqqg;#+9DIUG??T}{ywS%  A_WYJ@B6,&AX[WWSOLKHJLC?Nb|ĿcX8+# #"%'$ )9BGD>4*%$$$$%&$" !"'.;Ufc`bcccdihfefhgehkhgdfhggikgebbdb_]b`][\^]ZY`]^]aa\\_`\\[]^[X]^\[Y_\\\^]\[[XYYYXXWZ[XUW[[WZZYWX[ZWVXWSPUXWVWXYTVWYVUYTRUVVVUXXUUVWTTSQOMOVXY]\]^ZUSSUVVWVXY\\\Z[]cdcefimptwy~yxxxywxzy{{usjno|ϲ|B(#2noV>) 'Eqqooen{wq[=>;$?YK[{},.Tsyt{uĶlt$,ľ)Ln~wn}~vwwtrrnJ#$#('"-ETU`{NUrޢƦiw¸ȽżɶpoubWcgnuvsYEs̿yzS$& Nu[A1%-,-22)#! &6rzP2fykM%&6/$ Dogusqj^_gdWSWT\b^XXZYZSLKQSV[aj`XVVYUTWRONKLOOICHIFGA==A?2 /9I]dhcUOONID<-6X_b~|xwsstuyxyyxxtqme^\XP>-,AH=1*9Sfjklnqx{PSPRQQRRNKRUVZgbhfc^[WPNINfƽƽZ)6-)-;I=&+Gehkornmmigda^[XRNLHD@>=9/**-+.LſnF(@VS?' 
)6BHD;I= ,Gdikpronnjfdb][VPLLHD@>>8.'*-.HMMF8-'&$$'%$ #$*5EXcda__dd``bffggiedfihdfgheflljcaifcbbb_[`a`YVYZ\Z\]ZZ[^^\\]^\YZ]^[\`\ZZ^^YZ[[WUTWZWVXXVSXVWYX[YVVZZWYZXWYZZ[VXYVTUSVUSSURSVTTSUWUVUUUTURMMMPUVXZ]^\XTUTTTXYYVW[^^\]]abbcgjnsvz{xxxwwxxxwuz~X&">uwo{h$:t}{vj>8?.$fq!Eie|z~~|~ƼcÎ~M@º5 6[ttt}{vxvtpwc7!=M8OZwϲǼLP\҈}ëľ³hrle``]fwukONzʠtwL!.mQ0$4c\E[I$)*+%" &;>A=-#2;M_iwueVONLG7,6^recgytwyvsrlkpvwkb_][XG*%#*>YillnnswpRRJHFGQQOPSUWZnzafec]WRLKMRj˿ºR,1*'.BI8.Ifjkqroomkgca\YUPMLFBBA@:1+/7<A(!Lb+Otzo{}|}yĹotξ3!d—$$@azsw~vrtyvts\05~t, ?PZ]EMcȈλ׸̷ԼŶ·hspbRT[r~ujQS|ȝv{I &Whmk6#&('$!%<`iY62m~xlD#'4,!(O{tqniXTZZXWVV][_ZYZXVVVQQQRRUX[\Y]ficXPQJBGMNNIDDFFB<;>>8+"1=Ol||n`RLKG7+5^fXXWepl|{wqopmpqyvkhdb`[I"!$%+04A[hklmnp|޴lUSLGGJRPNLSUV[nſvaedb]YSKJINnʾùM,0*(-BH0!-Jfjlqronmlgdb^WSPMMHECA?<65;ADKce< !.;DB@D*(CPWovuOQe̶߳Ф£ȾȾȳfxwaX_isxshIZ~Ĝn|I !*yxfY-%(%# $;`yeH:p~xjC#(4,'V}srp`MQSVV\[YZ[`YX]ZTRUUTRPPRU^\Yaec_[XTPMKLNLHECDC><7B[iiklmo{޵lUQJJKPSQPOUUU_rſw`fea]WQMKKTmɽJ,1)).@D3! .Nfgmppmlkkgca_ZUQNLGDA><<:=DJIKcľa2 + + + !.:?KhzzygC + 6Oad]W4 "%$&+.1=<-%3CIQZY^^cZYUOMKJSa`jQHC8* +  .?HIKOQI>0'$"$(/=SX[_b`adhgeefedcgffegkgeiklhfgmiddihdcefc_^^\^\\]VTV[[YZ\\ZY[\\XX][[U\\ZXZ_ZVW][YYXWTWVUUSVYWUWXVWVVWVUZZUSUVTQVUWSQUUSRRUWUSUTRUWXUVWWUVVSNKJMLPTYdw~lZXTTTUTVTWYXZ_bddcefegkqwy|xvxwvuwxxy||}Y25C|{[V!#E}yt]4>?&`\(Orxh~~zzķtmĮO@#"5Xqq\jmgA'Pjklnc='JefkmfeK 4MTWwbGV~ڗȨwͻǽĺɷtlsfcpurx{u^Eal}B#"!.&(%"!,b{Y-3u|wj<(-4("-jrpqfRNLPP\]XUNVUTZYUOJIRUTMJLThjb__adid\XYTMKKG@=<=?=?><:,$3>SbgwkYQQOPOB6?njPU}zxywwxsy~~|{vkjfdW8+8E<136E_ghkkms٬cOOLINRSQSQSTW_tqcjfd_WSPMJ\qɾ¹>,0*')*$  #1Vgimsqmnmhgc_]ZWSQMHFB>;=<=BCDKhľx[+ "2ACB?==Pn}xyxe> -?B1 #.:EMRNWZ_aefcefc_]WMD>_wh_jm\86aropn[29tu|s7 :NSpsPQ[ԎƠjļŻɹppqbajnqxyo\Ddm}|@#"$&%#*UxN)9x~vf;'/3* /plnjYOQXYSX\UJHPSZ[YTOKJMUSNIH[qre`_^ejc[XZZSIGDB?<@B@@==9+'4@Sbfnw^RMR\cmrF?cjhgVT}vvsot{|{||{|tmf`\M61>9(/54G\jijknq֧aRPLJPSSSTSRSXbu½rbjfd^WQOLM[rȼ|4,2+)()"! 
%5Ueimqpnnligda^[WSPMHEB>==;;?BBJjľrF# !6CDB@>>Pp{r|xh;!"(5CU]``bcbb_adbb][WRI;66:BSÿ__KE@60'  '8GHKLPOPF8.**0:EQZ`_\^efdbefeb`cdddgfffghfedeikgfhifefhc_bc_\]^_ZX]^\XW[ZYZZYWX[[[YZ\`[YY\\Z[_\ZZ[YTXXSSVTTTTXUUTVWUTUWYVVYXTSUWTTVUQRTUTTTUTRQPUUUVTSSTUQMHKKMMPU^q`[VVUUXWVX[_bfiiggilotxz~~ywvvvuuwttºd82)0nv76F0o~zvgC2@1!V= Ahh|~Ǽ`ǡ|Z'Gf}cetqM7JmpmkkQ+&UmLo1%;NVjzTO]̉“nŶþǴfpnZXhmuxwlXDhоv~x<"%&&"/]sH&@|we5%,1**qnmaMRX]^VUWKGJNQX]\YVROPUQKDF\mnc`]]gie^WXYNGEEHDBDFEA@?8('3@VdefmjYW[bmCC`kxwpjZyvtlu~||zz}~wrf_\P:25.0::8I[hhhkmrסZRPLKOQQRSSUSVcr¼pagcb]WQPMLYtƻ|6.1*&''! !%6Teimrpomkigd`\ZVSOKHFA<;<99=?@DlĿi( #5BEC??@Pqyr{ye>%-2=HV]cbcb`ZXWXVTTPLJF?549=ALc_\D@:40)! "7FLLNNOQKB:4027>HRZ]^addbfhfcadfffjihgfgheefffefjigchgc[_db^[]]\X[_]ZY[[ZZ^\ZZZ\[[YZXXWYZZZXYZWWWWTTWVQRWYWUVYXVUWWURXYTVW[WQSVTQSUSUUTSSTTUTSTWWUSUTRUUROLIKJLNQWgsaZYUUWXVUZ\_fjjigilpuy|xywuvxvuur}svspyU35"'PN7W/"G~}||yr]55>+Ckb+Utvg}{ķrhƬ»i!^ã?/Oo|mzc:0MbfdimK'?mdQ&=NRVUdOPe߸ɻշнepldkvuwyvlQIkлuz; ''%!(Hiuh@#@{w]5#-2)+psk[MY`a^URRNLNKLTY\[ZXTTSOH?I_qmbd`_fgb[UUPGCDMOJHLOHGB>4#*9EYeefa`bit:BgSbvn\~~yv|x{||~{sjfa[X>2;@DD<7M]ihilltԛ[TOLLMTTTTUTVUbxobhea_XQOLM[vŻv5//'$&&"!" 
#8Yhinrromlggda][XTOKHE@;:978==@FnĿR $5AFC?>ASxxqw}hG5244589?EKRX_fedgc`\YSNKIGGFDC>866:==;Dfa~]@@=70)" + + 3BMQOPPMQMG>635;=KUY`ddacjidceggceihfdgigfdghfdhjebdhda^dfb_a`\ZY]_WZ^^ZWZ\YVYYZZ[XVVVVYZXWXYYXWXYVVWVUTWXVVWVVUVWVVUWTPUXWSTVTSUVTSSTURQTTTQUVVVTVTRQSTROJJJINQR\qsc\VUUVVTW\agjlmkijouz~wxwvtwxwusyyimtw|~{C20P~d@%#$XVAP_mu~{ynR599*b}MѾv)7`}iszy|~{zbyš̿BC@3\q~qZ9.6AJZeh@'LcxJ -FQOPOSryHVt᫘ǵͩǡȽjtmeefkpwsgOOmϼq}q8!''%"".RmaC'@{x\3 -3')qnfSQbaa_XWQOPPIHPVZ^ZXUVUQICFesmdca_`_YQPQGDJSZTRPTOGGE>3 -CVx~uqx|mXSSVX^baekjikfcaa^ZYXVPKGCB@?=8129=?<97@A?CGGJY|F '8EFB@>FWw|sntvsmgggggidefe`__\\[ZVUVUTRPIDA><4-19<=><88:;D_¾]W:07<:;PckikmqyȊSQLIHKPQSTSRTYg÷gef__ZVQMMOazúõj*/.(&%$#&?^jkprpnlkigdb^ZXSOKHD@@DILQRSZg}D + ,?=>?=74;Z[~U59A>4-% #@R]ZZ[XWUWUOKA;768BNV^djccgheaafgedfjfbdhhddcggeehfb_a_\\^^XVX\\Z[\\YWY[XWY]WZY\[YVWZYVW\ZXWZXYXZ[XWVVXWVVWUUXWVSRRSSUVWUTSTRSXWTSWUUTVWTSWTUUVXVWTUUTTQLGGEJNOUlzp_ZYWW[]^cfknoqsvy~vxvvvwwuwz~F((){K%R~z|{vqV47;&VLWC9c~kn|yttw~~zxfuɠ{_(o.Pr~zrh_X[jN+>@>9/1ȿ<-MizsLELVqjizwtrsttusrnrtrjcded[C+/=A?<7DINNPPNX}9#0DSYYUUZ`gr~{vskjhfggbZWPSTQOOPOPNOOMLNPQOOONONNONOLKKF<33CFFFGFEB>514Cyÿ\U/4)*>C;88>UfhijmqzxPRLHHOUQSQSSS_mgifb`YVTQMRaļ^(30)&'&!)E_jmttqlljgdba^[WQLIFC?=?@?=ACCTýs?1.($&+4J^cefehlqruxz{xxpjcd`\UVPVMPQLOMMLLIJMLLKKJKLKKJIIJHHGHHGEE@79AEEEBA>;983-+2fW~X-+4;91*# 1KY__]\^YWXXYUPHB8358AKUbhfbaefaaefecflieeddecdee`]eea__a^UX^_[\\^[[YYWWYZXXY[ZZXXWWVWYUX[ZXYUY[WUWVVTUWXVVXXVUUVUSSVVTSVXWTXZVUSTRTVVSUVTSSWWVUSTTUUUUMGFFEJNQf}e]]]`ccdehlquy}wxwwvvvvurgiy~}}L)+(Ohit'%Rou}xrT368%(igqC+77.+29AUfillmq|ݾrPRLHHPTRTRSRQ_pĿ`jgc_YUQMMRcT(2.*()*%!+Ibilstolkjgca`^[UQMIEA><<:89@EI_¾oUMF=70,+058CKUcijlmmopnnnnmjbba_]]XVVUPWWTQURRRQMNNLLKJJIJHE@??@>?@@BBBAA>=@BBB@;:51/-+('.c[}Y*%,572.& +FW^_^_\Z\YVWXUOG>6228AN]da^deb]aeeddijdcffeddeffbdf`abcb_[^]][\]YYY[YWY\YVSXXWVXVTWY[ZYZ]ZXYZZVUUXXTUVUUUUXVRWVUUWXXWWWVVWWXVTUTTTWXVTUVTSUWTUVSSSUUSPIHEFGLQSlra`_`babehlqvz~xuuusrvwwxukaro=1,bU-jvp~zunG2:5 
9n#\-!Ly`}}{~wxɿ_ɧοW)Oh0-QoxxnpootuS)/WYKH2MQOLJHFFFGHINZؐȪƻˮ̾hunfjmmpvsjLFd̪~v_  1,&%" Fu|lL32U|~zpI('/+ ;vNGHBAFIOZ^\YSY]]ZOIKPPTSKLMIFQejgdfdb_c`a[WWTRMIH@GHDGD<1&K;z]L?.6TzyXJ}~~xoqtusppqonnnlmnngfiif[=/10+&/9BUehjimq}ݸpLPLGJQSSSSTTU]p¯^jheaYSONLSfʿȿT&1-)+299+!0Kgijrunlkihea^^YSPLIFA>>=7546:;??A@??A@A@A?@>962.)%"!'R[}Y)$',11/*! %>SZ\\a_][ZWWXXXPF=635;BOY_bdc_^bfedfigefgfdehfdcbee\_cb`]\]ZXY[]ZWY[YX[[ZWTZZYZYZYZ[]WSXYYVV\YVUUWUUXVSQUXXVUYYWUWYWVVWVSUVUSRUVTTVXUUVWUTVWUTUTUUWVSOIIFHJLPZ|nebbbbbeimruy}xwvuusuvvw|wleze7;,1@KV@Er|{|zw|}yse<0<-/T4Ab+^w\~}{{}y~ǻ~bŜϮ21aiWUPD7+&5Zo|vnomouqG$.PoosJ!8KMKJJIEFFGJJQaΐǿԼŰżƵ˸ktlc^_cntofEHgȪ|v] +PQD+&&# 5ME`VL6T}|ykH%&-, ;x|II@>CIHMXa_\UV]a\PJINLRSMOOJIN\b[YUUVSMRUQNLKGEFHFHBBC@:5+Oɲh]YN>-5PggroYX}}{~ukbiqtrppqoljgbdgheelkf]:/48/,48BXikmnmoܴkQQLILPTTUTTUUZo]jiec[TNLNTgɾȾQ%1,(4Ldj?#/Pgiksunmjhfcb]\XTPMJFA?@CHPYdjozwqkhkdilphlqmollmkijgefda``\_ac__``\\\]^\\[ZWYZXSUWTRRTQPNI>82/0456<=@<===?B@A?>><961,'#  +tX{[-$$%)-,*" !8OW[]]][XYYWWWWTNF<8579BNX`c_^cegffgiedffdbegda^^c`\`b_]\^ZW[[`[Z[[\Z\\\XWZWXXYYWWU[XTVXYXXX\VUVWWTXZXTW[YUUXXUUUVTUUVUSVWUVRSUUVXWVVWWWVWWWVTUWUVXVRLEFFGJMQ]odcbceikntvx}}wvvtututqy}mknV46!"S~qsmllmlhnz}{xnU235$!HI>mee|||}|uönl¢̕9z1">_q|vzpnoprpf=#@T%?OMKJKHAEFGJLPeۼŻϨʵºȱxlvod_^amslgBIiǪxtY!$C`[B$&#'9Y{yO+Z}~~wjF"$.,9xxPRMIOPJMW]^YNT]`YOMMLIMQLLLIQT^dVQOQOKKOOMMLHHHHJMIC@@=<;+K̵na^\WM<'5Xpxhkd|{{~qcbjsvrnppoljfb_cdaeihbT6*394;;:DXgllklp{ۯdPOLJLNTUUUTUW]oþ{^hfb`\UONMOjȾǽJ)/*+Cg5 #3Sfjlssmmkhgcc`ZXSOMJGC@AIUbknsv|xspnmpmnnlmnjiihijggfdcbcb``b`a__^_^][\[[[[XWXYVUUVUSQQPMKD>8511210267579<=@??<=::;964-*%#$$F¿S~^2#""#%'&" ,GTV[[\[YZZUUUWZUPF=637:DQ[^^`ccaaggfdfge`bfecaa`c]]aa_^a]YZ]^\WY[ZWXZ\YTXXXWXZXUVY[XW[[YVY[YWVWUXXXVWSXXXVUWVTTUTSSWYUVWZXUWYYVWUTTUWWVUXWVXUWXVUWUQKEEEHKMRg|mebdhlnqsvy}xuvstuuutp}yljyuH2.(d|wpgit~~{vlG-97  %S|^z|zzx}~|uɿdӲ}u!V'&Ggvqagpunlnnrpp_4!:fv=,HRMKGFDBEHFHLUx㰖ķƠȮtpsgVVR_lvnb@RpťyzW! 
=fmY0#&" #7\iaF.a}~wkH!%.*:w~d[XY]XOQZXYYQV``ZRRQOLNNH?GOWYcaPNUVPILOSVWSJHGGJSMF@@@=8'#WϹub^]]ZUK<-:c]Ypmb~x{{okqzvootsomjgcbec`bgh`R9.9>AF=:G[gijlmqڮgORNLONPTUUTTXbtĿyZhcaa[TONMTmǾƽB,/,0Lut, $3Rejntrmkihgec_[WSPLKHDCBP_elssu|zusrnljggecefeeefegeeecdabb`````_]\\[\YZXYVUVSTUPTSRQRPMOKEA?;6133/)&'),+166:899::8799941.*+,@xÿPxY0#! !$%$ 'BQZV[[\[YVSPQTYZWLE:345;DPU\abbcdgffggeaabddcbca`^`a`^^_\Y\]\WUYXWVXYZWYYWVX\YZVY[YYZ[USUZXUVVWTXXVTVVXWWWXXUVXXWUYYXUWYYVUXWUTTTVVVWVVTXWTWVYVWVUSPJECDFGKSpbfzjedhloquxz~utuvustsrogdxuhl62,'E15us]}yrf=.>/29"/`xVzyz~|wuƺ|ZͰκQ.EQu-Oky~lZcffhjmloolS/4oh. /DLMJGEDDEEDFKV㤥ıñƽѽƷlro`X_[cmun\8Orġv{U.I_N+%'" #0EtiN4h~wjC'2, A~bRU^bSOWVNRSRWb`TLKOSONJ@?LWXY^YMOUUOIJO]`^QGHHHLSNFD?;82%TͶud\\]XZYWSC/;e™PDMa`Wvvyurvuttuxqlkhcagga_adg^P7043786:I[chikpuب]NQPNPQSVTSSSV`vĽt]jfa`\UOLKTnƽƽ>,0-/Km[%!%3Sgilssoliheeb^[WRPMJHGGKWdhmquvz{~}zyxurpkf`dcbb``^[_abb``bbab_^ba]]_^ZYXVVTUSSSSRRRSRNTTUUTUQQJC?=;85682(!!##%*/13569864787499745B}R[/$""!!"!!";MWTYX]ZZURKJOY[XSMD73369EQ\`aagffdffebbcdcbbdc`]bca^^^^\Z^ZXXZZYYZZYXYZWUXZ[YWX[YVVZZVRY[VVWWXWXXXUVUVVVXYXXYZWUWXVVWYWXTXWTTVVWXVYWVXWVWXZYXTWYSSPHBBDGKL[|mxjdgjoqvy|}tsuxusttqnlhekzv]35$,[t^1K~{wsV65>(7s_M<' =rg_{yy|~zt·iiɴϫ1N{J7Xm}g\[Z^bd`ehnjK&+R80FKKFEGDDEFEFP^ۛäm̱Ÿ˽kspfcd]aprmY;Ttġw{R!@cpV%&(" &?ti:3i~xeE$8C9& 'KZGIMHLS[NLNMNTZWGCGPUSNJJOUVTUTQNQTQNLMUbfcQILIGMKHBD?:60$!R{ldeb]__ZZ[]aL/;fKDJNY^`{rrxzors|yturjebbaed]\\baXN;-+)*098325:5-$>^QDO]bbb`aigcbmkK-?gcb_eomaslpysahpu~lptj^\\`X^_YWX`\VQ:)-/1<@;87::23740+--'$'-0132458?BACHN^dap¿T]1#$"! .FUWVXVY[YRJDHSZ[ZVSK=621551%/65937AM\glrsvyvumYE;:996574643114/*$!"%+15689;=DEDDFPc~Q_4&##!!!  
(@OWVQRVVXUMCELV\]ZYSF<5005:CP[^cdffb`deb_`a_[[_b_YZ^^\\^\XY[[[[[Y[YZZYYZ[[ZX\\ZWXZXXX\ZXWYYZV[\YVVWXWWZYTU[[WWYZYXZYYXYZYWZ[Z[XY[Z\\[[[Z\\[[[][WYQEDC@DGMTn|pqpry|~svttuvttuwz±`'%!.GM8 Euzoq~xrV.4;*!(+^s9gjg{yxy}{søkZ̶FW *LmxrZJBEDC?83$,54=PY_dl~~xsoeF0EseZ}vuuvnQJThocWis^USUWXW]OFI^dZTK2))().38MbkkmmnxΘSPQRTUXWUVUTSXcy»jdieb_XUOMPVqú÷w213+%)($"!"!"-9Nhhhknjeeeaa`^]^abekmrrtuutttsuutqmlid`_`bddeghiihebc`cabb^_][\\YZYUSOLJEBCDCB@;4,,3CR`kquxxyvrhVF=:95.,//0-,//1/-***6:=>>ABFDDECD?KoO|b1&"#!!! ! "8MY[WTTUWXOE?EP[__[WNE92/15;DQVaffb_bec__``\^cd`]]^[]]^_]YZ]ZXY[[XXYZXT\ZXUX[XYW[YYY\ZZYY\ZYY]\YWWWVVW[YXZ\ZXX\ZYZ\\Y[]\ZY[]]\\]\[[\\X^[ZY[^]]][YQB@??BGLR^so~zursz}}uttutttusq½R&*7xu9&Z}}|ujH,78"5^lPeP!Ix^pzwwx|{zɿZi̥/AŽ=2Wnz~k[PIEFFBIammT0#Su+CKFB@BBFGHHLTvᡡ«|ƢǸ}kyk[]^alrtbJ?aѾl}{DAciT!('# 'KvrH,9v{~tb=4JK>0& !RvsY[YY[VURJGIFAQICBFKMMRYUQJHILVe`]WZ^[\WUTSQNMIGDCFIIA?<61"*25DOYchy}vl[=0Eskacllf~zjFB^ote`jbMLURXVRLFEUb_[SL-!+9ObikknpwϒUSTVZ]cb]ZVSTZg}úkhjea]XRNMPYq¹ör.04,%('%! !"#%%$&(+;M^ksqqrrnkihhgfhefklopsrrqsrqpqqusrsqplkieb`b]\_adeba`_]ab`_^a`\[[WWVUOJA=876>B@@:0'*4ATcmsvy{xtocTE?;6/'""###%*+(*,-/;HF??>@CDABBDB;?UvO~_3(#$$$! 
0IY^YUSRPWPH>?KV^^]ZSIA50.15:GT_ec^`dd`abaa]ba_[]_`]]\^]ZY]\WX[YXTUWWWPUUUXWXWWXYWXY[[YWZ]ZZ\ZZWWYYYXZZX[[][YZZZY\][Z]`\Y[]\Z[\[]\ZW\Y][Z[\]^]]]XKOKhR4HLFB?CDFFGJO_ۓ§yҹʽĽϽƶwnxm]]]\ajn^HJdлp|}|B"TonJ!'%!"$Ljl?5:t{ra3"-7+  UsZYZ]\^ddXTOQIMHECEHJLSVSOOMLK[g_YY[`][WVRQONLIFBFGHFAC?7.(15COcty|uh\8-FjwNFJYmuo{hLVn~{da`SDJNLPOG@?ESZ]ZVJ-'9Qailjlp{͍UTWZaiq{l^XUSZgdfjdc]WRNMPYon,/2+%''#""$$'**114=JRf{|{zxvtspmlmlnlnppoopomlnppppqrqprqspnljedc_][]\ZZZYWVWYXXWWXUUUVWXVOC84234>A??8.',6DXdlrvx{vtnbP@?90' "#$&&3DHG@:69=??CEA>47@rT}b3(&$%%%$# &LW\]^acb^ce`^]ba_Z^a_YV\\[XZ[ZVYYTRRQRNKMMNOTTTUTXXVWXXYXY\ZZ[][\\Z[[[Z[\[[\[[]\[X\]Z[\][Y[]]][^\]^^^\\^]Z\\_\][^]TF>BA@CEIIFKYmuvz}xtttsssqss|ledkmszd1&"&]}RBZGq|wqZ61;)Fzƒ'7hoY~|~}yxĸmP˿d B4#Ehs~|dQE?A>=?LeoeG$0P6HJEDC@AFDFJNvӌž{˭ǴĹϻðtuy`K`\PSek\=Ngӻn}|>'Jgq: &#!#&Db[IC=u~s]2.:-'apQRY\aeif_ZWUSMPPJJKGJPROLNLLNU`[XZZZ[ZUUUTNIJIE@CB@CEE<5-'/5E]y{y{{upql\7,FY[X_pcg{iMe}f_YK@B@EMIA=>?LS]^ZE)(=PbknkmrzƇUUZcoqw\YVV[fchkgd^WPMLOZti-22*&'%"!!%),-16:@CKT_hqx}~{xvusqnpplllnmmnnmlllllkklnpoppppoqnkjhfc`ZYXUTPLKJKMPPQSTTTWXZ[_VB85336>AA>81-0:HXbkostvvztfN@?5+ + +#*8FIIB:558?@FB>956CvM{b6)%%%%,,*$! 
1P[_ZUOPRSNG>@N[^^[[VPH?83/27AIOW^``^\ac__``^Y\``[XY\]Z[YYYZZURSRPOKHIFEINPRRQSRSTTUUWXZZY\\[\`\[[ZX[YZ[[Z\__\W[\ZZ^]Z]]]\[\_][[]_]\`^ZX\_[[]]YN=<>?ADFGCBFLWk}~uw{~~vuuwvssrqoppouvwy~|~V&(4q}m=%V\]|~zviI.49%%ŻRDqZe|xr]fȯA NĽnj+MkxwWHCGHDEAPin_;"'Dii9$=RKECCBBDEIIcŹx´ùȤķltsaewg[]ciX:Pjϵhz{=&HiY#"&%"  %3Png:QcjmjlpxąWUZekgy[XPUWgefieb]WPNNOYtɾǿc*12(#%%%$&+049=EINVQ[bhksyz~||{z||zvsppoookijiklkkkiijihhhgillmomllkjiidb^^XUOJD>=;>BDJKNQSUWZZ[\[TB85437?BDDA:6:BQ[diklot|~qO>>;1#  !*;BCD?:834>?A>760.CwMa7'''&'/20(" ,L]b]XRNKLRPC@HU\^^\XVOF>71036>JU]``acb_``c_][`b]Y[^][Za\WX\WRRTSSPLHCCCIKNSTRQRQNPSSWUUVWXXXYZYY[]\\[]]Z[^_^\\\][]_^]^_^ZZ][^X\_a^\^[Y\Z]]]^\WJ?:=?BDEFEBEHQXiuvy{wvuttvtsurkqwzyyz{}}~z@%)Iig[.1q{~yxd@,83*OJc"%Qv}O}{yzŽR|Ȝ#&nȸεO3Wp{tWJHNMIEGWfl[7"*\z(%AQGDCCCCDDJN}鯐³oɿǿ±¯ovtjgf[]agkS=Njдk}u8 59$""&&!"(Aw|P6B||pU+),%(jwgiglikb_ba^]]`]]\XSLHGBGGEHNQW^`ab_^\XUVZOGGIDEB=>?CA;:5*(/6Leh`VTXSKQP\cZ3,GdvF:Koxlm{rV\b[b\J@;5:IF848;N[_[P7!*=Thlmkms}~PX\dgkqXWTW\i`iigd[SPOLN\wǾȾ],13,&%&)*4FY^bhnssotttwyz{~}|}|}{xxwvsmmpmoonnonomkonopoomjihiggggfeedc_a^\RMIB<92212:ACGJNPSVYYZ[\YPC:89:AIKNPNNOT[`bdddhoxsL??>7* '49?@=:8319?=:7/)+8t¿N|g=''$&*/45.(" &>Ua`]YURNQOHDFP[aa^]ZTME=3-/38BPWY`cb_]``_^\^a_Y[_^[Z^^\Z[[TNNQPQQOKHGGIMPRRRPQRQQSVURRRSTVUVXWWY[[[[\]\a`]YZ\\[]^[]__\]][][_\^_^^^^]\\_^`_^\TH>9>ABDDEEEDEHJUg~svxyutsrttuswtsvxx}|{{{|}k3(#"Ykv\;tat|xtX6.9- Fnp^3_~iR}zxøiBʨĻx'`Ç0 :]qznRIGB<5:EYjgO2)]f(FNDACCCCDEOh⠟~°żɻ˿nxvfZGWelqhP?Toͳt{q5&KV;"&%  !)HsgOAA|~yoT) +.# ,pvmljlheX]`a[\\Z[][ZXTNKLRNLOSV]bcfkgc`ZUTSJGGFGGD??@CB?<6)'/7JX^^[SRPPOM^bQ1/K{uVNQYx|qq~~b_{fNU[^M:74ATF;56AT\aYK1 +>Rgjlkor޿zTV\gjrb]TVW^jþ_jjfa[TONKO^wȽƼ]-56/--,/2Fhuwvzxw|{{zyy{}}}{zyyyuvwsqonnorrtuvuuvvxuuuwspqmjigfa`\WUWRNKCB>62/*,,)+.28=?BFILOPTWYY\ZQGBCEJPTXY^\]]_bcaa_`do~wVA?=:3))27:;<6228;:63.)*1IN}e?+(&'),132,&! 
5M]b`_YVROLLHEOZ`__^]YPKD931029CMT\b`]^bc]]]ab\X_`\]\^_]^^[SMNPQQRRPNMNMQRQQRPSTSSUTRTRTTUVTTUUVUUVXYXZ]\[ZZ\\\Y^_\\^a___]]\]^`]^\^ZZ_^]]^^]ZSB:9=AACCCAGEHKMMYivxxzwtsttx{zzwv{|}~{|yz|~V*) 8syjE'V]=i|wkM.27&9и!Al[ezwvKVؤgd^b{ĹS@@@cs~lYPE:68EMaiaK-$K~|n/,JKEA@ACEFHU~ߑàů¾û¿ʽ|ozydTZfjrrcKBWt˴p}}v4,soB%#%%! (GpsZ;C~xlS&*.& *upjlhdc_T\`a\\Z]ZX\ZYSQROLKLORWchcfljhd^XRNJDCFHHEAACCC?<6((06GS^hhc^bdbb]XF)0Me{qlijjzrpy}|lkmLGS`aD941DQF=7>HX]bYK0"+BVckmnnt~ݼsRW_jqmfVVY\o¿akkfaWUQNMP]vǻ·[DE>9?><=Kdvx{y{||}{zzyy}|z||ywvvtuusxtvvxyyy{zxxxxxvxtsrpnmjhb[PHGB<550/,*+('('''&')-68FMRbd\E+"UeNV(!:NKDCABCHKRmτǻչøºͼȷwqzkQceekumaGA[vʱr}~p2"\xD=0%"'#".\trX:5$ *,3?IWgp}wjUB1(0FPQPRMQ[XMv|yrjw|}olc[^qWAHU`bba^]WTRH<83/03;DPX]aa```_[]^\^`ab]]]_a]QB@?ADLRVTTOD6.4BKNMNOOMNNNKLORTTPTSUVTSSRSSRPTTUTSTTUUUUUVVSVWVVXZZW[\]^\[[ZZ\Y[ZTECCB@??=@@ACDDEEBFJK^}ptwzvtrruy~}yuvu|^00*9l~uO!R{}}zumO013'MtvmOae.[{X^|xvȾ\Rյ{Xrq"f_#9bpyt[]^`i^\bdgdT<%Dua$.;JJDBBDCDHNm鬐Ɲηƿǥëntvkiia^qrm`=L_}̩zp~|g-%Cec32!#'$ #4gnH5O}xlJ%+.# 7~jccggfe\YYZZVVVRLKPOJHNPSRPOS[a]TY\\\XTPKMIJSX[XKCCBDD@;4$)-4=CKRVUQF7),GPGBACCGIV眠Ž²Ľ´lxyhZ^bjurpY?N`ɪxud)*q>.#&$  &2db4+O~vjH$ -0# 8khegca]VSSPOOPJ==AHMLOQRXXPKO[a\XXWXYWUPJIKPUZZSIFIJJMF@4%*-39?GUlweWG8'4ITQMHECBADlmkfNWpeUSPPPHEFUb\Z\XK3"0E[m|{ثgS[sjWUYfzviljgb[VTSSR[k|~}}~|z{{{{{yxvxvwwwwxyzzz{{}~~~xtpqpg]ZXM@83,%$&&&('# !!"!""!""%*.5E]fiiqnjiccegffggiihhgfd]\YVVUSQRRRPQQU\]\R6-((&'+/-'  !!$&,1KĿMsmWF8-''&&&#'())& ,FXbcba`a`]YLC;>GJLOPQUVMGRZ``XWUVTWWPJJKORVXQJKLNQOD<6!(+29>DNfw~tg^R=-6HQMJGEDAAKj{n{KEczlLBDJQQHP\\YWVSG4 .H]pyاbTVjz[VRYgzwstpkgb]^_cjz}|}~|yyyxwwwxvwyz{{}~{{zxvnhhc[VNNA>81.)&"  !!!!"! #&+1B[kiginssponkjhhhggffgigdec^XWSRNNMMMONNPNI>4,%$-.+%  !&,/9EgKsjSF>4+)*'''')(((! 
$>U^`bccdda_XE<=I[dfcb_\[YUNF931,.2DOUWVPD=59>DKNPUVTWWUSRSVWUVVTK<=BFJNMMNQSNFCHJKPTTVWUUSUWVSUUUTSVUVXWVXXWXXVVUTUSOA8359;;>AABGGHMSestuywssrsv{{vuvze1-)"W\$H|wlT038+1ë\"*SuUV}xtȿ`ZӠ{phy#T|lh^c}1UlwbK?;H=AGPgo^H0+T}{L#4MJCDAEDKZ׋ûqϲ¸ÿmyr`^cdmrqhK845:>DJPTUTRKG?=?DFJPQPUUVTSTVUWTRF9>CABEHNPRRPMHFIJKMQRRTSUTUTSUUWVUTSVWVTVVUUWVTTTVWTOD946789:=ABDFHLRYnqswxwsrqru}|wv~P)0"/\x{>*^igsvlit}wsfB+3:%&~Ϩi28Yyw@k{xvrǾMl߼pT]`llxzx~c"km 7]ryx^KAHHGIFSfiXB,1f  :QJDCBGHPoȋ~Ʒ;̹vqqycX\_gotofM>Ujósw~]%.076&#$"!%5^jC-4Z}vjE!#.-"AqqEGKMOLMLIORUY^YTTVULLLMNRRLFNV[\S[gg`]VPLKJIIMOKJMNSPGA=.!(*279@KZhtz~|uhW=*6OONKHGDC@Hlpzzu`gymSORkt^\`WVYTRXZXQNOML2!1G_kpmnmsӢ]VWTXR\YZXZ[_do~~||~~~~|{||z{zyyz{~~~||yxsnlg`ZRNC8//,%#" %'(*)((&#$" ! !" !'2Klmmkjhinqruuwupmjfeb_acacaa_^`\XWTMJFDEEB?<:4+"(*)$&2CFTWN>1)($*BVba`_`ceba[I;AP^gecb_]\ZXTJC;30//6?IRY]_^[\``_]^a`^[P>97459=ADMSWVXUWQKGFDGIHLOOPPOQOPQPF8>IIEB=>EKNRRRONOKGHJIJQRQMQRQUUSUTRTUVTVUVSTVURTSSRTQG935677;;>A@AGLPTa}rqtx}|ttutz}|wyxB,23qh!1zwcJn{{wr[5(95=u 6ig/"Aif@{xuxĺzKܳ~xB(x˽@%Ccr{mTH?CEGFJYheS<+<|R&>QFCCCEL\췓ȯſѾnǼ{dpjY`ltttupdJ@TlƯzyY#)Ltyc("%!! $0G?*-2Z~ujC "//%AopGJNPPNRPPS]^^YMPQTSPNRPNQNFFNUVXVcmlb[WPNPNJJKMKKNQROFA6+ ()179AIX`mw}}ui\=,;LQPLHFDCCOqxrbhkgaTcumfibW\ZGXgaZSNLHE.".K]hjjmqwКY\YWXZ]_ghijoz~}z}}}~}~~{}~|xwvqojha[OK@>>9;62/-.+)'""&+))&%%%#" !!!&6Tmrspmlmmlnprtutspkggec``^abca`\]]YTOIB@=:852-$()(%%!$/Hj¿FqjJEPRVr>''%! #:Q_ba_`bdcb`VF@O]ggfeb`^Z[WPKD;4/-06BIRVWWWWTSSROMLLLMKMJHKLLMC8LSMIGFD@CRw|vjkol_cotobVW[^YQbkb^ZPLMH,".Nbikkmszʚhhabbfjqxzvu|}}|}~{}{xwwvpmkeaYRME>540-/323332210///-'" (*(&###"#$"   ""&?@?CEFKOS\nlsuxusqqv|wv|~R-4++G9&_{u}}ztd@*/2#"[~,4^~t;e~|ywwǾ]pΔ[axu_swOƌ0Wpv}aQAGTWVPWaf_K87fE'#/DJEC@DDY⚯Ļ}ʷйúĩyð|jb^rievyurpZDJ[sҹny~Q7XuykO"%"$"##$*6`{uhA"'23$#Wu\[XZ`eee_`aWLC@DNPOLQQOKJNQPRRZkgegdTIRZZSNJFEIEJOPIE?90##)+.5:AHR\elu|ys];*AORKHGGEABSzsxqjn{ykQ1FTY]^_jlfaRTLPH+ &9Rhnpst{ɹ}wtsuux{~}yz}~~~~~~~}yxwsnicdeb[SF=661.,)&%%((+-..0111011//'&""! 
"+.-*&%$#$$" !"':Yrvywvrrponljhilmosttqokhecaa^^^]_\ZYURNJG=4*$$#$%#&'''&%%%&(''&##5iż½Hqi;9tͮE+'$ ,DXbd`_accde_WOJVbffgdba_ZXVUSLC94--1EHKLJE:46ALTTTSRQKA94>MTTSNG@879AE?748:CJKC942469=>>>BCELLPXbxpquytrposy|~{yxutu|~}zF-,3TeH"3phrywlzy||yr]4)//"mжY Alf;}~{vvyŻ]޽zbes{_Z]!8^ux]OGLPPHBTbbYF76lkE2'5HHBCDFKtڑϡŨ·Ʒ}}ibbi[`koqpkVCI[sռiw{L 7j[#!# !!#"$*6b}vi='1/"&]wc\V_fhifb`^RG=:DNOKHKONJLONLNNbmfa_WLNVWTOKHGAFEJKKEEA<2$")+/59>HRX^fowzv_8*BNOLJGFEA@Q{|yr{|cSH/$ITXfkdjlg]XQJRC' (/Dcuy|}||~}}{yvsqokhbZLD@BLNF;4.*-//+)(((&'(,++-00100-.---)*(()-0.*('%#""!" $(:[qvwywtusspmkhiiiknprrrqpnhgb_]\^^[ZXUVRQMD9/!$),,++))))(('('&"" %KHqj:2}U*($! $;Tbea_]bcdeb_WOS\gigecb_ZYWYTQNA61/16?JV\_^]^bb^^]SE8/14877:<>CHOTXZYY[\^][[[Z\\\[^\Z[XYXWXWVWWWVRMB;BEGJMPSYnmqpsz{uqpnnrwzvwy~zl4&%>~UA}scmu|ulO.,2*OdqV*KsRJ|yuv·rgذ~:'gD&L{w~[KD@>>@AVdcWE6)?EHNIEFJLKNNFGHSdlf`ZQPQQQNJKKHFHJIIIHIC>6%%+*068=FOVYaiuxs[3-ERNLHFEDACS{uw}~}eSP@16U]cniefkgZVKHE9("'08Da~||xvurolheb^[WO:*$(4@IME:6..11..,*)'&())++/001/.....,-.,+-.-,*)'%#! "!!!!!(>]msuxuvvuusrpllljkkmpqsuvtspjeaa]Z[ZXVVSROJ=.!&*-.-,++)*(())*)& !##:l¿ýIqp=2r{y~̦d-(%"3M]dba``bcbbbZQOZdjhfeb`_\ZXVTPK@9413=FUY][```]_`[M72,.26899;?BEMUZZZ]]\]\[[[Y\\]\]\ZZ[\\ZZ[WYWVTPNHDAACGKNMJIECFHLNPOIA71:EMSUWVQE5/3?FHFDC<;DIF@81--6<;<@@CIIMORXaumnpr{uuqqnqz{zvs~y_('$*Z{j@%Y~~~}|scC(-0%f>3q~~P2\{s:c|wvvƿiyСeuŷ #4/0l~yVQKKMHEF_ccVF@F_()+?HDDDDJv۠ay_bl¿˻¿T_~hXbgckhgfnprlOANbz˹jvvG@HNQOHDGJUfgec[WTPPNJIHGHJMNLKHJKE@:,"(,379=DKTY^eovnQ-0FQOLHEDC??P~}voos~yjZROD;E\^dk_^kpj\OFC?504=JV[p}zzzxxvrolhgd^]^]YUPNNG1!%,1NVY_^[\]baO?4.*,27;==BGJNRVX[]]__\\\]]\[^[VQKLTY\\XVVYYYVTOJGHKNLJOOHDILIFDDFINPQTLFBDFJORURG7--6@FC>;@EJIHA2(%(+26;8BFGPY`i_WVRpe`T(""*CJBBFQz單ȿíûǩjwlfgdaf`[^^eq}~~|ywusofJFSegt{xB;VyU.#%#  %,O]jrusvvwxvvxwvrrrpnlkjjlloortssqmid_[ZXSI9( "09986530../0//-,..,++**$"! 
Vÿ½HlyB%DZniK*\̱H3-'#"#-K`jkkhfdcbaba^UR[ckmigeb`_\ZXVSPNHB99;HU\b_[^_`[J>30./29=?BCGLPRUZZ[]]^ZXWXYY[\[THDEPYXOLHNRTSTSNGDHLLIHMLHGJJGHGHKMPRSRRSMKGFJJMQL<-.4?FJIGHIIHGA8+&#&.26;>BCDGKOUV`txfijmv}qoprruwtsx{vsy{xw{vpw~}~~`-,* Gm` "_yteH).6)!UZO=3]{y=X|xrwhj׶{b`i~F\5"ic @dqxYKBGRNGCSacZRPFnq#!&2FFCEF\؊ĿϷ¿q~{zz{yzwnlu}yvutncFFTh޿au}}xAIcmT*"$"  " ! "&,3j}s^4$," 2nlgkjfdeed`]XSOONQSXZXRSUSNKJFF[ghcabb`bbZVTUVXYUTUTRPPME:#'+/666;DMVZY\cf\A'3HQONKHEC@?Hmtm_A?JWTUXFHN?:Udfccffoy{~|ywvrnmjigc_ba_^`^^^^\[ZZ[Z[[ZWTTRPPE5&#+1642//0>FZc`UJ=7ty=#:QFCGS{ȃǻʨĿþxox~}|ywtrm`DFUiܿ}`r{~v="IhnP $%#&(+/4/ " #'0;n|s^3$* 2urkjjhfcfhbbZURQNRVZZYXXXSOKGCE[oplllifgg_ZZZ[ZXVVTSPKQLE8 (*.677>>>A9,(&')'(+**,.024424430.-/.,,/.,+++**+)(&%""!!" "#'+*&%""!&4DT`fqtuuutuuttstuspnnmnnkjjlkkpsrvrpkd_UE<:>BECBCB@<;9642.-,,--.-,..+'!#/@CFFGJLQTWZZ]^WQEFHLKB@>??BEFDFCAFOYOD@JPQPOJDAFJGBCCAAA@DHPZYYZ[ZYYVXVUWVTPQNHHGEDBAA529DH?/$!$*29>FGIKLOOPSW`gikjlp|utpotz~}yw}|zvvvvwv{}~r<* 6lx8 @t}}vmS.*0)&rȒCmo"!LtP6{xtt~ļWьq{{au8 2<;57=<1.TlsxqZQI?;8DTbc`SF0)fUP?$9OFEIfȽþƻ¯sy|}|zxxsok^CHXlܼzdrz|s:;t}jC$&),5QbkiW6!!&0GSZ]`dkfR7,>PTSPMKJKRQj}zhhnjngchc[[^aa`[\abcjnqswuy|}~|yytqqkhgedcca```^\][\[\\Y[\[ZXYZZ[[[Z[YXUSOLMIC1!+58::961147@>B4*'&()')++,,./247456510/1/-.0.++++,,,*+)('&%%$"!$)++*'%#"#*8CQdmtttuttuvuuutsrqprpollkklnnmtqtsohbVMJHGFFEEDAAA=:;8410.--+,,---*%#9ZƿClI'bh5&OiW=330/+$!&&#(6BZipsoklifba^\WQ[fkmkihfb]_^[VTSQPNIFGMX]]`ba_[OGA@@>>?ACEFEGKQSWZ\]_ZTMNNNFCFHF@EIHDB@@GOWODBOSTRLGCHOJFCHNLFCCEHOTVZ[[[[[\[[]\[ZXYVPNOOPNKJIJLIHD5*""(1;ADFHGMPOOQTZ`fhjlqtrqqtyxvv~}zvyyxw|~}b.$6 EyY+Vxc~zvkG,-2(/vG.u7/\|x6K~{wrvglwgsr}ry]azEl'3FLJJGMK06]lqwjQSJA>=DL`d]PC-T|{jO+@OFHS잘ƹ½ƾrz|~{{xwsnm\:IYo۹{ht~zp7+d{vaF#)4MsP!!" 
&-Ap|w\1$)6uvijfihda^]^_[TVWZ[\ZZZVVVOLJIMdxxtrqokfbb][[YWWVVWVWTQMF8 +.2789BKQ[djppgTA=M\^[XXX\agr|y|xnlfd]khimniinppoopruuvxy{|}~~}{zyvtrmoifde^[_a`a`aa__]\\[]\ZYZ][[\YYZZZZXZWXXYWSPMKMHA,'.8=>=94126:7+(''''()*+,-0204877886420/.-/.-,,*,-*++,**+*((%%$"&',/+)&$!!",;Uhvxvttuuuvvwsrtqqsqqnmlkjkkknmqruqme`YRPKJFFEDDB?===87542/,-,,.-,& BĿGhLR}xC1?@6533-'""'+)!!*2Oemoqollkgdc`ZRV`jmnkhfedb^]XUUUTPMLILX\^bca[SID?BC@?>AFGGCEKOPVY]_`^[YYYY[Z]YTSVVSNHGJKOOIBBIJLOLGFNSIBDOXUJDACDGPUZ^^]]\^__^]]\\[WTRTTRRRQQONLKH@5&"%,8?BEIJMQPQSTY^egimt{qonpwz{ywy|P)21'_r_A+j|}wr`9'//$)iE\w2=hf/e}zuswžfܯmpvs,.z(:=@?EMF$"@bnt|~gQNC=88CVddZN?*]}{z>!.HNFKd䍭Ѳÿ|¼½o{}|zxsplR@K\pڷxkyxo3*Nkf\O*"5`M ""!"'0@r~tW.$("!8wzmkiijgbVRY\[WTYZ\\YYZYWWUSRPUg{xwssolfac`^_\[Y\[YWWURNF9()1-27:;61/,)('(&'()*-./1203478::9860,)*-,.-,,-/-+,+++,+)('))&#"$&+.0.+'$#"#,Jk{~yusssttvtrsrvurqnoomllmhijmoppqsmfd^WSMJFEFGD@A@><<:8620/00-,+%"*{ľJjQ 7oS,><74242.'!!(.,##:Nakqroqokhfb_WR[goqmhffe`_]ZWVVSPPOKMU[`_]\\LEA?@DA@?BGFEDGKOTWX]ab``a`aab```_^_]\YXYVUURJDHHGDHHBCIKFBBKPQG?=>@CJVZ_^]^][_^\]^^\ZXVTSUUUWUVUSPOLK=/%")3>BEILNRQRRUY_dgjowuspnpwzzxup}~|~vC * ;ae^98|wz~{w|wjQ/(/+0lӾu*"ImM5{wttümׯDI';FEEHMRA'Hcow}_NA:97?KXgbWM=(b'"5OIDP~Չ㘂ǥžr|~~zzxvspiM@N^wշsn}yq0"=WVcY9*Ta,!! 
'0>v|pV*$)";xojhfjh_IGU\ULT[\\[ZYYXWWWXVTXjzzwssqlgded`^^\Z[\ZWYVONFA965;?9?CIMV\gouz~~}}{wyxzyz|{}~zxwrpkfb_`b`_[\`_]^]^^]]\\[^]_]]^^]\YZZZYYYXZZYXWVYYXWWUUXWURNLLIC6%"&0?EJF>92+*(&&')+),,-101/446789;:91-)''(**+,..,.--,+,+-+**+*'""#%*,000/,''(1Lq{tsoopqstutspnnrppolkkijjkkkkkorqmkfbYSOIGGFEFDA@A?>;883320/11.)'?u½IkW"#T{a&'9;853321/' !*10$!9PcnrsqnljigdYRXcorokihea`^[YWWSPPNMNSX]ZVYUHD?>>CCABBFFHJIJNSVX\`ddcbbdedeecbdcbcbbda_\YWVVRKDDEA?BDBCAEIGA?>@BCJSV[][]_^]]^_^][YUSLMPSUWZ\YVSROMD6+#'0?DFIJMRPPRW\afimqzpporry~zrtupuz~}}}j1"/ C_nV$Oy_jnlYz{vgH+.2)/w9ua-Xvz:Izuswº|r˛m^lt_EY$*>HJNSNM40RgqzxXGCD@=CNZe`WO>5~D#$9OEHgɅϊjfğ̸żúwr|~~zyvuqmcI80,(&'()**++,0122557679;<;71-'$""#&(+---.../.-//---,*&%#!"%(*,./1222/,.4Rr{tgiosuuuurppoqqrqonnlljkjhfgimlmronh`\TLHIJIGEEFFDBA@:8887;ELJFSiÿMk]  9n}h3.C=876430/*% ('!":ThotqnmllihbXW`lpplhhfdb_]ZYXTROOLLQSOLBCC@A?;=ADAB@ACHJJIMQUV[`cdcbeffggffffddcdccdbaa``\UGCEHGHDGE@?CA==AABBAFJRWYYZ\\\[]`]\ZVK@67&/3&@{Lft-9b|j$e}~yupzqőgIMz?''/DKLMHHH05Xmr{pYVTQPUW[ab_VNB?vkUa.#AKCUȍbgȪþƼʿxv}}~yxvtpj`F;P`wϹws{j.%JZaN)FYVE?A?@3!"##'/Dx~vhF'+"!?{ohcckicYSPTXZ\\XXZXWUTVXWXX[hktwsrqoligcdb_```````ac`lr{w}y{zyz{z}~}~~~}~~}}}}{|zxwvutsqnjhgdca\\V[ZZ[ZYYYZZYZ[YZ\^\[[YZ[\\U][[Z\Z[Z[ZWVXVWVUWXVVWXXXZ[XTTTUUTNLJHE=1#+5=CB=4-('(()++,,.024557779;<;;:87.'" #'*,--021.012/-..,,*)''%$)+-,.0./024453:Zvp_behlqrrpqqnoprpopnnllmligebfegnopomgc_VSMKIHFHHGGECA@@@BI\zýÿGm]% ,aykG' );E>:9864010+$#$=UlswtommmldZZairsllkhfdb`][XUTQPMKKLHD@>>>@@?>CHEDA>AEHJIMOSSZ_ddcdfefgghhhhgfgffecdcddd^WIGLNLKKGEABD?=A??A@ACHLPONORVVWX]]\XP>,#$'-7GX``[[XURPL$ *,$"B~nfhllimfb\\][[[WXXXYWVWZ[^]h~vz~xtpqmjegfgfceiiijqy}|y{}~~}~|}~{xxwsrnifc`_^Z\WTXXWXXWWXYYXWXZYYYZZ[YW[\[ZZZZZYYZ[YZY[ZYZXZXVVWVUTUUTTVWVWVVTRRRPSQLKHHD=-#(-8;82+&'*))+,.,001456669:;:9;;;<81,#$'),-000131001/11/-+)'('(',,*-004577Bao_]]`eknqopppomooopooqoqokieddedelmnqplid_YUOKKJKKLHIFHEFHT}þNja' 
DodI/)5CD?<:997322/)$#;Ylwuqonmlie``fosomihgeba^[YVURPNLIJGFA<;>???=@DFDB?@EKKJLMQVX]beeefgfghhhgghhfgfcdbddcb`_YVUSOIJGECDDB@DDDDCCBFKKIEGHKJKSZ\[RI>1& )NGIKKKA!$Ddnue]`_^_^``edYRJ4V=.GJV鏠Żïȵɽɾnw~~|yvrqnk\>@Pd|˫nt~~c'"Fmd5$8EF>;977*!###$$+Ftyrheb^WA&!12&&Nwrpolijge^^`]]][ZZZZ\[\^_cjy~v|{vusqmmlkllorvy~{uuwqpmkca]ZXUXUOSTVVUVWXWVUVXYXWWWXXXYYZ[YYYWVWXWZ[XYYYZZYYWWWVXWXXWUTTUSQRUSTSTSPRROPQMMIDDB:("&*,./,&%()*+-..012356889;;=<<;<@>=8.&#'),/0202242331-($" #%&*..0359F_}tlf_]_dfjmllnomppoqrqpoonplhgfedcgijklknmlfa[WUPNLMMMIKIIJRm¿Eg`)9hcH10FIB?=<=<;5531/*!(C_qvrnpolnib_dmqpnihgfeda]\XVTOOJJLLJEA?>>>><GFIIGE5*Nipvy_\__^]]abfbVOF$+1--,4.""1GIh׆²ӿƺqz~|{xvtpmiW9CRh|Ͷlu~{b'"Ke_5$2CKKA=;2#!##"#%)Crysojhc^VA&%IJ7'%%(2Yusrpljhgecca_`_^`_^_a[ahkq}xty|{{}{xxz|~}~ztspmhda^\ZYUSVUSTTSVRRRSTTVVUTUUVVWVWWVWWXYYXXXXXWVWVVYYWVTWXXWVVXVWVUWWTTTTRRRSTSTRRSRPPNMNMKHDCB5"!#&)+)&')**+.01233779:;;;=>==>=???=;5,"!')-.0452540-($! !! !$(+,.139Hf}wsqijiihhjilmlppoppnmpppqnlkjhgeefdfghjlpmkeb_YUOMMMKLMKJMR[h|Bgc,#:TWMAEKIDCD?>=<854200)%0Oipssqnmmic_bkqplfcceec_]]YVTSQMKKMKIEB>>?@=<@DEAADLPPLIKPUY\adffhgegikjkihjiigd`_^^^`^^__\\^^]^[YTTMQVQLHDBENUZ\[YZ]URWWJ:#"*8EQVWTTUYVUSN=35BIJORRVY[_cgkqu{sqppsz{vrr}xx~{|}~~tL$" .GIZB.wz}xnR.(00!*( HkV0q}{{wttümͽiiY_}>%&2.0.'05'1UjqxvXZ[\^^aadgaUPA ! #&(($$7NV~{ǾÿǨŷnz|}|xvsnkgT6CSg}˷lw|y\$#Kmj.#4D>;0,(%"#" !$(Mv{tqla`\]Q4>foR>6216\urpolkiihgecbb`^fbghopu|~~~~}}~~}}zyvsnjffa_[ZYUVUTRRRQTSPSRRSSQQSTSSSUUUSTTUUTUUVVTXUVWUUVVUUWUUUTSUTVWVWVWVUVUUTRTSUTQRSQSURQSOQPPOLLLIGEB=0 "$&'''())*+-024557789:;:::=>@AA>:2( "(,.002410/-++*)'%$%&(),029Gh|z{tsrqjklkkklmmnoonnnonoonmkljiffdeddegiknomhcc[QPMLLMKIILMLQfſGgf($09FZWXVNIEBA?><;=964450,*% 3Kgrtrnkligaaipqkb]]bdca_][WTURNLLLKJGB>?A@:;@EFDFJOSQNJKOVY_cdghhggghijkjghikkgcbbbaabaa_[]^X^`a`_aa\]\XQNKLQVYY[^`a`]`]N9#$-57>@?@DIQRRSOC87=GLMORUX\_cflryuqopqu|{rrzyspps|}wvz|~{p9).Nkb("E}{zwkF)*0++mk*Po?A}{}yvsuqȽswsl %$&%""+1*7^msyp\[Z[_^aacf`QO8%.)%%?SnƼzƣ¾ĸ|m|}}|xuromgL6DSjʸnx|{Z $Ow]5#*-&"!!! 
!"!!$'.P{{wtpqtt`@VYG?821_vttspnklmkkkmnoux}~}||}~}}|}~{wvwsrrnokigc^]YWTSQQQRSPRRPNQQPSPQRPRQQRQTUTSSSQQRSSTSSURSUTUSUTRTVTSPTSTSSTTTWVUVWVTSSTTTSRQSSRRQQQSRPRNPRPNLJJHFC?9+!!"$%(&''',,-023366899:;;;<=<<==?@AA??<7.%"!'-/.130101/*(&&'(*+.2:Hm~||{xywyrtsqnllklmnonnmnmmmnnnnnmlhgeedddeghgoomifb[WTLLMJKJJLNNcOhg-%-47>??;:652120*$1QhrtoiiiiebekrmbWUW]^_^][WUWSNLLLMIEC@?>=99>DGEHLNQPMMNQVX^`cikihjkihjmkijiijihhhhigghfbba^\\`^]^__^_^]\YUUVUTWZ`bcddcaT<( !#"!'38661+,0K*7g{Q5Ywn(Wz}{ytqwžr_py|m),$+.5:@@9!$EamtzfOW\^]\_`ad]VT. &($$(E[ꢑļþƵºʿ{t|}{xvusol_H:FWlɻkzxwZ!IiR5$('" !##! %*3W|xut]7I^SF@=802d|{{wwvxz{z|~}zx{|{~{{{{|~~}|xxtpojefc`][ZWTUPSQNRQRONPOOOOOPORPPPMNNPQPQQRRQRRPRPOPPRSQSURUSTSRQNOPSTPPQRSRQRSTTRSSTUSRSUSRQQQQRRQOOQPPNOOOPNLJJFFDA=8( !#&&'&'),,./13357788;<;==<=>>>?@?>?@>=93," "$'*+/-00.,+,,,.126?Mn}{zzz{{|{{xspopllllmmpnmmnmompnopmkihhhfcd`ccfiknnlhb_VTROONNMPQT]frǾJch5!)378@IYXMICA@CBA?@@>=;85111.)# 6[mxumkigfa]fppeVNKPUY[[\[YYUQOKJKFCA=:8778=AFJJLMQRPOOPQU[^dillmmjiijjkkkjjjiijkjkkijihfcc`_cb_^_]\\[]\\[^\[Y[[`cedddc[E.&,3/(&,48971,(,6?DGEFID;BHNSTTUYY[_eks~}rqqmnnpppquux~~tustuvv{}|~~zN%))A]I*-xwqru}xpS1(,-%\{lF!Ad~U(p}y|~ywsr{ĽnӼG-".BKNSUK2)Mdos}~bY\^_`]`bdfTUR## "/Pv䗢ĉϷ̽ҳµɾwvzxvtsqrol_D6GXnƸhy{ttzR)JZf<$(%#!"" ##! %*3Z{tpqQ(/03;>?<>=;<==@?@@@><<=<<<:6/&  $&'(-//1355468@Qo|{yvyz|}{{zxwtrqpnmnnnmmomonnmoponlkjhgfcdbacegilnmmhd_[VSQQPRSTUW[mȿMcc-+5;;?ENPJEAC@BDDBBA>=<;5232/+( #Abx{xpjhf^Xblqi[OFEJPUY[[YXUSNKJJGDB:97555:>EIJLNRRRQPPNSXafjnpookjkljkklkkkkjjllkjjjkligffggeddeca`]Z]]]]][[[\`cdfedd]O=4CJRUUTSVVZafmsxonomjmooorrwtststwx{}|~q>!+#Axijx~{uiD(),(!\{(Qp|@/{}~yusrüpͺm~ry0!+DNNJB:4(1Wipvy`X[__`_^b`_U[S !$!$:]ߏ`kjgͮ˰ѱľurrjkhdimmj[=3HYr̸jyxnmnz~M&U{p>$'$"!!! 
"" ")1Z{tkhX;4B?:FNQNYc}~~|xvqnlfdba^^XWYSVTTQQPTQQSQPQQRROOONNNOMMOMMROMOONMLNNOQPPNQPOSRPQPNNNNPQRRPNPQPRMOOOOOPQRRQQPOOPRQOQRPQQRNMMNPPPPLMLJNMMKKMLJGCCA@=:1" !"!##%&))*-/0/1357687;<;:;=?==>??>?<<=<=;;82+" #&'),,-.13=Oq~~|{zxy{{{{ywz|zwtsqonnmmnmonnnoqpqommkjhfedfdcdehikmmoie`\YWTVTTWWZ`}K_b&(3:=@COSE@@BBCEDEEB??=;99632.*%")Kkx}xqldZX_krrdQG>@DJRV[ZWTRPNKJGC?<;94668=BGKLOQRTSNLLS[ahmqsrqnmnmjljmmmkomkklkjjjikjihiikhhghhhfcdebdba^^^_`behgfedXJ<=FF1*2:<=<853@QPPG1"1AKGQUTSRORT[bfjpxqmmnllooowztuussrttvy|~~yi-'2#W~fxx{zqb9%).'Bqq|i/\xl1O}}{vrsvx|Ȼ~w`rp,CFGHFEA) ;[kpyt_Y^][]]^`a\WZM$+/++;wˆۓcw^]ͣĢƧ¿νneKUdZV^jlgU;7JZpηhwvidcm|N&^zc9%*'# "##"$%+2Ullhfhd^`cgdhlsy}~~zysrpolh`]_]WY]YXZVXWWVSSUSRTTSRQPQPPPMMNOMNNMOQMNONNNNNMMMOQNMOOQOOSQOOONONJMNOQNOPPOQMQPPNNNQPNPPOOQQQPLPPOPOOLMNNPMNNNMJLMLKJKLJGEC???<8. !%%$%')),-.0132476:7:<==>??==>?@?@==?=<;9974/( ""$'4Lq}}|z|{zzyw{|}{{{vtsopmnonmpppqrqoonlijjhfefdfeeeggjlmmkke`[ZVUWVX]p¼J\c*-8=AFNKBBAB@BDBEECBA>>:99540.)&!0Tjy~zsjaYYhqvjXN?<>@GQWZWSRONLGEB<<886566:96:BD>-#.GPUV[\ZQQRVY\`eiutpnlklmlmnqyyy{wsqrstuxz|}}s_&+*,o~ypW,%-.(9j|TCk|U#f{xtqrxƾl¹^!8LIGMJIB##C`kqyo]ZZZWZ\_`a]Y`E'=N<1/P۱whnɵĬ¾¹Q8HmjZPYcgdV8:H]q̵jx|n]QO[o}M*Keg3 (,(#  ! 
$#&&),29]wjmrxx{|}~~~~~|}zxvrpmhefbc_^]]ZYYYZXXWWVUSRSSQROOOPMNOPNNLMMOOPONMNLMNMQMKMNPPNOOOMMNONNNONOPNOONONLMMNNMOPOOMMLNMOOMOONLOPNLNNOONOLLLMMMMLKMLHIIIHGIGHFDA??<96*!#$&&'),--.223489889:;<=>>@B?>>?>?=<><;<:99983,$ (Go}|z|}{yywyy{|}}zyytrppomnnonrrqopomjiighhgffdfdcbgigiopkiea^ZYWVZl¿Ibg0(4;HMHDDBAC@ABCEFEDCB?>=;7623-*%6Vo|zyrh]VaouobQA<<=CLRVUSROKKIED=;6663458=DJKORTTUSPSZbgjpsuyytooponoooopoopommnoomlkjlmllmkkkkkiijiiifgkiggegiihhfb`UB-%%'/:>@@@<:522.('5QXVW[cgh]VVZ[_adkyzspmkklllns}ztrsuttyz{}{lJ$">}~xkG&'/,'YtJǹ`'On>7~~zxutqÿmҽ}C #18>@<6=3'LelrzjWYYZYZ[_c`[\e= #*HXX<.8sʼƭô¸x3LjjbQLQ\Y^Q68H[s̶oz~eCABKc}J*U^B((&$!!%.168;DU`qy~}~~~~|{zxxwurolniffdabaa_\\Z[[XWZXYZWXWURRQQPQOPQOLNNNMLKLLOPONMMKKMMONNNMKMLNNNLKMONNMKMMMONMOOONNNLMMMONMLLLLNPMMMMNMMMMKLMMNOOLKKJJKIJJKKJHHGGGFGFGDB><<=84*!"##%(()*+,./23367989:<;<=>=@@=>@@=><;<;9:;988785-)"%Iq}~}}~}}|zxvz~~~}|}|yuuqnnmmmopoooonljihhhghhegfdfceeehlnmmjea`^[[i¼G[o-!/=IKECCBAA@CCBBDDEFA?B@=:87770+&0MkwzxrbXZgqshZI>>>AGMOPOOMJHHHF?:76665569@GJMPQSWTRU\chjmsz~ysqqponoqrqpoonmmnonlmljllkkkklkiklkkijjjikiijgjhhif^[YN9(&*44:=@?><;72.)%,@OUZY\gnmh]Z]\]^dm}uoolkkkkmoqxz{{qsrrrttw||{d2& %Szkd}xb7#)2,(\howov|\2^wm.K}xtrqrxvͼr|~(  7GFGDE@&/Qgow~z`UYYZZ\]_d`Zal;%$+5C>5'L铠»̧·o?iYNLC;@HQXK05G`rʳ{pzsH.@HM\}J))'&" !##%(8EMU]ejw~{|~~~{{{zxyttvrqplmommjhifffcbcab^\ZYZZXVXUVYWTUSSRQRQPRQPLMMMNMLLMMMNMMLKJJLKOOMMIILMNLMLLMOOLMKKLNMMLKKKKLLMLKLMNLKKLLKKMKKKJLKLKLNMMLKMLLJHIIIHEGHJHFHGGDEFDB@>;:962'#$&&*,,./244446699;==>?>=<>>>?@?AA==;<;;9:=;7999862,$%Kv|}~}|{z{{zy{}}|}{zywrqonmmmmmonnoljjjigfikihgdfedccfghhkmllifdbm}Jak1*>IEABDCBBCCBDECDEEBBA?@><9762.*& /Kdu||p\Xdozp^OC@FKKNPUWUTX]cgkpx~xtqoonrqppprqoonnoplmmkmnlllklkjknmkkklnmnkjhfecaa\RPPC-'2CI><=@>ADD@90)%'4EPYZ\esyum`_^_bhquommlkkmonmlkjloqpprrtvw}~~~~z_*&* ,mpdd~~vP%#+/%(dwjR?f|W%f|wsqltƿsɺtppkcyj!0GJJLE?6'7]nnwx]WXX[Y]_ab]Vls2 !%'&#"$mߊŹ϶ǾþkXZLEF<4>EX]D/9K`wȯ{lye:BS\S]|D 
!(.*%#$&-0::770#"%)**04668>=@BCEEDFHHFFIGDA@CABC@ABA><>>=;:<:87778852/)"(R|~}||~~}|}}~~yx{{yyzxvtrommklmmmpqomkiijghigehheheadfeefhgiklnmju¾|{{y|Kam2&9DCACBAABB@BBEFGFFCBA>@>=9755/,'$)Gctxuh[^hwwhWC?>?AFJC@=>ABDFFB<76777457ALPP?4,'#'8KV\[bpyjb`bchroonjjjkmmmnpppprsrrussuw{}}}|wI%$8~yyf6%--PI2&%KpA5y}ytqpsýmķxY"# 3>BD=2:5#>ampx~qYUW[Z[^^ba\Xtc!!8˃¸ѢȱĽ\OSXc`PENX__D2=Mawȩ|iwzY;FSVGcz?! '.257;0*,/4CTh}xroosy}ytturpsqlcJIV_jkklmnmjgifffeddda^`]]\Y[YWWYVUWUTTTUTQPQRPNNNMMKNOMLMKKKLKKMLJMKJJKJLJLLJJKLMLMLLMLLJJLLKKKKLKIMLLKJKLJLLJIIIKJIHKKMMLJHGHKKJIHIGFFFEDDEEDDEDCBC?>=<963,!!&**,38;?AEKQOSW\\]]^ZZ\YVUPPJIKGDEB@>=>=<<;:9865665431-&&X~}}}~||z|}~||{z|yxwuppokihhijmqpmmklmjiiihikhiiebgdbcdcdeilovzyzzz}~JYp3(??>AB@B?BBABCEFFDDCBBBA??<:7402-)$)Abuyoa\gswo`N@??>BFE>868=CEEB?968887669@HMQRRUY[^`bimsxutrsprrrrqpnppnnmllmmkkklllmlklljllmnmlklnlje``_\XN@) #2ZcU?9:<>CS^cQ>0($&4MY]]akzjabdlv{pljhijllmooopqrqqrqtrsux{}~~{m7&"Q~mvqnly|W1 (/)/Nc;-Yun/C|xsopr»yp׾ļ? "7LPM?68/&IgnrzhTWZZ[\^\b_ZYsU!LüˠƣĿκĺMLWdroQN`jbW;1>Mb|ɩuhxyZA>::633* &'"#%)+028=@EPVW]agkpquuvspokic`\VQQNIHFAB@>=<;;;977655322.*#(W|~}|{{{{{~~~}}}{ywuqonnjhc][ejmmmklomlkkhghkiibeddeffdabbbhpĿ}||EUl42@?@DBAB?ABCCDDDCEDCCBBCDB><:9551,(% %Ibtqf\epxug[HA@<=BE>6017<>DC@;767:8757BPdiZG2(%&3O[^^\euodfozrnljhhjjklnooooqoqtrrqquy{{|{wY*!((jnr~qI+$*.&;cxa)c}zwrosyƿjzз¼" *5FKKF@9),Rjpv{hZX\[Z[]\__VbsG!&Z綥Ǿ×ƻ¿Żµ_Z[_dV6UlheV31=Mb}ȥtnyxaI56Cfy= !"&*4D^ptxxztpx~xvtnc\[YbklcM9&%2Hat{vsrrpiV2#%2BQ_eillijijgfgfb__`^]][[[[YWWTUSUUSSSSRNNPMLMJLKJLJJMMJKMLKLJKKJIHGJJIIJJIIIILKJJLLJJJIHHJJKJKIKLKKKIIIHHHKIJJGHJKIJGGKIHJJIFGGEEFDEDDBCEC?>@AB@=<::752/'#//&!"'*.137BC>75588633;AGMOUWY[[]fkov}zxwsonpnmlnnmmnmklmnmmlklnnnomlkic@$"8bkhX=567;>BWorfV8.&(4P^``bekqruolljkhjlmllonnooqstqqruuy{z}ypN!$8~xl?+%+* GnF(t}zvponühʵ¿t+;!'?EEGDF9(3Xmru}{aVZ\[W[^_a_Tkx="#! 
/x瞡|Ư¿igg`caPXnlfdZ;5>Lfħrpw}}rZKQcy?(&!%&&(*,-6@MVfw|yshTIC9*,+1>Q_]YM:,)-BYq}xuqliS:#$5H[agijiiieegfba`a]\]ZYY[[XVXVUTSSQQQPOPNLKKJJKJKLLLJKMKIGJGJJIHIGJKJIIHHGIHIILJJJIKIJIHIJJJKKKJLLKKGFGHFFHJHFHIHGIHIHHHGHIFGFFCECBAAABCA>>>=>==<96531+%%0/(%!#$&).148=EHKQWXZ_dilqswvtrposljgc`^YUQMMLJGDAAA@>:87666552.*! %%1`|z|zzwxxz{{{~~}}~}yuusppoprsropnlmnmllmmljkmjgcihhhhhgfedgrɷGS~r31@?AA?ABD?@CFACDDEDCDD@CBDCF@>>=;731/-'-FXa[]gtxtjWDA===BA81.,04;BC>74369742:@HNQSWZ\^ahmpz|{vuropomnnlnnnmmmllllmlk^?*2RjoibS>048:7 *7AEDDF># 8cosu|r[UVVYX[[]a^Vwv3#$8ߋwiYfzücmrnmllolifaP65>LcĢrmw~xjaaoxA181*)+*))(./05<@FNOTTZ\js|~}}}}yyzwureL.+,*$##&-9N[e_WK6*-8Pe{|pd[L=( .APWbgiiffhgfdaa^][[[[YYYXWXVURSSPSSPQPNLJLNLLLKKJKJJKIHJIHIHHHHJJJKIHJJIJIIHJGJIIMHGHGHHJIHJJJJIGJKHHFEGGEHGGFGGGGHHJHHHFFECEC@A@?@BBB?>==<<<;;96540-#%))(*%'(& ##%(*.06:>BHKNSVY^cgijklomiikidb`[[XVUSPMMKHHGEDB><<;8764451.'! 1`~|yxvxwz}||}~{||~}{z{yzzyy|{zvvtomlkjkniiihlhgghjkkkiiihghr}}LY{v8!7GAA>=?BAA@@CBCDEDCDHCDCEEECB@@<:8641-+$  .@SW\dsz{qcPE?=:>@<4/,,08?A=843377427;CLQQVZ\^binr{~{wutroonnnnmlklmmi[;CZhmnkjeXC55;CDThtxuhR>1/5FT]bcefk|tnmliihhkklmmnnpqorrsrsvxxyzyte-!'gvqk`n|rX>+)*"8e~b+Y|vqnjyƿh׻7 Bψ㯄efqrŽzvssyzvtpnkhfaP98?Mc~Ğrqw~}thkzxZOQPMLPUOOUVZ\cfgmvw|}~~{||{xzwvuywyzusrfM1*(&&""(2BTegdWD1(.@Zvt[N@5*!"6CU]fheffed``a^\[Z\\[XUTVUTTTVTRQQQOMMNKKKKJIKLKKKIIHHJGHHHGFGGHIKGGHIFGHKIJIHHIKIFIHJIHHGIHIKGGLKGGFHHHGHGGHFDGGEFIFFEEFEDEDCCCA@A??=>>==<<:98332.+#&%*.17-*"! "$%'+.049>;=97520+'!);LV[iqxrfWLA=?;634775559@HORWZ^`djot}|{vssqpolmlmiUKRgilmnnlh]M=:CPT`pvxwm]J:2,5FVcgjjms}pmjhjihhgjlllnnoppqqqrtuwwxxysV!1y~zmQ5)+'.)FmA#r~}zwpmnÿSԶ%F1!5@AA@EF<*Rjnq{}{k\XYYXZ\_`_XZ|e#!#%"*S}yó{dsxtsomkjhd^OA?CM`~mrx~xjk}heglpvyxqmttvvssyz~|{z{zvxyxvvwvxyxyzvttjS8)%$''! 
!&-7MdpneS=,)3Qn~jQ:2*$'9HW_dfeecbb`]\[[][YXVTVSRQRTSQPNQNMNJJHJJIIMKJJJJIIIHJKHIGEGFGJGFGGDFHIJJGIHHHGIHIKJIIHHHHHGGGFGFGEFDFFFGGEEEGGGGFDCEFFFEB@@AA?@=<=>>=><:<;8530.-'$%#*132-+(*'# !!"%'*.048<@BIOPVZ\aaedabaa`^][WUTSROLKIHHIGECEB?>?=99975669941&#$)?g~zyyyz|~{|}}}~~~~xyx{|||{{xwqlihcggjljjkiihhjkmmppnmollmtxŴ~y||~}|zHP~t=&8?>?==>=??@BA@ACHIZaVIECBCEDBAAA@?==;570,%!'8DNXchjf^VJ@??@B<4-+*/9=><734457768>FOTWY]aejotz~zvrqnmh^filmnmnonmgZH;DS`ksvusof[TJ@BDYhllnmnwxnkifhihihhilloonnooorrruwyyw{tF#!C~}tfM6*+%AQ;*$*Ttq2A~|ysnnrq[̴e$2!(?CFFGJK52[mss{zhVX]ZZ[^_a`Zg]&*..)% ")`¼еžȷ¾~{ynsrhd_bdhgb_NGGJR]yqwz|ppydhryy|~|{xwwy{|}|}}yyyz|{yzxwxwuwuwyxzzvvutp[?*$$&((%" #)7K_ttlaJ4+,=^vqW?.)# 0BQ[bgdcba_]]Z^[ZZYXTUSRRRRSRPNOMMMJIKKKIIJHKJIHJIIHLIFGHHGEHHJJHHGGHHHGHIFHIGGHGIHHFHHIIGFFHHGGHDEFFFEFDEFFEFGHEEBDDCCCBB@BA=>=<=>=;;;::97431-+&&''.2.&',14.'!&(*,/147>;898784557662*"!+Dk~yz|zz{}}||~~~~}{{}}}}|ytoppnqrqsqpnliihiiilmnnoopopqpkg]l̽~}}}{{~HP{yB)3;9<<===@ABBDDEM_qXFAABGEGBCD@>=>;9832,$ %2@HQUY_\WVJA>@FB:0-,,49<;744338989=CLQTZ]`ejnouzxvqqoooooklnnneV=1?Sfoqssrpmhb^ZW[honopopu|slhfeffghihhkmnnnoonoqrsqtxxyti3"#\zoeI/)) -^VSE>7421-)),8^{\*R~{xrnoyƿQhųO%/&6879AJD*;`npozyeT[_[[\\^`^UoM-154-&!$2zǨĽµ|xžorjWSPYdife_SMRTW]jz{}xszkq{~||z{~{~~~~|yz}~{}|{zz|{xxwvvvvuuvxywxxwxuur`B,%#"$()(%""'0@[oxunZ?<<=<;;:9::8874301-*%&',/+'! '28/)"!$&(+,05:=BHKMQSUTUVXXXYVVUSPMKKIIHDEDFBBB@?@>=<;9986632124673/( )Dl~~|||yz|||~}||{}~~||}}{zxvsqpoljhhjjkjmoopponiaRMRScwŶ~~}}}GPvwB,6<<=<<=@CCDDGL_mSADBAFEFCAC@A@>:9740+' #3@EJMNUVTOHAADF=6/-+05:<:63327996:?HNRW\`gklnt~yvvsrroooni^J>;;Xhnpqrttrpkd^]ainpoprrsztvmiecdefffigijlolmnonopqsuuxxxp[""/w}tl`G+,).UWJDGFV[RKLW3CiH(j|xtompĻ=m׿- /, 6KIEGJN>"$Fhnru{ybVZZWY[\^`]Sx=++--)%&(8搳ȾüBd̹ptkddefiiiid[[^]_\Zm̾{w|rv|~}~}{~~}~}}{}zz}{|{y|||||}z|~{xywyxvtuuxyxvyzzyvvsdK/%$!$'(++'!!&-8Oj|{vfT;/8GaxqY>0)% *:KS]__a_\]\\[YXYUUSSUSQRQONMLJJKHGGHIGGHIIHHFFGEDFHHGFGGGGGJFFGGFGHGEGFEEGFFEFFFFGFFFDEFFCEEEFEEDGFDCCCEDDDDDDDDBAA@@@?>><=<;=:8:997642212,)$!%)'&%%140+!! 
"$%(+/157:AEHLOOPPRRSSTTRPQLKHGGHDCCCB@BA@>===::97774542023313-($$Cl~}zyxx{}|}~~}|{{}}}~~~|zvusrrmimlkjkmmnoqrme]VSZ\\]_lEMuyC$069<=>>@BBEGO[nTABCCEFCABAC?A=98542+% !0:CGHJPPNJGDCD@<2-+,29;8512338:87>=;;<;:=<:;98742210/-(" $! *153+!&$!!"#&*-/248=@EGGIKKLOOLLMMLIGGEDCCAA@??@=<>;<::<=;;<;;99213001.,*& Eo~~||~~z{zzyz}}|}{|zzz|~~}~~{yvuvttsronlijjkoolqtqfZY[a`deeeaczFOyzB)2:<;=>BBCFJZnRIEFGGCCBCFBAB==963/,%!/:CFGGIJJKGBFFA5.,-/597310/17897>@FNU[afgbdr}{upjknnqpqpqqpquxyxxulgab`bafnpqrrvtsqlgaZZ^`b``cehjlmkilnploortxzvvc<# T}xphV8*)*TF#.:0..2;A: 4a~]'O|yvqlnvžQJ˶^$3::88;7(0Xkosy~o[Y[ZZ\\]_aYbx&(%$.ZʋĶ½}idk}mszwvsopolkouwy{vj]XV`~vlrtz|~~~~~~~||}}|}|y{z{}|{|{|}||~||}{{|zvzwwxwwvuvwuvyyyyvyxtmU9)# "$)--)% #)0@VpoW3'1JioU>/)# %7COW\[ZYZZYWUTSTRPQQOMMLMIKIGIJJFFHGFHGGGGFEDEEFFEFGEEDDEFFDGFDGFFFGEEGFGGGFFGFGEGGEDFFECBBCDCFECCDECABBCBB@>?BA?>=;:9:99:::9974540211-,("$(/451' (2,% $((*+-168=><9::;=?BDBBIQPMI@<900/-./+&#$Hs|{|}}|{yxz|}~}|yywzz|}|}~~~zxuwusttywusollmnoptvqfV\`fgiijgffc`pǾEKv{E#.8;>==ABCNZ~iRDEGEGDCBCCA@@?:9:52+% !.8BHC@EIJKIHHD:2,,-27865/.14799;>DKS[_cfhkvypjg`afikjjkprstvuvvsklkhhnnoqppruqhZ728@GMOQV`fc`deehikjmnnruttsZ+4)+n{toeS2)(!Pg9* Aj}C*j~zwsokiwú>`űC (?IJHMPA&7ZlptxjX[[XXZZ^`\Wqe!%%%.l͐|xžĿ~NWz~yxttrpx|yvtstrprx~}}~~uj[RRMNcnc{|uhpu~~}|~~}~|~}z{}|z|y}|z|}{{||{}z{{{|zxyyvwvvxvuwvvxyzxwxwxum^C.#"%,.-,&! 
$+19He|mV4%)@]x|bG4.)" +;HQUZ[Z[[WUUUQOPQONLKKKIJJGGIHEEGHFEFGFFGGFFFGEEFEEEFFDFDDHHFGGHGHGFFGHGFFFFGEEFFFEFFEEDBCDCDDDCCCC@CAA@B@?>A@><<;:9977878665231///1.*' &(*/.($#(27+% ##$')*-03489:=>@ABCDAACBCCB?>?=<;<;<89:1(,-+*,'%,Pu}~}}{}~{|z|{{|||wyywyxyzzz{{{{yuuttrrtwz{zyvrnmnnotvreP\bgikikiifecb]esżFMqzH(4==<=@CKY~dMGGEDCBBCCACB@><:730*$ ,9BB@>AHJKJMJB71--/59740./259::?7"(Opp/9x~zwqmllǿg9m׽)1CGC@A@;#"@blswzzcWWXW[X[^`YTS!!'3{zq]Yuéļj>Tnzwtpmjgjlqz{yvwwxwvvy{}~}xwsi[RNJFABzsmu~~}~~~~}{}zz}~}{yz|yz}|y{z{||z{{{zxxwwvwuuxvwvtxxtxwyyuwvndK0%  #(+,-+(%#"%+4AYnzqZB!';:::;;:754445642200..--*' !"'+-*'*/1-( #'"!!#%%'(,.01324699::=;=????<<<;:<:;88679>L]txwhWF40+*,,.5Uy~||xwz{}{z}|}{|zyywxxxvvvuuspnmnnsvx{|~}xqnrsqrurd[^dgjlmmjigfdcaccal;ENsH$07=>842,% )3>B?>EGILONJ=3/.-17840//24:<=?AHNW`dfgm}wpjha]ZUPVWUXZ_`iimmnnmlqvtqP!#?BHEOSVXX?2^zV'O~ysomkqźHDxϷk"3>??@BI> &Llosx~u^WZZXYW]__XZA !(@呱޳ndeɹ¹|GLaoue`Y[Z_]`iqtwwuvx{zyxy|~~|{zwndYMLJKM~uw}}zz}}~~~~~~|{}{||}~{z|zy|{zzzyz{|{z{|yxxvvxvvvtwwvuwwwxvzywwuqiQ5'"!%))+-/+& !#*/:Jfv}s^,&4Uut^E63,'#'5BLRVWVSTTRQPNNNLKMLJHHHFGGHGFDBEFDFFBFFDDDEGGHGCFEGFGGDEFFGEFEDEFGGFFFEEEEFEDCDEDCEFFDCCDDCBCCCBCBA@@@>><=;::::887524334100...-+*(%  !%,-%!$).0*%**"!!#$'++,+-0136546579998889889876666:DXxtYA3/(*/;]z~{vyvyx{~}||yxz|||{{|{~~zxusqngjlmrvx{|~~wrtvwsqnb\aggmmnljkjiheedda_]`oƼFMoI%0:@?FPuzeP?EEBCBEDACBCA>;8574,%'5?@@?BHJNSPE6/,-057500./06;>@BHOX`eio|uojhb^YY[WYY]``bbSOR^fln@6lW)29=R_edpocW1-:L_os\2.B71161..,)*&"!-kz_pxnaph`lwvni_8))#(#&*22:E=&>h;/c|wpmkjx÷@X­ūO $6?GIFJJ7-Sinqx}}q]X[[ZZZ\^^X`0#&J؈ȼ~ɩý{j_nwrdc_bejlnruttuyxy|{|{{}~}{xpcWRNKJ{{~{z{}~~~~~|{}}z~~|{zy{{yyz{{zy{{y{{{{zxyvvuuuxutuvwwxxxxyvtskV>*$  "')+-/.,&"!!$.8DZsy]. )<]|}fO=3/)'$ -=;=<<:88977754221/.00---*(&! ! "'.0+.8-#)(!!#&%'+/-0111247555565664336546:Cfv`@*+.>^~~~|{ywuvy|}{yyyzz|}}~{yrppmkjmnsvwy}~zyz|{uqha]ceimmmkjkjhhfgfcaadeb]csƿýEOmL&3?GRqy`NIBCEBEDCDFBA@<:9862,& )7@A>>CFMUWM80-'! 
# %Nql26v~zupmklȻɿd4fmdo^vj*  ,7>CBIE(5^kopv|~nX\]ZXZZ[^]Wj "&-WǂżǿśüUiv|~~wttux|wwxzyxy||}~}|{}~~xlaUPQU}{~~|}}|~~~~~~~~~~~}~}|}{z{zzyxxzzzyyzzyyzz{zz|{xxxtuwuwxuuuuwvwvvwvssrl^H1($!$'*,,,-*&#$*2=LbK('5@WwqX@92.*&!%/=GOQQRTTRQNKKLLKJKGHGFGGFGFFFDDEDDEFFDEFFGHDDDEFEECEHEFCEEBDEHHFFDCDDEDDEDDCEEDEDCCCCA@ABAB@AB@@?>><;<;::9878755420220/0/++,)'%  !%+,-54**5*  $'(*,--11224433532235438Fo~qT/+9`|ytvvxy}}yxwvxyyzyrsrpooonoprsuw{~~xrlhfggjmllkjhgghgifdjgecbaa`]iuĻ{yxFMo}M*=OnxaNFDFCBBDBBBAAA><<730,%)7@B>@P?!$OhotdjvoZOahatlj`//vmlkngG&4;6Nrmlfd]XQLFEA99W{rny}wrooO*/NpYINCFQH<92*($!.^|[(C{yrplkqзȼLErĸ7%xy("!;ECRim_K5 ';IFFGD9#=apqsyzm][[XZY[^_ZSvy (,5hï˽|oz}{x|{~|z{|{|~~xlaZW]}}~|}}~~~~}~}zyzx{|zxvvz{{{zxzzxzzy|zz|zxwvvvwvwwvvutuuxwvvtruqlbP:0(  "&),-,-+)(#!'/4<5+&$+49=PjzbL<50-)$(4CHLOPQQOLKKMMKJHGGCEFEDEDDDDDDEEFGEDDEFEFDCDDFGEEGFFECEEDDBFGFGEEDDECCFEADEEEEBBCBB?AA@BCB@A?>@=;<;=:88877763331020/..-,*('&%  #!!),-2.+02*#!#$%(*+--021335522104AnrY>8a~}~}|xyurx{}~}zxxxvxwsvsppuqolprsuu{|}|wwtrolkllkiiijjigjiihgfhebcccd_ep˿{xwspqFIpM#&Aez]LBBD@BBA?@BACA?=842/+#)7BA>@FS_[L?41.03684.-..2:@CHPYenyzwtqppqpiVCAI7"?aoc>5]ga]u{ptj|]+YrLernp^4&69<=9320)!*8CABHXdeYH93//25671.++.5>DJPZeo~~{|}yv`H?D?B\jaL?ToWWp}`da4T|I 4#?anqR),7:Foj.0B_XTkYLJfkM.:zspra,.9WD>}JW=6?=<'&Gmk3=pytqnkkyUt΄84")5527)# "2BJMKE@1-Qioqv~{gX[[XXY\_b\bK !()*@ꐡƸmy}aSlw{{~zy~yuro~}~~}}}}~|{yx{|yywsxz{{zxyzzzyy{z{|ywxvuvtuvttvutttvvvvutsqppk\I7-&$&''),++)()'$  &,--/01467;>Onv_H;741)%!#1>FLMMNLKLJJGGGFFGFHFDDCCDDDDEHFEEDBDDEFFDFGDCCBDEFEDFFFFFCEDEEBCBEFDDCCCEFDEBBDBB@ABBDC@A?@??>=;99755555310/...-,+++&$$#  $-2665<>:.!"# #$'(+00000.04Is~mr~{{|||~|~}xwwyxz|~~~|xuxqiedkjimpuz}~{unjfffhijmnlllljgfgjjkihfeefe`\k~|zyuuvwvwyFGj}R$(Us_KIEA@?BB?BBA??@=845/'#+;CGJ\ekcN?71/27673-++-2;AFP\gp{nYIB?HTC?IZm^7JgLAZUUyw5*[IYslG"199MxZ,7]jXlc;27bi<.M~xrquW(3DQ5__ln/!88!-VvW/N}xtpkjnҷǽjfbL *=C07<)! 
"9KOOOI?,7YkloxxaY[[XX[]`dir= &)'/J܃ѽi7B_ry|~~xy~~|~~~|}~{{zwxyzyxwwxz{{xwwxzzxzzzzzzxvvvttvtssutttsutwtrrrqqpn`L7/'" "$'')++**))('%"$(,123479:;?Mf{iQ>:91,*%!%2=CJKJLJHHFFGEFFEFDDDDCCDFFDEEECBBCDDDDDEFCCADDFGEFEEGFDBDFCBCCCEDDBCBCDCEDCBBDCABA@AA@?>???>=;:75655651221..-,*(*($%%#  &49<6.*#/>IPYbieWH;4/14554/,+*/6=DQ]gwyfR9)$$#4>>. 97%:ZjwwQ 1rcUloe9%694bv\JUibamS<3Gh]/3f|tqsqC)5NQC419sgGD(!71:e|J/]|vrnkkuʳĺYv۵;+os&;SOKcZ." !.ELLOLHA)=^mps|zYXYZYZ[\`kv|% #)&$2\qǴfNdpw{~}~~~~~|}}~}}~|z|yyyxyzyxwwzzxvvvvxyyywxxxyyuxwuuusrrtttssstvrqpqrqpncM<2)#! #%&')+++*)(((($ "(-024689;;;CXwuZI?;63/+%"+8DEHJIJIGFFDFEEEDCBCDCDEECBBCCDBCDCCCDEDCECDEFDDEEFFEFCDEDBCBECDCBCDDCBBBCCACC@AA@?>?=@>>=;=:987766542320.-+))*)'(%%"  $'(-2;FLNU^hmYA(*35/"$(,+..-,./7X{}|||x|~~~}}zwsrquxxz||y{|sif`^[W\\`gnsx~~{vqkikmmmnnmmmlmlkjgihhfhmmlkjigebd^euƾ~}~}{{|}|~E?gY'$;c~s[KEAA@CDBB@CEABA::53.(##9GMOVbd^RE921366430+*.2:DP\i}u\3!,5;47CIB$(`}pdjiV..:97r}nnonjkj[OLbqL,8|zsrvl0.6WlhYUTYBNgV:$,=- Hpv@9mxuokin~µgDz؜&GN?aniyc #  "0.FKNHHJ: "Ccmprz[XZ[YY]`gw~xq #$%&&$(:bϏJq˸wy~~}}|}~}~{}~{{}||{z{ywyzwxvvyywuuvvwz{wvwuwwvuxvvvttsstsrqrrtsrrpqropmcSA4(#!#$$$'*++)(&(+.-'""&+-146899;;;Rj}gOE=::61/("%0>>=>?>=<;<888875432330..,+)))(((&%#  $)17=>ELPSNPRYejdK0'/5,  !$'*+-.,++.07Tu~{}~||{{~}}~~|~}}~|ywtsrqtutw{zpggdcde_^_`fit|}{xsrppuutrrronkigllikihfhlklmmlljhfccbhpAHiX*&Al|w~qYEBCAFCBA@CECA@>;7540*$,:BEHWZ_ZQA93359753-*,06?K[p`-%2;;LcgV'",0AKCSkfG(4=65FOS^bjqpvonq~j:4K}~vpq{W%.9exdbg_`]]YQC>76FD (Ssh5Gwspljq˼>6r}0a2 Ebqur7""Da|pXD>?>972.' (4>>>>?<988776533430-,-,,)()('%# !  #)5:>ABFIKHFGHJNSXYN6,./  !$%),..-,,,,,,5Hi~}~~~}}||{z{|~}~}|~~|}~~zwurrsw}yoefjklf^YYZ\bqy|zyvwuuwwxvvusqomkkkllllijklnmonnlmljfcddai{ADf\,*>^qww{pWIDAEEC@AACDEA@>::88/*$*9@CIP^a`MB96578650+,04=H[p_'#)-6:R^]e`OE,.[pc9'792,3=;?<ADDB:DHOZbdlqx|ywwvqrz~~~~~|}~~}||~}}|z|~|yy{|yyw{xvwwvwuuwyxwvtvvvyxuwttusuvutuuttrtsprpqtttspoonnnmh\D/&"'('''&*4:=8,"! !&,.0478::;=GZt|_JDB><;84.' 
,5;BEFFDDECBBECBEECCCBBCCACBDDCCDEDDBCCCDBBAADDDGEDCBBBBACCCDCBCAFBBCBA?AA?A@@>=??><>>;99977533432/--.-+)'''%$#  #',277889;qyl\>'&2FqJ(")( 3M_lrt\QWXX]bp\0=>;@FIIHSXXST^cjnuz}|zyywrpnljfhkkmz~~}|}}}{{}|z|~zz}}zzz{{yzxywvvtuvtuxxwwutvvxwuvvuutuvuvtttsqqtrrqptsqspponoonli\D.% %&(('%(09?@7*$#" !"%',/48:9<=?DNigQFB?@@;96.%%.8??@<<>>=;=;:98975423531//0.+))''&&$"!  "&+32345;?CDCDCFHGDDB?82" #$'))+*-./..---+*))*++,3B[u~~z}||}z{yyz{|~}~}|~~tjeeeaXMLOMOSVco{{|zwwxwwwxyxxyxxuroolmmmllkmolmoqpoonnmkhffeedm~~|yxwvzDH`a3 *Dbu}|xuzjOBBEB@@CABADAB??>;52-(!,3*)?[Z>1'%,NR0&%$OldI)+447V^N\bZYXWWPRKI90Gzmt|zzsorg(,;_[Hgnihih_[SO@:E?'Wrt?@DHc}p\KEEEAA@83(#!)18>@AABA@ACCDDBBBAACCBBBBACBCBCCCDDDDEDCCEDDCCCFDDBBCCCBBABBCCB@A?????>=?@=<>;=<=;986774313420//-,*)(&%&&$ "19767<@BDEDBADDCA?=;:5,$!#&)*+-/.//0/0-,,+++*(())((--/>Xi}vv~||~~~}}|{{zxwvyzz}~}~~ufcd`ZOJMRTPPQ^gt}y|{xyyxxxyyywyvtsqnnonmmmmnopqpqqpqomlkjiffdaafo~zyxvsqmwCF^^6!,QouyttyzuO@BA?AAA@CBCCDAB?;843.(".DXjsxxndSC;9:850+**09EKXfv7&'!GW]ONNOcziYO:.;B?@aoc>*173?RLM_XXX]\XPMRT:.\vpwzwpnw~N%,AbP9HMU^eknhZG27E63]sX4Z}zyupjeqƻp'&@M@(A?GJACSdpdWZ[^dvN0126;AOenwy~{xpib_^YXTXVUZZ]__``acefir{}~~~~~~}{|{|||zy{{||||yvxzyxyzwywwwssvuttuutsttvvxwwvrrrqtusvsqstrsqprnoppopnnnmmolmlh^L7(# #%)'$&'+4=><70*&&%" $*/5:=?AACDDSuhQGGFFEB?82*"#-5:=ABACAABCDCDDBCBACABABEDEBCCCCCCBBABABDCBCDC@BCCCBCBCBBCCBA??@@?AA@@?>?;<;;=<:977765331110/.-+*)&&%%%# )67998:=ABD?>??>=<96983/+$#((,../00..//-.-,*++**('()''&()*+2CUivoVslMRr~~~|}}}~~~{zxxvwzyy{~~}}~}||ukhf_THKRXYWSQX_k|~}~|z{zxzyxxxwvutuuqsqoonmnlmsqqroqpmlnmkjjgdggfgghx¾~}zvrpmd^VjFD`]4$<9;83-' 
2PelszvqeVC=<<83.*+-2=@?@Oezq("GW\_nmnvsywc`pxlcXbkZ4(245HVIVbafkfbWC;RJ57vwp|tmgpY-)/N^>7689=BZlcXJ>;?)=cvxD7l|zwslgev÷Z!!(9B:Bb#,**4.%80*+059?BIMNSUY\[\d\bdacaVTU]itdZY_bdw|`EHIHFHPd}{yytnjff_ZSTOPSORSTSUYZ^^_abaeeejt}~~}}~~{z{~~{{zyyz{{yxyzz{{yyyzxwxxxxsuvstuuuutssqtttuuwxurrrstvutqqqqqpoqtoppoponomlkmlmkhbR?+$!%))$%%'.7=CB:3.)%$$!!$*05:?BCCEFJShxcNJHIHGF>80'(/5;?@BAA@BCBABBCA@AAAAABBDBDCDBCCCBABCBCBBDDAACCCBAAABDDDCCAAA@?>@A??>>?;;;<;:9767754421/./..-+*)''%&$  + + + + + &-25668>==<9787544.) #'*-.-.//,+--+*+,*++)''(((('%('')'(+18=GHM>-7,1Vy}}|{}|zyxwxxywxzz{~~~|}~}}wrqng[LJVZ^^ZVTWcu}~}|}|{zzyyyxvutuusvtqqolplnlqporqommnmmkjjiihfgijjkqž~}xqojaXSMHGkDE]^.(E`lwwtxx|V=>?@B?@@?@A>A@?B?=;>974/)"7PipuxyqgWIB@=73-*+/8ACDA=Sgy^5O_Uaolj_noVczxmmi\fiM-)436MP;70(&/6=AA???@?@ACCBAABBCB@BDCDBBA@AABACDBBBDCCECABCBBBBBCCDBA@BBB?>>?>??;:9766753311//0/-+*)''&&'#  + + + &/3566??B?>?B>961.)# 0Qmosxzsk\JF@961,),4CQVRLLAI[u|G!9GB8DGKGUnZJlxglzdYib@'-44;VE0000:VfjhPMP:,S}~|um\@ )BMntTJDFIJPSIL]k_UWW\cdeeghikknnnoppmkkjjjjikihheiqx}l^RQOI?Rftty}|ywsrsttmmiec_]ZY[XXVVTSPPQQNNQQRQRV[]]_``_^cfhr~~}|}|}|zy{|{}|yxxy|zyzwxzzwxyxuxywwuvvupststtstssuuttssuuvursttrrorrrtqnppmmlmqonoljlmligfgd\N2(!!"!"#',37-% -4;?>@BAADCCABCBAAA@ABCBCAC@<=>@CAABBCBBACCAC@?@AA@@BBA@AAA@@??>?=<>=><<;:;:865744221000-,+)('&#%#! 
+ + + + + $.16783457893,%  $%(+,+)***()((())''(%$%$$&'&'($#%$#####%&%&:[w~|{}~~~~|}|{~}zz{{xwxyyx|~}~~{|~|zxxvlc]]_]__]WJE]r{~~}~~}|{{|{z{|z{zwrstuusssrqnnoooqqqqrpmjlnonmmlnpqononnnmvý{riaXRMJDBDCDFPtBC^_/ 2J\nuvtqs{pDBD?@@???@@@?@>?AB?@EJB;964/) !2Mfmt{yuo^RMC:4-*)0=Oblfb[UNSP]n}|;4/+*+&(//%&)*,5CB/?aq{xK]kX5(245CV>678:F_hi\>NN7.iy}{qgQ.!,F]QLMMKLLPNFKD79E+@ezW3f|yvohaB'?Y|fBGJLKKNMPRRSRTWZZ_cfiqqkjihjmknpqqqrpomrpqorsurvz~~}{}{eYJIJC>I]knruw{}~}|zsqkgfbcWYchjeffbba\[\[\YXUVVSOOONNPQRSRSXZ]]_aab`bdfq~~~}}~~}}{|||{|{z{{{{yxyyyywwxuxzxxywvxywuutvvrprttuttsrqtutrqstuvtqrrrtsprqrrpmppnnmnpommjjkkkjhggf`P4'  !"#',3:BNM;$")1:=AGF@=<=>@CGHHKNTklXQMOPOOLE?7,#&07:@A@?@ABABB@@B@A@?@AB@@<6227=?BCCCDB@AABCA?>@B@@ABBA@@@@@??>??<==<<;:;;975564421010/,,*))(%##! !" + + + + !*26:@CIMNRRTUTLG@9:98:==91)"! #(*++*++)(')('('&'%%%$$&&%%''%##$!"""#$%(=ax||~~~~~zz{|}||{}||yzzyyxz{~~}}~}~zyytpgdc\^]\UJBOgw~~~{|||}{{||}~||{{zyxutursooqrqronppononkknpqopqpqqrtssqrromml}ýyqhc[TNNJJGFEFEEFUwAF[_2,Kdmtrstx}}IAAA=AB@??@??>@@AB@BFEGE>;941,'"5Rkmsx{zndZPA:3-+.6CP[iolpifd[Y\_o|f@DDEHGLIGBDCE@=@9956Xo}{VMfeO-*335HRBCBDDGGIC56RH0?vyywtz~~wnaK$",JZWVNNLMNMKJH<7CG!&Nlyn;5k}{wsnfW4';LXn~_\\Z]]]\\]\Z^__cfhlmoomoonnpsrrssttvwvvu{|~{n\ROKHAG[hnuz~|yplcTJJSPWXZYSVdggdedcdb`^^[[\YWWUOQPNNQPPRRTUYZ\^^^a]^bchs~|~}}}}~{{}{{{{||zz{zwwwwvvvwxxxwwxwttwvuwuuvtrpqsutrsrqqstsrqsrvvsqqqsspprpppnnonmmmnmlmkikklkihgff`P8' !##%)-6COO<"*1=GSelfYLB9;?CIHHLNTc{v\RORORQOMGA4,$")4;>??@ABDBAA@AA@A?@@@>>8.%&+2:@BBCDABABBBBA@AABBABBBAAA@@??>>?==>><;<;:99655443100/--,**(&$$%"!"  + + + + %09>BILPVVVXZXQJFBAEAAB??=1)"!%''(((&''((())'&$&%$%&'&%$&%#$$"! 
!""#&:`x~}~~}|}}||||}|}~|||zywvwxz|}|}|~zyvpnie_^WNBFTp~}{yzz{{y}||||{|~~}}yxvrsnrqrsrrqoooooliknqronooqstsuvuutsqpkilwĽſ|riaTQROJIJHIJLIIJJK[yADVd20K`jnrtwy}T=D@?>A@???@@?@BDDA@AEEGFA>:751-&!4Remrw|xsi_RE72145;BIMNVbntyoojffelrt}LBFGGEINMPPSU\\\`Z^][enpd[jj`F&-96LQVSPKLMIHT=/UkzZ$/f}zvtpj`L- "%*./25;>HQ_nx|tomlijhjiklllmmknpqrqronqrssswxwyzx|~~uf\XSLFJ[nv{}zyvunj]^WNLI5-=JOTWWVUVcfffecbec^c`_^^YYZUSRNNOQOOQQPRVXZ]^^```defs~~|}~}~}|}|{{zxzzz|zy||ywvtuvvvvwvvuwuuvutsuttturropsrssrqpprsrrrpsusqpqqrpnnpnpnkmmlmlkljkkhjiikihhfefaV>)! "&+1BNO6(07CYnyk[KD>;FHHKMPQXo}fYUTVUUVUQIA8+$ !(-4:=?@ABBA@A@@@@??@?=?8+!$*6=@BCBCBDBBA@ABBBBAA@AABBA??@??=<<==;;<:8995543431//.-,*)))%#$%"  + + +  + 0;CHMSUVZYZXSROOMMFAA@CHB7,$!"#%''''&&'(('&$&'&%%&%&%##$"%$##!!#(DEELV``fmpquxurmkfgw}m9CBDEFGJNQQRSX[ZZ[^afikkillgZ<)4<68IHHMPJLPIHEHYO-/l|ywturstnjc_U3 #!&+3=GHMNC8XgxyI#<\t}}~}{xuqpmf[D4246>BIKQTZ]chiqy}{xtstqrtutvwvvvutuvusuuvxz{}~wlcXNJO`t{~|xsmkkmig`[VQLJHB51=JOSWXXOV_bddc_ab`abaec`\[VTTRQPOLMOPPPTVZ]\]`]^_`cjr~}~}|}~~~}||}{x{zzzxzzyz{ywvuuttturtuuvurtwutsttutqopoorstsqppprrrsqnssqpppqpnmmnmolllklkljhjligjhijhgeeedbX@) #&.?LH0!*,0Af|{iZH=>EIHKMOQScl[VWWYZYYWQHC8+$##)/6?AA?>8-# +39>@ABBCBB>???@AAA??@@@@A>?@?>=;;<;;;:99977642221/---*)(('%###!  + + +  ++;HNVUX][WURRSVRNGHFFIJHB:0' !$&'(''%%('&%''&''&$$%$#%%%$"$" ""*?eyxvwxz{{||}|{zz{}|~~{}}||}~}|{zzzwuvwvy{|}~}{wsnf[OXft}{wvvvuvxzy{{~}|~|}||yy{zxvtssoqqpljjnoonmnqrrutxwswwwwusspnljedp|{pe\YPNMNLKJKKLLINMMNNNNNPbCCSze2 "=Zfquy}dAADD@AA@????BA>=?CABDEEEDDEE@=752/*&!$?CHJNQROUY\bhnv|{}|zvunjhsQ>BBCFIGJMPSSTVXZ[[^`cgiihjibU0(66./.113356;>AAAD1+>ytnlicc^L) ""! 
;R^l{|g5!'@PW]bfmpuxtiifegklbR>BFMRYZ`ckotz~{yxxzwyy{|zyywz{|zo`UR[r}}~{vqssrolgfdgif^WSQMHH@21>KPRUVWOU\_bba`__``_ada__YUUUPPLONPOOQSTUZ[[\^]]^`civ~~~}{|{{}|{||{zz{yyzy{zz{yyzxvvusssrtssvuvwssuusrtsturqrqoprsqpponoppqqpnopqpoppnmnmmnllmllmkkijlkhgiiihgfeeccd\H,!(1AAB???@@?@@A@??:3'&/58>ACCCAAC@>?>@@=?@@@>A>>?><==<:;:;9888886432010,++*(((&%$"  + +  ):DNPV[^ZVSTXWQLJJJIHGCA>;1) !#%'''%&&''%(&%'&'&'%%'%%##"##!#*?dz}sqruvxyzz|||zzy{{}}~}|z{}~~|||zzxxwwyxvy{~~}}yofchw}z{zz{zxvwy}|z|}~~~~~~}~~~|{utsprqoomkmmlmlnoqstsvwuuuwwuwtsrqqnhe`fiy~si_WSRQSNOMNOMLMONMMNNOPPPORTeABSzg1 )D_kt{oBDAAA?AA?@@A@BB@?@AAADGFEDGECA?9770+*$ !%8Thlq{}l]TPNKILQTTRRSVWY^cjnsxy~zxxtqmq{n;ADCDEHKMOQRUWWWZ\]`ccfifgigdH*)52,+,.++*(*+.-*.-.+O}xtllZ>""! .:GZhpnV-!%()/889AHNX_XPCCPQ[gh]QT\`ehnpuz}}~~~}|pkjn{~}ytuusoprqpnjgefgif`YSOLIH@31?JLQVXUPSZ]^]`a^]^\]ac__`[VWTOQMMOPRQQQQVX\[\]^]_`cjw~~}|{{y{}zz{y{zzz{wxyxyxxwxywvvtrrqrssuuuuupqstqrttrsqpqoprqsqpppoppprqnlmppopqnlonmnkjkklmmlkiijjiehgfdefffccd]I0 !)1:C?0')*%&2PpycO@>@EJPQORRXg|h[\Z\]``ba]VOA4,%$*/6=?A?>>?@?>>?>??=7,"#)18=@?>?@AA@A@B@?AA@@>??>=>=<<;::;;89978554100/.-+*)(('%$"   + + "/;KU\_^[WXTSPHJJHC?;:<>=75+% $%$$%$%%'%$&&%%&&%%%%%" $#$! !$*Ahy|qrrrstuwvxyz{{zzxy{}~}{|||}}zyyxzyyyz{|~~~|{ww}}xwyz|{~}}~|{{~|}}~~|~~~~~~zxtstrqqplmmlnmmprrsssustvxyyyxxwwtrookhdb`qż~ribYRQRPRUTRRRRNNPQMNPRRQRTSSSWYkBDRzk7 /OdqzLABAAA?B?>>>AABA@?AA@ABCEEEDECB>;85/.+$" ':Tflpy{mc]ZXVZ[[[XWVVVXZ_`eekqvz|{xxrkoqxK;DFHIHINPQRTWWXXY[]]_dfiihhg[:%-51++++)**(((('*,,-0cxrkY6 $$.>RM>3.3424568<>?@CCB?CMNWdfdfjlnosw~zx|~}zwturrrqrrroiddfiigbZSQMLF@51CNORSWTLOVZ[Z]\\Z^]_``^a\WWSPOONNPRRPPPSUY]]\[\[^`bhw{z~}||zx||{{zx|zyyzwxyvwwvwyvuvusrrprqrsqstrqppqrsqrrrponmopqrpqpnnoooqnmmnpoopollmkmnllmmmlkjhggghgdheeeccdccfe^N8&!'09?>2 '-//--/:TnydLFDDHMOQQQTSd~g_Z^^_bcdc`^XOB7.((,49;>?@>AB@A?=<<<8.% %.5:=>@A?@?AAA@@?A@>=>>==><<:;::<<9:96545432001/-,)))&$""   + )AKT\^\WXOMHCGDB=8657>@>>71+$! "#$&'&$%&$$&$%&&&%"! 
###""$/Giy}spqpqqqrquuvz{|||xy{z~}}|{z~|}{zzyy{||zyyz{z|~~}|}}wttvwy|~||}~~||{{~}~~}zzzvvusqnomikmnmqpqqsutwxvxzzwwxwvtsrrnlhgfghs¾{sid^[ZWTRRQSTUSPQSRQQSROTSRSTTUUTU[mCEKyg/  <\qVABAAB??B@@>?BBDDCAB@B@@ACDFFEDEE?;84/,)'" %;Rfns{tneedfd_]ZXVXXY[\^`^bgjosuy{}~~|wssrw~q:JPSRSQPUSWXWXZY\\]_bceijjhggW1#/4/*))***(()&%'())-;wxrjR0#&!#+5=DFB>><@?>@CHJKNNPY\]_ehnqsstuuxz~wwrrv}~{xvsvsrttsphhdaggfc\UUPLF>30BOPPUUTPRVYXXYYZ\[\``]^`\XWURQPNPPQSRRRUVZ\ZZZZ[]`ci{}~|{{zwxzz||zz|zyyyxwwvvuuywrsusrsrrporrpqsrqmoprrqqqppmlnmppppqpnoononlllmoononmnlllkjiikjhiihggfgfggcdecccacebaUA." %.3:<1#'.257=<:?>91'!  )06;>@>AAA@@BA??>;=>>;<<=<;<:99;988643333321/.,+())'&#!   +  + +$5BOW\]ZUSF?CEB><88:ACCDA<92,$ #$%'$#%%%$%$%$$$&""!#"!! #$.Hk}|trpqqonnoqnrvxy|{|}z{{}}}}z{|{{{|{{|}{yyyxxxy|~~}~xpppprtxz~~|{{z|~~|{}}|zxvspoljkmnmooopqruwwwyyyxxxvwuttrronmnjhehr~skjfb_\Y]YVWURUUXRRRRUXWVWUXVUUTTTSRRZuBHVug4 (Jm]DEB>@?@CDECBBCBBCC@B=?@@DEFCBDCBFD=;710*$ " '=Peqt|zoihklgc_^\YYW[\]^]bcdhhkmoswy|~zwuxIC]]_\]^[adbbdcaac_cigfjjjkliaJ*(44-+,,)(((()''(&'()A~woeJ(#(-'(/27>:0(#$,3:??BB@>?@@??=;?>;<<=><9::99:987533333101/-,,(''&%"!  #''$"   +-:IW_[VQNHGDCEC@<>@EGHFGGC<1*$!#$&%%%&&$&$&&$%%#!""#" #&-Jl}uropolmmmoomptvxyz{|}}}y{}}~~{{xyzz{|||{zxzzyxxxy}}|}~}}~}|xxysomlmoposrvz||zz{z{z{}}}}}~~}{y|{xurnkkjklolnomptvvwxyxyxwvutuussssronlhhdbl¼xrikggdbaabaa][[ZXXYZXWWTY[]XXXXTQSPRRSSUXwCGQvf3 0ZveDDA>?>=@ABAAABDEECAA@>>@@DFDDDDEECA><:52+$ (6Qhrv|{ojjknpkfea^\ZXZ[_bcefegfjllopvx||zxw}m;QY[``^abdgjlojkphjtupqwtpqng^?",>>.,,,,*)(*)(()(''$2^u|wrfD((0:=93+)$(7AEKRZ_ca_`_]_`caccijjnnoruvx{~~yvx|yvtrlbXNPRS[\a`ba^fnv~|y{xvxzvrojhefghhd_XSNJF=26FOQRTSVPNOQQRSRVVVZZ[^_[\XSRSOPPQORQRQSTW[[Z[[[[]^`j}}|~~{yxwz{xyzwxywzyuvvqtuutuuvwtsspnpoqqqqppoommnopstqmonmnknqonnnnnmmpmlllklmmnkjjjijjihgiheihiiefhffedccbbaabbb`^YN9&#&*-21) #/66AA?==>>=<=@?>>=<=>;9::9:998643420/0/..-,*&&%$! 
&-/-)$ + + (;FS_^XUSROOOJHCA@CINQNOOKE;1' !$%&$%&&&&$$&%&$# "#" "%.Tt~vsooppllnmmmpprvuxxy|{}}}y{~||~}|{zxx|~}}|zyyzzxwyyz{}|{~}z}}|{{wtronnppqqqqswz|~||}|{}|}}{~}}{z||}{wtqnkjmnlkmkmoqrttwwwvxwxvwvvvvwysrqnoiieim»zqsoihegfgecbddca^aa`\\^^^\[XZZ]WYWVVQROPQOPQXzBDLto8 &?`|rI?A?=?A@ACB@@AABCDBCBAA?@ACDCDECHFFCBA=:51*  *=MYfZM@?FQ`gkojc_aoyxqkc^UUWRboffhgjllnrrrsrpjW@-+2=CGEEDCCCC@:5/)"'/49>??>?>=<<=?>@<;;>?;8999;98664221000/.-,**&&%#"  &/352+&! +  + -BO[\WW[[XOQLIGEHJNPONNKLI>6-# !##&&&%%#$'$$"##$#"##""%3Xq}yxrppmlnonmkilmpptv{z{}~~|||{{}~~}||zz|}||}{{z{yzzzzyvvvw{~~{wvqsstutqrrpruxxz|~|y}~{}{{{~|}~}}}}xtpppnlkijjlmprqppruwxxwvuuuuuuvutrrqmnkhfhzľ}lidhijgghggegeghggedfhifefdca^[YXYXSTTSSNOOOOKOMZzBCRog; 3Ni|tMCA><>>>AAABBBC@@CDAD@?@?@CDCECDFHHGFB@?<561%!"$"!1@Tovx{jW9D]jlrpsrmfc`_]]^_`abejjklmoooprtz}}yxrwxX-@KZ`dfgb\VZpxn_`]SR`yodiknqqrtuvz|{voK'#*7BHDDFGFDHFA?:3,&##+49<>=>><;<>?>><<:;=::9:78:7443313320.-+*)('&$#"   )37896,'! + + + +8ET]aa_]WOLSNOGMHIHA;<A==>>?DCCDCAB@>@FB@@???@BDDCCEFHIIHCAA>973+""!%4AThsw{pR?.5Gbhpswuromidb^\X[\adcgkjnmnpoopsvy|}~~~~|zyu|*,;BACJIMONOPSTVXWY]abdgnqrwoh[C?U>7435650/*((('&" &*-266;69=?C@GHQU\dllg`Z\\[[[Z[[Z[[\\]^YZY[Y[]dfjopsuwyzywwz|~~}{wnged^^XUQQLIPUXY^bdcbfktz}}{toljeehiijf`ZRNIF?38ITPSWVRKIILLLLLNOORVWWXXSUSPNKMLLNPRRQQUUWXYV[WX[[[i}vwzyzwuwvwyxuuvuvxvvtssssttqrstrrpnlmnnnmmlmnmlkkkllnlkmhhfgjjjlkjjhghhhihgigfiieeghhghgdecabbdfccdedcb_^^^_`^^]___`\R?. 
!)7EIKONJKNVXWUS@=>=<=<=<<:<8:;:896325553430.-,*))'#%$"   (1348<81)% + + 3EOWXXVRHFB@941)&$#0:@?4*$!#$&%$##%$"""""## !&5^w~{wuvstunmnklmmnmmknorsuvvyz|~}|yyx{|}~{xz|}{xywwzy|zyyyywyz{|}}xxyz{zy|{y{{{zyxsutwz|~}}|||}|||}|}z{}z|~|{{yurnnmkigjiikkmorvvtuutstutuvvwvvttrqnlihghu~njjfffecc`bffeghjllmnopopromjjifd`\\ZWUUPRSSSTROLLJGIILZBGLsp> )9:=<>?>???@BAACCBABBBB=@@AA@ABCBBBBDFHFECC?<<;72& !#"(7GWjtx~w`=)(&2GYeszswsqmkfb`]^]]_acghlmnnnnnptvuvsuwxz~~|{}~~F2>@CFEJLKKLMPTSSUWYY\^_bcghedT??sybOB:8:@>EFGGGEGHHGGIHD@<1)"#,58=>><=>=<;<==;;98;977644555211/..,++*)'&$"    %/4388;:3,'"  09B?97.(% &6=>5.'! #&$!"#"""!$$#!" "(9`w}xsuttstrrqpmkmnplmmnorrrtwxz}}|||y{|~|zzzzz|}{z{|zz{{zwxywz}|zruuy|{z{}}~~}~}{yvvyy{}{~~|zyz{~{xz|~~~}|z|yxwvsrljjhfhgilmqprqssrtrtttwwxutyzyvpkjfceq|kcdhgfddeddcccfgmijlnprrrusqomlijgdb]\]]YYSQRUWTVPNNLJGHFI^@BKpx@ /7;=9:<@@@?AC@BBB@?DCBCBA@@ACCBBABBCDFEEBB?>>;84*"!#$"*7BRitz|cC*#"#$(:K]lprstupliea`_][]_`aeehhkmqqqrqqrtquxxyzyy~~~|vyy"29=59KPPQUXQLIKJJJIIMKKMMPROQRSQQPLKOPQQQNSTTUZXWYXYY[\]j|suwxxvvtsttsvusuttssqqqptsrrpomqommmmkjlnmiijllhjknljkjheeehigjiihijighfefhigfgfdcfedefddddcdcdacd_````^^]\^^\Z\][[ZYVG6%$.BSZVJ68@HKMSSKG`}tT70HivjbaVSey{fhlnpruwyxwrV+!):DEHHHGHHGHHIIJHDB80(%! !(07<<<<=;99999776655532111//.+**))&%#""   &/2368:9980+(! + +  + + + + + !1::91'!!$%&&""!"" !  !"(:dwzussttutvtpomnnpomnmllnnnprvw{{}~~||}{||}~{zzyz}|{|~~zxyyz{z{{{{x}}~}zwsruuwxwz}}zywttxy{|~}{{{{}{{}}}{}~|{|}}{vspliggddegiloorqrsqsrsuuuvy~~{xutrkhf\e~rmjjhhhgdfgebeeefgkjkkmomprqpoonjgigeeghbca^[XUTTUVTQNNJIEDADF\@>Tms<(17776;?>>>BB@ABBBA@A@B@?>@BCABDBBACFJIHFDA><:960'!!""$"-9DVkw|iQ5$ )5FZotvxvvtokie_^\][[^_bfhjmptsrstsutvtttvwx{}~~A ((*/35887;>BIFGOROQVWY]`_``\F'#+.6:DGHJJJMclH47Jw}rmicdhonXXdkooqrvytlM+#5BIJJIIKHIKMKKLLMIFB95/)%$#"!&/8>?A?>=;;9;:::998768433110/..-+))('(%$#!  
&/00269<<=<61*& + +  + + + + '1:<;1($ "%$"$##"  " !%9gz{tsssrtuturrsrpsrollkkmlkoprtvwz||~~}}||~}|{zz{{}~~{{xx|{{{}{|~~~~zxxvuuux}|}xvrurx{~z}|zyz|}||~}{|}{ywoonhfc`^bdfkmoqprtsrsrqsx}yxupmif^drĺtqjijkkiijijkekgkljllmkknljkmnmnmljiggfb`cdddb^YZUSSTSQPPMHFEBBBIeA@Jkr?-8<758:=?=AB>@AB@AB@AB??@ACCAB@ACBCDFHIECBA@<977.%""#'" ->JXlzx\;)2@Wgtttyyrtpigd_[Z[[\_bfijmpuvxwsututsttvwz{{{}~o#))*.359@CFJKSTWY[ZR>28;??DFHKLNOQPWZ\]ejhc``cdhlkmopstttvvuuuvuutrprqquwyz|}{vpjfhca^XUPLIJMS\`gefhiosy~zmggedbcgfgc^WSMKO@4;NUUVWVRKHJJHIJHGGIILLLMNNNLKNMNNNOONNOPUXVWWTXXXZ[_h}}tuxvvvtttsusuttrpqrprpoponookkmllljlljjjhgggjjhhjjgggffeededfgfghghfdeecddcdbccbaccababdca`_`aa``^Z\\]][[\\[\[XWWXW[ZRL@1%!!.>JNHHGNOHEDCFMexZ?29f|qf[U72L^hmnopqpeE&  2@IKJKIKIKLKLMMNPLMLHB;40.+'& "'.:=@BCC@=:;:8::97557433320/..,**)''&&#"  %,-/47998;?=93.)# +  + + + + + )8FNF:0'"!$#'%""!"! '>cz{vutqrtusrttuusspqpnlklnmnmkmorwx{{~~}{x{~}~}|{}~~~||}|{z{|{{~~|wtuvuvxz}}ywvuswy{|~}|||yz|}~~||}~}{}|{||xupnkd_WU[dehjknqsssspotx}~|ytrliii^c{}tqqnmjlkmlkkillljnloimmllkkiihhhjlmjijjkhecbcfefb_[\VVUQNMNKIGDA@@BHh@>Gjr@$3@9669=?>?@BBBCB@@BCD?AB?<894,! !"##! 0@O^n}eJ.$+;Rgsuz~{wvtpkgea__\^\_bgkppsstvywuuvwwwvxxxwy|~@!!%'(*-/465BFJOWXRLIKNMNRUWXZ]``edgfkhgefggjlnrprtuwxxwwvwvvvttstvvx~ytmjecd`\TNIGJOT\dbeeggkqv{zpigfedfhhgd^[QONMB0:RXVVXXUMIKIFIKGGJFGKMMMNMMMKKMNLMOQQPQRVWVXVUWUUW[^h|twvuuusurturprtrqqpproppooomjlkjljjjljhhgihhhghihgghebcbccbdgedgfghdcccbabcbaaaaa`abbbba_`__`_^^\[Z]\[\\XYZXYYXWYWWZXSNE7:4&& &2>HKTURPLHB>AIjfL93YtK-)F[jlmkmpfC&!.>JKLLLKKLKMNMOPPPRQNMKF?;83-'""$&',7<@EGFA==;9:866568523220/-..,*('((&$"!  
$*/034568;?@?>71/* + + +  + + + + + *d{yusrqrttsosttssssqqnnonnmnnllqrsvuvy{~~|{|~~}~~~~}}}~|~~~~{{{}~~zssvwusw{|}{zwuvx{|}~~}}{|}|{z~~}}|{{wsmjf_]\]`dgknrrtrqosvvw{|~zutphfe]fqŽ~tnlnonomlnmnnnnnoonnqponnmkjhgfdfgfgkklljkkjjfdeghfdb_\VTSPOOMMIJGCCCDKk@CGhr@-;=;77<<=@@@?@@@@@@@BDBBC?BBABC>2AOVWXYYSJHNKHHIGHHDEHIIKJLMMMLNMMOONOPOPQRVWVYVTTUWZj|vwuwvsrtprsrppqqqqoromonnlkjjjijkjkihhfghgghefigfggfccb`ba`becfedfebba```ab`_aa`a___^__^\\^__^^\YZZZYXYZVVXWVWWUUSTTURQNWklN+).&"!#-6@LW\YQIFD@>@PplXF7K{Y%>XiklieX7$ );GILLMKNMMNPMOQRRSRQRRQMJE?;1)'&%$',3>EHJGA<::866764741223...---)((''$#"!  "(*.10388;>?@@@A80(  +  + + +  +-MVTWYZTLKMJJJIGJHEHGHHIHKKKLLKMOOPOMNNPSUYUUWVUVYW]h{sstvsrsqopppqpopnnonllmklmjihiiijkifgfefefeffghggfebbca```bdbbcbcd`^ba__``_a`^`_]]]_]^\^][]]\[]]WYXXYYXYWUWVTSSTRSQPSRS\v|Z3-,'$ #"&04;K[d]SIABA@>>ceMDFsU"5Rcgc^M4$"6GMLLMMNONORSQSQRVVVUVWWUSNKB<41+()+.4=FIGH@>;875455633431-//,,*()(%$$#    %'(+-/5878;?ABEID;1-'  + + + + + ++ALRTOE8-% "%'%&$"!",Di}vsqqqqrtsrrssrqssquusstqqnonnpppqprruuy|{|}||z|~|~}~~~~}|}|}}{{{}|ywuwxy~|{wvxy{|~~}}{{|}{}|||~{|}|{xysrnieaabdgjklnrsstutvuuw}|wtplgccdt|wvsrrqpnmnoqsrqrqrnolkkijjjkkkhfifdfeefddhillmomjhigfiedc^^]ZUQOROPPLHDDCACtAB@krI,=E<618;BA@@A@@BB@AB@?AACCB@AA@AAACDCEFGAA?==:984) !'FT\dm}gE% +>Xm{}|{yvtokjfcaaeddcfjmmsvw{|}}|||}|{{|}~|ttq[F2)()-/048;=ADFGLQRY[_acfjiikiifegjjjlmlkmnnnmomopppuvy|~|twuu~}~{wllhcc\SJHJJMQRU\_cggfiqxz{{uqligffijhie_VOKHG<1>MTRTUVQMMLKKIJHGGGHGIIHIIGJIILLMMMOLLOPSUURSTTVWVX]l}xqpqtrqqonooonmmmlmnlkjjjjjgedghghjhddcdfdeecbcdcddbcd`a_^]a`b```b`a``__`^_]^_\]][\^]^\[][[][Y[\_`_ZWVVUSTTUTSQRRRRRPQQVdnB )-.*%#%'-28H[gd[OE?<;::J~sVEDna% 1L``XD, !/DMMLMMPQPMQTTSSSTXXWXXZ\\Z\YPJC:521026=DFGEC<775566523120..--*())'###    %'),+.2556:ADCEHID=6/'  + +  + + + + + + -=PSTRG?7,$ 
"$%$%%"#,Ei{srppprqrqqqsrrssssuttvvssoqqonpqrrqpqrswwx{|}|y{|}~~~}~|{{||~~|}{zyxxxwy}}{ywvvy|}~|{{{z|}}}||}~{z{z~{wvrnjfccd^cehlnqsutsuutvxy|~{uplhebgksuq|yzyvutsqqpmprrsrsrqnljehhggjjiigff`feedeedfhlllmmljkihgedc`^\\UQQPLLJGD@==>Jt@@HiqH!3G@:24>BB>>@???@@A?AA@BFBB@@A@@A@ACEFFFBCBA=<;:62)#/LX\_nlO1!(7I[grz~~}}|yvspmkieedeeefilosuw{{~~~~~r>2489<=BFJLMQVWZ]^`dfhjjjllkjiglkmnnomporqttxy|}~xx{xtoib^VPMKJLLMRUY]dfeflqtwwtqkfehfhkljfbSNKJG;2=LRPSVRPNMNMLIIIEGHGGJJJJJHHJIKKLLNNMOQNQSSSTTUXUUV\nvnqqrqqnllnmoonnklmlklihjihgddgfegheccbbccedabdcccc`ab^^^]]a_b_]bc___^_^]\^\\]]^]\\[[]\[ZXYZZ[Z^fmj`[URTSRSSQPQQMOOPPQS[h~X)#0993-*')+.7J[llc[L<756:H_=Din5+DXT:%%** +?LNMLMRSRQNRSUUVXYZ\[[^_^`bb``]QFB?989:>BFGC@<967853320100,,,,)&'(#$#    "%(*+,.2459=@CDFJKKG;2+" + + + + +  )AJY]SLG;.'!##&%%'0Jo|}tpmnnoqqrpsssrrqrrtstutuutrqoorrqprqopqnrvwyz{~~~}~~}{{{}~~|{||{zvuvvxxvx|~yxuuwy{~~~~}|zzyw{{z||}||{{|yvutqlmjegdgjlmosqqtttstvwz|}yuokifea^_hoqsvuvvuuvtsropnpopqqpnljhghddehigghiefhhedfebbfjjklmmkkhffefea^^[WPOLHEB?><86;Q|@ABfvK' !,;D>1/7?A?>>??AA@A>?AABCFAAB@@A??BBCECEBABB=?=:671!2MWZ`pv[>&*8HZhrvz~{zxvrpikfeffefkjinqswxz|~ɷ@;<>BEGILOTUV[\^beeegiijmmnonoimpsvuwy}{w~~~xof`]VRMKJKMLNRX]adjlmmqmqqlfeedhknlf_SMJKF;3?MRRSVTQNOROONMIFIHFGHJIJIGHJILKLKMKKPOQRRRQSSSSTTU\numppqqpnmkkmlmomkkljiihilhhdceedeifbaabcdcda`ddca_^_`a^]\\]__`\^a`^\\]_\\\]]\Y\\\\YWZZXYXWWWVXW^u~~nbXSVUQNOMNPROQQRTVXaoc=!!%*5CLHA73,'),6E_mnjg\H:836@omHHdw?$7=0")8ID+&:HNNNORRPRPRSVUXZY[\]\_aabdhijjga]TNLD>??@CCB?=<77543210//+,-+*('&#"!   
"#&(++/2468;>BDEHJLMKA6/' + + + +   + /AMWVUQQC4& $&&(3Sn{|oplmlnonorrrsrrrrsqpqtvvvvtttsroqqqopoopqrvvvxz||~~}~|z}}}|{|zz{{|{yxyywtljprvvtvy~~zutuwz{|}{{{{zz||{{||~|}}{z|zvtsrpmjiijkkmnqsssstttvxxx|~zvtqnkhefg_hmnnnonpqpppmjklljlnmkkifegc`ceghffgehhhiffa\]bikkjlmljhggdda`[YWNKGC@>;676439Q?AChyM$" $0?=-)/6=BA>>@C@@@?@@AEDCCBD@?@AABACEFGDBDE@=<:;;6, ;MUW`mfJ*"'0NSSRVWQNOTSTRNKKLIIJHGIGHGGIIJLLIKJMKLPPPRSRRSRSUUYnuoqppponlkllilmkkkkjjheihiidbebacebbaadbbc`__aa`^_aa_^_\[]^]^_[]_`][Z\\[]]]^YW\YZZX[ZXUVUVVVVVYdtd\WSPMMNOPPRTVX\ahtwTGBCFMSUTPG>5-)*1DalqnkcUKD=86`cYeP ++"!3VpT,#5FQPQRQRRSSTTVYXYZ[\\]`bcegjllorsrnge^TGEA@=?@@=:86310100-+*+**)'%#! "#&*+,/2567:;?CGJKMMMIC61/$ + + +    +  4JRWXUVRB2"#&*3Vo~~trnmmonkmporqpprqrrrrruutvtuwvutsspnooooporuuuwxx{}|~~{}||~~}~}{zyy{{zxvuvsqsuwuvuwz{|{xwvvy|~~|{{zyzyz{}}}}}}{yzzzyxwustomllljjmnprtututuwwvy~~}|xtrmllkkmljjhjkikkihhfffgfhkjhjhfbbcaacfffehghgikgd_Z_bkmkkklnkhfdca\ZUQHFB?<:89767559W>ADfxM# !#&*+#"(29>A@@AAB>>>>?B??DDBBA@@@AC?BHDBCBDEA>;;9963( %]zjk`%(.-#"BjsL/!1CNRRPPSTUUVWVXZY\[]^_bdghimnosvx{}vmfXLE@>@>?=::5210..,+++)*)(&#  !"%(*+-.047:<@DHIMKJLLNH941'! + + +  !7LUWWXVO:!#)2Yo~~wsponnmkllkmmmppqtsqrruuqtttuuututropooooorqsrusstvyz|~}|z|z{|||||{{||{xuxwuututsuuvx|~}zuwwwz|~~|{{|}yxz{}{wyz}}{{zzyywvssrpnljjmnnnrrrssvtrtuvy|~}~|xususqppnkkkhhghghhgd_cdeffegihea_]abbeefghfgfhifa`]bijjlkijjifc`XUUTNIC@><:97774886;\>?CcuR(  !$##$%'))%%(-4;?AA?BAB??@@CBCECDCABB>?@B@AACCCED??>>:887/# +JYa``lz`@'$"! 
!&(*4=NZjzz}|ywuqqooquwx~ypmjǃ?IJMQTVX`degmqsuv|z}{y}}~xsjc`[TPLJIKKMQUXZ[`frqkhjjkkonkd]WNIHD82@SSSUXVSRVXZYSPQOKKNJJIGFHIJJLGJIILKNPPNMQOOPQOPRSVZovlmmnlkjklkjjjhghffgfedfffecbabdbaba``^^`^]__`a_^^^_]\\Z][Z[ZZZ[YXYXYZZZYY[XWTTTUUUVTTTSUQRRQSXbrbXUSRTW[afkrw~g]ZXZZ\ZVQF<35OYX]WVA#$4Zt~}xusqpmllljjlllmpssrrrrstruurutuvwwusqqqqqppqpprpqtutuwyz{~}~}zwyyyxxxy{{}yutxxwwusoormorw}~zwsqvxz}~}}|}}{yzz{}}||}~|y{zzzxwvtqnkjjkloqrqrstsqsotwz{{zyxxwyxvtrmmoliihhfedbbaba`afgihca^\_^_`aceefgghhhb\^eikkihedb_ZWPLKMNJF?>=<<9:963577<_?=D\|vR( !!"""##$%')(+,+)++,18;@??@CB=>BBAAACDECAA@=@?BB@ACGDCEB@>?:9:93-  3Q_b]YjiO5)*&  #$'*,-/4AK^kv|~~|~}yвVDPJWZ`hmqwy{}}}xsnhdYRMKKMJLOQRRV]fnpljjijjlkjd]VMJJC84@VWRTTUQSY\\YUVTROONJJGEGGGHIIIHHIKJJMMKLPONQQPQPRUWptkmmkjjjihfhjhffdeggffccfcbca`cb`aa`^^\_^\\\\^\[]][[[ZVWXYXXWXXXXWXXXZ\XUXWTTRSTSTTRQRSRRORPONTay|j_YYZ]bfjpw{~rd^ZZ\]^\TNB<;FUcgiljhg`ZU]nkxySR[pL  /JUB6+!/9DMRSVTTXXXYZ[]^_`aceghkmqtxy}ukaVPB@DGE<321/,,+**)&'&%!  "6EH@#!"%'(**-0137;@EHIJLMOOMNNLF>5-# + + + + + + -EYZ\XWJ3"3Xt{xwttronliikkkllnmpnpprrsqtututvvvvvvusssqpooooqporsqrpswxz{}~~~~|wuwvtututqonsssuqminphkosu}yvsvvwx{|}~~~~||{yw{|{zzy{}z{z{|zyyyxwspnmjkmoprrrpqpotwxyyzwx|{z{zyxvqqhljjgebcc`_`__adeiifaYW\Z\^]^`abeffhd][`chkidb]XUPKIGFFJKIF@@?<;:;853667=d@?D^yyU- !  !!$%%"%&&()**,-/.//10109;>@B?CB???@@@@BCCBABCBA=@??BBDDEID>>>=;<:64+  :Xb^]ZjqYC71)" !#"##%')*+-335>JMagqzƑU[ejrtx~}zy|~~ysj`WROMMJMORTRTZdmnjjiimlllhc^UNJIE94@TTSUURQS]`_]XYWTQPLLKGHHGFGHIIJIJKILNLLNOMNROPPPRVZprkljjiihighkfeedceffdcbcba``_``__`]]][[\[[\[[ZZZXXWVWXUTVXUTWWVTUVVTTXXUUTSSRQQNMPRQQQQQQPNOOOR[t|~xh`Z]aeinrw|~la^]`ba_\UMB?HP[bhihed`_chtqxyv9%)ES>>/",7CNSTSVXYZ\[]`^_`bfhjjmqswz}|okifXMKRP?41.,++*)*)'&%""  0DKJ@ !"#&(*+.058;?DGJKJLLONMOSQJA52*! + +  4OY][WXS8$0Ys~xtvuttsqnllmljjkmonoqqrrsstvvuuvuvwvvwtutrqqppqplnrqqqtuwxz{z||~|~~{zwrqonmkigeggkkllntxwustz~~{xxvquwwz}~}{{{ywyzzzzy}}||zzzyyywxtppolnpppppqqrtuwxyzxxz{||}||yusqolmifbba]]]\^acfhgc_ZWYYZ[\__a_bfea[WZ__a_[WUOKEEDBDDFIJJG@@?<<<;764478AgEBE]wyU0" ! 
#"##$&&(())+,,,./221566796:@?BBDBCC?=@A@?BBCEFCDE?>?@AABCAABBBCBEBB@A@DFFGDBA?A?<9897- %DZgl^[fvfVE7*"  !!"##$&'(**)+,-.24466878:::<9<>FNRdqt~twwx{|}}vof]UPJMNTY^_^clljjgjkiljfc[RLGID63GPRSUUVQWdhcdb^ZZWRSPMLJJKKHIIHIKJJJIIILKJMKMNMLMPQXrrjhhgdfiffgfdcdbdbadb_``_`^\\[]]\\[Y[[YXXWVZYXXWXWXXUSRRRSSSUSTI@AGNQSQOPQPPML>4;AILJLLLMPOQQRTZdklikqpd_`fgkpux{yhcddehgfdVJCCEKRXZ[]Z_clv|utmoRdqrH$1Samg?%$#6ISTVWZ^^^^bcdeejjnrvvy~pJFikkq^B50-,))(**(%$$#!  .880!  #'))*/0156:?BEFIKKNMLNOORRRQKA863'  + + + + + +  *DWX]WcbVE2#+]vzwusrqorqqpnlmnmmlkmlpoppsqstuvxwwvsstvxwututtrsrrpppqoprruzyxzyxuwtturld`\[YWUPKNSX^hpz|{wusrux|}~{yxwwxy{~}{zyyy{zz{|}{|~|{z{y{yzytstrnoolklnqsvwwxxv{~}~~|{ysqnjhfa`]YWVUX_aeedb`[[ZZY]_`a^][XSIPSVUPJIJLHFDBBBBBFJPPMHEC?>===98:99[x|W1%"!""$$##%&(&&')+,,.011214457:>@BCEGGDFHKMFBB@?@CAB?@>>ABAABCDAC??AADFECC@>B@=;:764*/Vki[X[h}ti]J7+%"#$%$%%'(*+-,.13226:>:>@@CCFGGPOWW]bahmpnotz}y{|{|}{pg^VPLLQV[^^fmlijhiijljgeYNJCFB64DQUTRSTRZdffgd__][VTQPOMLMIHJIHIHHHIJJJJJJJIMMMNPPPXu~mhfeeehhhgeba`abb__``_`___\]]Y[[YYZXYYWWWVVWUVVTUUVVTTSSRQQSQQND68>FMOOPONQMLI6&*6>EIMLLMOPUWY]bgnqqptzteddeilosw{ujghjijlg^UE?AEJQRPMSbs}yjnzoiopo>&+OiwiD52&)=<<=;9889;LxL@BYw{X3%%$$#%'''')+*,+*-00245468::=??BDFIKILMRTWSJDDB@BCAA?@@BAACA@ABA@BAABCCEEEC@A?><:8550'0BHPTSW_}ulXG8*%'())*+.0113499=B@FHNPT^[]ceggoqsqqsurtsswwwyxz~~}{~}~}~}~yrh^WOJOUY]^fknhjjhjjkhedVJGBFB54ESTQNTUV\fgiifdd`][VSTPLLKIHIFEHFEGFIKKKKKJJNLMNOQR\t~ngfeefgefeaaa`_`______]]][[\[XXVVXWXWUVUTVVTTUSRRSTRSRRRQOOSRTOG;86<<<<::::=T|I=DWuwZ3&&%$%''(((,/021/1357877:=?@DFEFIKKOPSW[]ZVNGHIFEDB>?AA@B@BAA@@ACB@@BEBDHEDB???;99796/" !! !)8CJONS[tx`SMA3,-/14579=?EINU[\bdchjmnqps{x|x}{z{{z{yzxzzxxzzz{z{~~~~}}|~zuldWOQVUV[ckkgijkikihe^NDDBEA33EOPPNQTV[fkmifgdaa\WWWSQMLKIHFIGEGHFGKJJJJILNMMNLNRXt~mdfddedbba^``^_^]^\[\YX[[Z\ZVVSOOTRSSSUUUUSRRQSSQPQRRRRROPQTWZYWQH;9;>JYwwc_fjc_[rbWkh:&C]O2;G9$!.;JT[`_bedgikprwz}b0X|vttsog\L;1.-+*&$'&%$#!" 
+9:  !%&%'(-..238;@EGHGIJIJLKNPQRTVX[]WLA8/)  + +  &?T\iwyYNe{uqqppprqopqpqrrqrppqonkhijmoprtttststuuuutssvtvwwywutqqrprqqqtvutqmdVTY\^^]`daXRMH@CGOX\bhorsqpsuvw}|~zwurttx{~~}}|}{zxyz{}~|y{zyxvvvwtssvutqqpoomloquwxyyyywvtsqokea]\YTQRU\_aa`\Z[TWUXWXXZYVTVVYXWSOMOQLIHIGCCCEFGLPOLDCC@=>=<;<;<99U~@>@Pt{]4*'''))+,+,.01354579:<<=ABDBFHJJLPRRUY\`c_]WOFDFEEB?@BB@A@?@BB@?BC@ABFGEEECACB==98::;7- $)*)(%!/;CEGD9Cmuo|kKIKC;9:?@GLPVW^glrtxzvuzx{}~}}}}~}|{z{{||zzz{{zy||z{|}}~{}|vpd\VTSPXajjhikjkmlie\LDCFFA02CORPOPSQWflljfgedb]Y\ZUTOKJIHFIGFGHFHHIIJKLLLMKKLLO\{~ldeddca``_^\]^^^]]\[[YYYYZXUTSJCELNPQQRSSSQQQPSSMQTVUSUTTVX\aeefa\PA9=BFIHIJE<+#/6% + + +  .TktvCAg|}tnmlnorsrprooprrstttpqnjklklloqrqqstuuwvsuusssuwyyyxvtsssttrrtttsndYVW[_a_abfb[UPKFCDJPZadotsroprsv{~}xtrtvz}|yxxxxz{||||zz{yxxvvuuuvvvppnnkmmnpssuvuttsssojhf_][VSSVY[]^[YWXWYW[YYY[ZXUWX[XVUTRSROLKIGEFFEGIMTOIECE?=>>=<=<;:>ZA=@Rty]7+(+*+---/012247789:;@BAFFGHIIMORUWV[^_^ffb\UKGFFFC@A@@@????DD>@@??@ACEDDFDBCB>?:9;<;84*#*031/,%+47;=8.,EpxahzzlXKJNLQW`dhmmtuxz|z}~|~~~}~~}~~}{zz||{|{||{||{||}~~~~}xog`USOW^jkkllkkmmie[IFEHF?24DPRQRRQOZhmkijhgfa]_]ZVSNLKHHFGEGHGIKJHIIJKHIJKJLNR]x~ndcdd```\]^ZY\\]]Z[ZXWXWWXTRROB6:BHKOOOQPQPPPQRQPTWXXXYWZ^adgijihe_SC>;?DFIFC=) -:GNW^dikoswz~{mhhhjorw{vqoqsqonh\M@43:Jb|qptq_P\jBBF,$H^W8,30'/."%2BOYbfhknpsu{yVKh{vuvvqqoljbXF6-,)(()('%$""  $..$  #$$&)*+,/26:?DFGGIHIJJKNRPPSV[^_^^[UI@5)  +NnO#?ky|tmkijmoppopppqqsstvvsspmmnlkkmpqpqstttttuuussttvwwvwvwvtuuuttutrqkYSWY]`bbeegfa[[VOGACNU\cirqmkjmnry~}}{uusyx{|}|{wwxxyz{|zzxwz{z{wvxutwwusqokkhhjnnpronooomkigd^\XSONSVW[[ZZ[\\Z\ZY[^[Z\YXXY[[XWSTSOLKJGEFFDHJLNNJFCA@>=>>==<;<:]E<>Tqzb?.),-./21226678;9;=@@BCCFGILOPQUWUZ\_acdhfea[RMIFFDDA@@?BB@BCCAA==A@BABDDEDDD@>>;:<=<;:5')499;72+!'/8>:6046Sxyu{}ukeejnlmqsrsuwyy|}}~}~}~|}~}|}}}}z|}}|}}}}}}~}~~~}~{~~~ytk`YST_illonljlkjdZLJJKF>23FSPPRRPQ\hmkjhhie`_b^ZXWSRNIHFEFGHFGIHHJJJLIJKLKKNR_~}k`ba_\]^]^\ZXZYYZWWWUWUUVVRPPL:.-5?EKLNONNOPQRTUVXZ\\[\]_bcgjkjijkhbWH<:?CEBB<'(9FOX^dlotw|tjijlqux}}vstrqrplaT@506QyrTXzP(9QO5 
Dc_XNI**:D6-$-=KX`fjlosx|d^jsvwvsopnljdYL90,)(*)'%%#"!  (,/-! !$$$&*+*+125:>BEFHIHKKLNQSRRSWZ]_^^``^SC82'"  !Enp&Cp{|snlkkjlmnnnprrpnpqsttvssnpoooooopqqrssstuuvvttuuvvuvxxvuwvuvvttrodURVY]aabhhifcc`\WOGDJRX`gknkifgkotz~}}zwvwx{}|{zxzxyy{|zz{}}zyxwvvvwvtsojfefiloomjghiggfda[[XTQSVVZ[_^]__^\Z[^^_\]]]YX[^]ZXWWRMKJKHDEFDHKNPMIEDB?=@?=;><:<=bF>8SocA0-..043356779:><=@DCFFFIKRRSWWXYZ\acdeheeggaZQKEDDCAAA?CDCBC@?@??@?AABFFECECA@<=><<;<:7/"2;BFDC>5)&/@GHB@CFM^ltqpqpqstvuwxx{{}~|}~~}{}||}~~||}}|||}}|||||~}}~~~|}~}y}~~zsmcYTbkmklmkjkjhcZNLKHE=24FPNOOOQS\fjjkghjebc`]]ZWTTMIGGEHFHHGIIGIIJIJKKKKIKP]{yh`_^]\[\]\ZYYXWYYUVTSVUUSRRQPI9+&+2>?8%(7DOYaimrw{znllptw|~zvsrrssmdU?41Bd~pj{mNRTqg82Yeh]6*;HHA3+!!)5GT`gkntx~ndouwwuroooolg^O=1-)('''%$$$  "443- "#"%(()-1267=BEFIFHJKNOPPQQTXYZ_]`_a`]UIA<1)# +   )Nqw8!Fpzzrnllkjiiilmnoqpnpqrrssrurqrpqnlnmnmprqpqqstsuvvvuuuuttuwyvvwwutqj\UUVY\`dehiihfhfc^RKFFMS\fiiihgijnqw~}|||}{xvvx{z~~{xxuxxvyzz{z{{||zwtwutstpkhggkoqrmeb]_a`]^ZYZYTWYZ[]]_^`ac_^[\^^\\^[\XZ\[YXYVTQNLKKJGDEBGKONNKECA?>@>=<>;;<@fH?>Rl{}gD5222456579:9;=??CDFHMKPRRVVXZ]]^_`bdgghhjehfaZPGDECABB@CFDCB??AA?>@AB?EHFEDCAB?>>>===::4) 0=GLOUSKFGT[ZRPRWahjmwwsurstuuxwyyz{|}~~}~}}~}}~}{{{{z|}||}|}}}|}}|~~~|}{}}|zxmfdkonkkjjijhgbYLHGED=12DLMOPPON\gklkiihfdd```[YWSMLHEEFEDKHFHHHHHHIKKJHIMP^|vd^]\[\YYZYYVVUWVTTVSSSUSQQQOMH:'!"'/9AGKKLNOPSUWYZ[Z[Z[]^`bdfhjloorpoh]QD;:;:2$&4CNWahnsx|~tqqsvz}üyutttsog\K=;Eg|y~xN;$"8QO0%;JPNH?2,#$1BQ^hpsw~|svyvuurpqpponi^QD3,((('%%$##!!3:5/ #$&'(*-/48<>ADFGHGJKKKMNPRSWZ\_^``abbb[NEB>74 + +  2Zv|X 
Is}|qpmkijkihhilloqpttrpqrpqqqssspqolmmonopprqrqsvwvvvvutuvwxwwvyyuti\UUXZ`ehjlkkjklkidYPGDJQ[accijikmnrtw~}||~~|{xwvxy{}}}zyywzyyy|{}{||{{zustupljjjoruvufca`_^\^_]^]Y[\\]^[]_`ab`]Z]]^]^_^ZU[][VXWUSONLJJFEEEFIMMMOKFBA@?>=>>>=><@kG<>Kj~eB7245679:<<<=?ADEHJJPRQRTWXZ^^__abbefggijmlkkiaXNIGDB?A@BCDACA>@@BBACAACECHDDDC@A@=>>;:<62.-16=DLRW]`]Z\b`a][aipnklowsnttuvxzzy{}{|~~}}~|{|||}||{{{yzz{zzz{{||{||~~~}|||{yvspqpnkhhjihihaVJCBBC;04FMLMNONP[glkjjhfeddab_YYUQMMFEEDCEHGFDDGFGJIJKIGKLJ\td^][Z[YYWWZUUUTTSRTTQORRPNQNLF6%! %-5>EIKMOPPQTUUVWZY[^^``dehiknnrrsqkbUF>981""1AMXaiouz~xtstx{}þ}wuuwwvqbVG:H\tgBG>#"6ITURMF?4( %.>N]fnu~zvuyyvtsrqqoonnjaTH7.)()'$$#   '13/ #%$%'*.0259:?DDGGIJJJLMLMQQTYY\^^`bbcb`]WTR=%  >k{N!Mt~~vromjmllkhiikklnqprqoqrpoosttssrqooonmmmoopqrtutuvusutvvvywxxxxugYRVY[`chmoomlnoqoh^TLHIPV\bhmkkkonrtv{}zrpsuxz~}|zxxuxwwx{}|{{{|wvwvsnkkkntxwribbaa_`_`]^`_[]^__]_`aa`____]^\]_\\\]\\YWWURLMMKJGFFFGGKNOMJE@@@?<>=>=>>;GoG===:89:?CGMQUZ[^`adeeeefjstpjlimsww{wrpstwwyzyzz{{||~{{x{}}{}}}z|z{|zz{~~}}~}~|z{{xuqlighjjigf_SFABBC:-2DMMLLNMO\gjiijhgecaaa^\ZUQNKGDCFEFEFFEFFFHHHEIHGKLM`ue\Z[[ZZZUWYWTVQRSRTTQNQOOMKKJD3$ ")39/ /@MXahouy}|wvvx{ƽ}{|{}~zpbQ@=Tm}S2;KF10ETWWVXSK>3()2=?9=?@>IohM779;==>@AAEJJKKNORUWXZZ[Z_`acdeffhkjjllllnooomkbWOH@@?AA@@?BA?>C@AAEDDCDEDDEFGGA???=<989;@ELVVX]__acfefijjjmplmmpnppssuxv{yqsssrtvwwwxyxyz}{|{zz||~|}{||||}{}}zz}{vsnjhgfhiifc\RHBCDC8-3DNMKKMKL\ggghhfeccdb^]\YTPOIHDBEEFEEBCEDFJGFHIHHHINdwbXWXZ[__ZWUTTSPRONROONLMMKJLI?."!$,:BGLPPOPRTVVVZ^]]^cfffijlqsuuutvsg]MA1$ -:IWahpuy|xuvx|~~zk_E8K^nsOIVkb.$*'*AQX[]\\[UKB807@N[env~{xtutstuuvurronnkf^OA3*''((&""#  ""$'&(*.115?=<<>=ACGGHMQRRTTWY\Z]^\aacfhghjjkmjlmloqpqssqqlcYOEA@AA@@AACCACAABCBBCCEEEFGGFCCC@?;988;>EMWY\acdefhiijjjkmllmnrpqpqqrrsurstsutsuuwwyvx{{{z}~}~}~|{~|z{{xvtpjhfghehea\SJDFFB8.3EKJIKJHLZffghffecbcb_^\[VPMJGDDDDCEECDFEFFEGHGGIFHKas]VVVYajkaZURQPPPNNOLMMKLKILKG@0 $-7@GMOQRRRVXY[]^^`bdhghilprsstvwvso`RA2$  +;Ic{y< 
!(:>3$%<><=;><=Q|9:?InoI;<=?@BFFJLNSVVWWXZ]_^_aaedfiljllklnnnqqrttuvywwvpfXJCACC?A@?BBAAACDADC@EJJGFDCA@?BD@<;99;9Fmc,""#7SF."!5LX]_cehjmnng_PG?DFR_m{}zwvuuuuutsrrsrpoomhcYI;,)''%%###" #&! "!#%'**-/35<>@DBHJKHLJMLLQQTWYZ\]`badccecV9$ AiE.Yyzvttstrsqomlkkjifgiglmmlmopomqtsssuututroonlklmoqqrrsuuvwvuwwyvri\WX\\ahnpuwxx{zwrlc[WMGHKTXemtuurprrrrty~}zxywqrw{}~}{{zyyywxxtnlmmrz{wogddccaba`aa`]^_bdfjffda`^`_``^`aba_]^]YQNMKMPOLJIJJJGHKQTOMGGCBB?=><==<;?U66>HmmP?>?CFIJLPPSVXX\\\^`aceefghkjinmqrstutxyvw|~~~}scPGCDC@@>??B?@BCC@@CEFHHHFEFCBEE@>>::<=<@DQY_ehkiklklkmlkklnpstrtttwxxwyxz{{zyy{|}~~~}~}~}{}{{||yyxxxwslgdeghggd^YNHFED@3(2CJHGFIGJXfffdeeabc`\]ZYXTPLKHCDDBDCDDEDGFBEEEGGFHIKclVVVYb{xhZPNKJKJGIJHHIGEFFD<* '4>EKOPRTVWX[\_^`deggikloqtvvx{|{ytm\M>5,& "5ET^hntx}~ȻweP8IhzD0+/'&E8!$# .HW]acfjlqv{~}qfPC;?Lat|ywvvvuvuvrrrpprppnjd[O?/((&%$#$$#  !(+1+  "%'**-.27<;?DEGHIIHJLLMPRSWXY__aaaccdecS9" Fq|4 3`||zurrqostusqpmmjjhhifgjikmnnoropqsqrsrtvtppoljlmlnpqrrsvwwvxvwwvrl_WYX[ciorx{|||zxtnd^WNFFNUZensstuppommosz~}vtqswz{}~yxzwyuomllpsw{wmffgecb`abaa`abccefhfeccaab_a__ab_]`_^]YNFHIMOOONLLJIFGJNQNIGEB@@@A>>>=;;?Y7;B>>?<;;>b:::LhmUGIMRWY\^`bbdgiknpqqsuxwxz{~}}~~~|{||{|||yti]SJBBCC@AAABCBCCCCCCCGFFHFADFEC?:O`gmsy~źteK9@Iaxdvn3%& #&!=R`ddjorxn@03Gf{}xwwuuustsrqqsprrppngaVF7,)&%%%#"   0<5  "#%&)*,.037>AEBEHIHGJMJJMPSVYZ^^^^^bbab^O9&  + + *Y^ :e|zunjkiilmpptttromjkljiigfhklmmouusqqqttrpqrrsrnmnnmnnopqtutwuuvwwne\Z[Z_jptx|}|{zzrld_\SHFMU^eknqrtsqmkjglsz}}zuqrtw{}}yxqllmntwzuidddfda^^]^accc`bedfcdgfbcbb_`^\_`\^]\\WMGFKMMLNMLHIGGIMOOMIGCCB@>@>;=;;=Dg88>Kh{zeSPTZ\aflnsrsuzyyy{|yz~}|}}}}~~}|{{}{yz{{{|{xvqdYMD=@>???A@@BDCDGDDDFDEFEDEDB=?KX^]UQKGTdmswwvtoqrqoousqstwuvy{}~~~~|}}~{}~z{|}||{xwxvsststsssmhcaabadf`]VNGDDC<0(0;9@DUguywpmnqrvwxyyz~z{y{}}||~}}~}~~{z}~}|}}}{{zzy|y{zxxz{xxuunbWJCB;?@?@@A@CBDECCEDBCDCBFBKUhuvpha]Yamtv{}{zyuvvuwwyyz{}~~~~~}}~}{{z|}{{}|z||{z|{xywrqtrrtrttogd^^aceda\UMCAA?;1(0;@CAA@@FY`ab`___]]]ZX[VQNLGGEBBACHHHFDEEECCCBCFEEHMhj^^afhlpt|n\OGDBECAA@@?>:1 
(5CLRVX[\\]abbdhjlmopqpqtxy{~zm[L?6.$ &5M[gotzɺu^I7Qv´sbT>(!(--*#/G^dhmsy|Q-"$+:Wo}~yvutqtstsrqqposrqpljcYO@1((('%""#"!"! ,7-  ! $''))+047;?@CFGFHIJLMLMPPUVZ\\]^_]_ccZH9#   *7<<71.Cc< Akzxtwvqnkfbccbjmopqqpormmjjkkjjijlnnnmprqqrqrrstuuspprrpopqooqpsttwysjb\Z_doruy}|{{zwrjb_ZPFBPZ`cejnrwvwzyvtrsv{}~}~}~~~~~{~~|xoqrtvyzrihkmnrwyvhgddde`aaabc`cdddgeffdfe`]]^^`a_`[^\[^]SHFINOLLIHIGHGGILPRMIFFC@>=?==<<;?==:0 &4ALRW[[]^_`afiikjmpqpstwy|}zn`Q@8.' "7KXgnt{~oTIPpȾwc]H?. -:6/$'@Ueintzl?*'!!(4Lfyzwtsqtroqooonmrqpolje\QE4*($%$#%$#""!  "27+  !&('))*.15;?@CFDFGGHJKLMORTX[\[\_^_`abYC2#  )E^jokg\W]l~n7Bl|}wtxtsokigc`bbchjloppqommhkkjjjiljjjlopoqpoqrtstpptsttqsrsnonorrrturoc\[_cintx{xxyvunhb_YKEGMVadekoqsvvwxxtvvu{{}||}{}~~~~~~}~~zwrrsookhjjlnsuvqhfdfeb^``aacdcccciheiaffb`^`__`^a_^][Z^UKGHQPKMJGFGGEEHLOPLIEDB?@?>><;>==Hsntxy}{wxz{{wy{zzx{{ywxxyywx{y{|||{{{|}~|}}|zxvvuvxxwwwxwvvuurpl_RHBA?@B@?@BBBDCCCBDFNSenw|zwtsruzxxz|~~}}~~~~~~}~~|}~|{}~zxwxx{|zwxyxxyxyyxxvtsuqppqorsqkg^XX]bcc`[RIA>==90'.;=<>??@DT]_^_`^\[YXYWUTQMGIFC??>9779?DHFEBCCDEEDGMWscRTX]bdgjnpvu_NF@A@><<<;9/!"'3@KRXZ^`_acgghkjknpruuvy|}}qdQB5/(   2DZdnv{ȹuaJAZxëmO@L=-876/ "9Rciov~zS0))# &1E_u|yvuuuspponkgmmopoolh^TH6.)%%$##$$#"!! -60' "$&')++.14;?@A=?<;=>CA@>?AABCBAFPXiqvtuwurqsy~{xwyz}~}~||}|{{|z{{z{zvz{y{|}|~|}~~|}~~~~|}~|{|xxzywvywvzxvuwxyxxvwwwwuqqspnonospmhc[USY^_`]WND>:9<;2(-9><;<<=ET_`^_][ZXVWVTTTKJKGCB>>=:7427>EHFGDCEEGJLQ^vzbPPTX[_bgijnt|wcPFA><=<<<8.'' $/:FOY\`_a`dgfiklmoruuuxz|~shVB92+#  0CUcntz˿oYIUnɳsYgp? +4=D-#3O_hov|e<&('&#! $+?Zp}{yxvutsppokfhllqspmif`UH:0)''%'$"$$! ,52- !"##&)*+/25:>?DFEEEIIIJMMNRTUXZZ]]`cab_P>5  =o~yxttpg^VMJJZp}||zzwspoonigc__ab`dhimqpnoolkkjkjjgjijkmnlnpooprsoqqsrsrqtqqoonoqsrold^^bhkpuwwwvrnie\XPFEKQW[_dhknprruttuwvxyxwuuuwz|}~}|~|}}}|~~|wnhhimsxwphbefda`ba``abcbdgihggfddc]^ba`a_]\]^^^[TMLPOOLIHHHEEEDDKQNJFCCAB>@==:9;<;5..7AFIJKHFFHIMQ]s~x[OOQSVY_cdflorzv_MB><<:984+.20*!!-7CNX\___adfikmmpsuvy{z}~~}yl\I<6.% *ARblt|Ŵx`IDlѸo7'!3F=0/"'@Q`houjS1##$%%#! 
"*9Uky~|zwtttronghnlkppllkgaYM?1(''%&$"$$!!!#252' !!""$(*,-259>AACEIHHIJILKJPOQXZX]]]_```K7( "M|~}{zuwsrvmojajt|{|~zywwtqqnmlhgb_]^`dilnprqnpqlmmkjjhiikmlknnnqooppqqrrquuqqopmnopppmgb_^chjoqrrqmkde[VOGEKQY]afiloprlpttutuuvuvtssty~~~~~|}|z|}}~}xnhhknswuofccfdbababcdbbddgilifefc``bb___a_\[]^_[UPOONOLHGGHHFGGHNQPKFBAAB=>==9/,/6BHMMKIHIKMM[s~uyu^MLNPSW[]`afikpxs_M=<:8852+"3>@90&!&5CPW]^`abfijklqtvwxyz||}wlYL<4-) '7#"*?PTSM@.&#"!!$$! "(5Jfv~}zwtstspnkmnjkmpqmjgcZP@4+((&$&&#!  ! )63/% "#&'*,0368;=@CDEGHHMJJKHOSTVXY\\[\^`\K3& + 2`~|zwtomfmqvywqsx~{{{y{zzwuutrokiebba]bdfjnpppqpmnolihgghjkkkkkmqpopooprpqqsutrronmononjfc_cehmnppnkhcYXRLHLRW[]chjmoorppttrtsrqsstrrsvz~~}}~~~~}~{{{zzxz{}~}~xjgklortrldddfeb__``accbceghjfdc^^_``_^]\___]]]^]TNNNMNKGGIIFDBFJONMHDCAA??A?=;9::=Xuwwwwxwyywwwyuxy|{|wyyxxuuyzwz||yz{{zwyxvxxxwvwvvxyyyyz}||~zwyyywwxvrj_PEA@ACHMZjwzwtwwvttqonkmnonnkhiorrsutrruussvurquvxwz{}{|~|z|~}x||}~||~{|~}|}}~~}||||}|zzz{w{{xwz{wxxusvwvusppsrqqrqrtrqrppoommqolmihkjhihe]TLNTZZZVPG@954:=.#*5999899BNWYXVUWTSRQRROLKIEDCA=<<=:82/,08CLOPNKJHLL[r~stw^KIJKNUXW[^cehkor~|p`NB<9964*&9EIHB5)'5AMX]aabffiinqtwxxz{|}~~zm^NA73*#%9Palv~Ǽv_BBeƭl^{pP!'6B:-31(263/*'%  "$"!'3D^t}zywssrrojjijlopnlkid]TG9+()(%%%&$#"#"  '0.*"  !#%')).057=@CBEGGIIGHILMOSUWXZ[\\\]_ZI2#   @it|~xwuuqjZSjnnqxyvw~|~{z{|||y{xvsnlljge^Zb_cgjlqprqnnnjigiiiijhjlmnnnpqnloopopqusqpomomoplg_`abeilkkgea_ZVNIHNUY\_ejmnoppopsssturqrrssrruw}}~}{|~}}~}}~}||{yxvw}~~}}}~~~~xohklosusjdbccdca^_a``bccdfggfb``ab`b`^\__\\Z^VaVSPQPNLJJIFGGECEJOONJCA@?@@A><;9;9Ba \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/LICENSE.md b/third-party-programs/Velocity-Bench/hplinpack/LICENSE.md new file mode 100644 index 000000000..5d33d8d1f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/LICENSE.md @@ -0,0 +1,42 @@ + -- High Performance Computing Linpack Benchmark (HPL) + Modifications Copyright (C) 2023 Intel Corporation​ + + -- Copyright notice and Licensing terms: + + Redistribution and use 
in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed at the University of + Tennessee, Knoxville, Innovative Computing Laboratory. + + 4. The name of the University, the name of the Laboratory, or the + names of its contributors may not be used to endorse or promote + products derived from this software without specific written + permission. + + -- Disclaimer: + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ --------------------------------------------------------------------- + + SPDX-License-Identifier: BSD-4-Clause diff --git a/third-party-programs/Velocity-Bench/hplinpack/README.md b/third-party-programs/Velocity-Bench/hplinpack/README.md new file mode 100644 index 000000000..80da74c85 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/README.md @@ -0,0 +1,89 @@ +This is a workload for high performance linpack.
+ +## CUDA
+Source the oneAPI environment (e.g. `source /opt/intel/oneapi/setvars.sh`).
+cd cuda/hpl-2.3/
+make clean && make
+cd bin/intel64/ && cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat
+export LD_LIBRARY_PATH=../../src/cuda/:$LD_LIBRARY_PATH
+ +## HIP
+Source the oneAPI environment (e.g. `source /opt/intel/oneapi/setvars.sh`).
+cd hip/hpl-2.3/
+make clean && make
+cd bin/intel64/ && cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat
+export LD_LIBRARY_PATH=../../src/cuda/:$LD_LIBRARY_PATH
+ +## Open Source oneAPI DPC++ compiler for AMD backend
+export USE_AMD_BACKEND=ON
+ +Source the oneAPI MPI and oneMKL environment variables.
+source /opt/intel/oneapi/mkl/latest/env/vars.sh
+source /opt/intel/oneapi/mpi/latest/env/vars.sh
+ +Source the open source oneAPI DPC++ compiler.
+ +cd dpcpp/hpl-2.3/
+make clean && make
+cd bin/intel64/
+cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH
+./xhpl
+ +## Open Source oneAPI DPC++ compiler for Nvidia backend
+export USE_NVIDIA_BACKEND=ON
+ +Source the oneAPI MPI and oneMKL environment variables.
+source /opt/intel/oneapi/mkl/latest/env/vars.sh
+source /opt/intel/oneapi/mpi/latest/env/vars.sh
+ +Source the open source oneAPI DPC++ compiler.
+source ~/sycl_workspace/llvm/env.sh
+ +cd dpcpp/hpl-2.3/
+make clean && make
+cd bin/intel64/
+cp ../../../../datafiles/HPL_small_gpu.dat HPL.dat
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH
+./xhpl
+ +## DPC++ MPI version.
+Source the oneAPI environment (e.g. `source /opt/intel/oneapi/setvars.sh`).
+cd dpcpp/hpl-2.3/
+make clean && make
+cd bin/intel64/
+cp ../../../../datafiles/HPL_small_gpu_2_tile.dat HPL.dat
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH
+export I_MPI_DEBUG=5
+export I_MPI_FABRICS=shm
+export I_MPI_OFFLOAD_TOPOLIB=level_zero
+export I_MPI_OFFLOAD_CELL_LIST=0,1
+mpirun -bootstrap ssh -n 2 ./xhpl
+ +## For CPU.
+Source the oneAPI environment (e.g. `source /opt/intel/oneapi/setvars.sh`).
+export ONEAPI_DEVICE_SELECTOR=opencl:cpu
+cd dpcpp/hpl-2.3/
+make clean && make
+cd bin/intel64/
+cp ../../../../datafiles/HPL_small_cpu.dat HPL.dat
+export LD_LIBRARY_PATH=../../src/dpcpp/:$LD_LIBRARY_PATH
+OMP_NUM_THREADS=32 OMP_PLACES=numa_domains OMP_PROC_BIND=close ./xhpl
+ +## View output
+### Look for the Gflops measurement in the output log
+================================================================================
+T/V N NB P Q Time Gflops
+--------------------------------------------------------------------------------
+WR10L2L2 4096 768 1 1 0.33 1.387e+02
+--------------------------------------------------------------------------------
+||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= 0.0056536 ...... PASSED
+================================================================================
+ +Finished 1 tests with the following results:
+ 1 tests completed and passed residual checks,
+ 0 tests completed and failed residual checks,
+ 0 tests skipped because of illegal input values.
+--------------------------------------------------------------------------------
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/AUTHORS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/AUTHORS new file mode 100644 index 000000000..b08e25180 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/AUTHORS @@ -0,0 +1,6 @@ +Antoine Petitet +Clint Whaley rcwhaley@lsu.edu +Jack Dongarra dongarra@icl.utk.edu +Andy Cleary +Piotr Luszczek luszczek@icl.utk.edu +Julien Langou Julien.Langou@ucdenver.edu diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/BUGS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/BUGS new file mode 100644 index 000000000..08d694014 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/BUGS @@ -0,0 +1,9 @@ +============================================================== + List of the known problems with the HPL software + + Current as of release HPL - 2.3 - December 2, 2018 +============================================================== + +============================================================== + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYING b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYING new file mode 100644 index 000000000..08465d618 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYING @@ -0,0 +1,45 @@ +====================================================================== + -- High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 + Antoine P. Petitet + University of Tennessee, Knoxville + Innovative Computing Laboratory + (C) Copyright 2000-2008 All Rights Reserved + + -- Copyright notice and Licensing terms: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed at the University of + Tennessee, Knoxville, Innovative Computing Laboratory. + + 4. The name of the University, the name of the Laboratory, or the + names of its contributors may not be used to endorse or promote + products derived from this software without specific written + permission. + + -- Disclaimer: + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+====================================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYRIGHT b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYRIGHT new file mode 100644 index 000000000..08465d618 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/COPYRIGHT @@ -0,0 +1,45 @@ +====================================================================== + -- High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 + Antoine P. Petitet + University of Tennessee, Knoxville + Innovative Computing Laboratory + (C) Copyright 2000-2008 All Rights Reserved + + -- Copyright notice and Licensing terms: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed at the University of + Tennessee, Knoxville, Innovative Computing Laboratory. + + 4. The name of the University, the name of the Laboratory, or the + names of its contributors may not be used to endorse or promote + products derived from this software without specific written + permission. + + -- Disclaimer: + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +====================================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/ChangeLog b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/ChangeLog new file mode 100644 index 000000000..1c2b36778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/ChangeLog @@ -0,0 +1,16 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + Done list in version 1.0b, December 15th, 2004 + - Fixed problem with 32-bit integer overflow. + Thanks to John Baron. + + Done list in version 1.0a, January 1st, 2004 + - Added Row- or Column-major process mapping in data file + - Fixed compilation error for gcc 3.3 in walltime. + - Fixed building problems on the T3E; + Thanks to Edward Anderson. 
+ +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/HISTORY b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/HISTORY new file mode 100644 index 000000000..d6d59ee45 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/HISTORY @@ -0,0 +1,103 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + History + + - 09/09/00 Public release of Version 1.0 + + - 09/27/00 A couple of mistakes in the VSIPL port have been + corrected. The tar file as well as the web site were updated + on September 27th, 2000. Note that these problems were not + affecting the BLAS version of the software in any way. + + - 01/01/04 Version 1.0a + The MPI process grid numbering scheme is now an run-time + option. + The inlined assembly timer routine that caused the compila- + tion to fail when using gcc version 3.3 and above has been + removed from the package. + Various building problems on the T3E have been fixed; Thanks + to Edward Anderson. + + - 15/12/04 Version 1.0b + Weakness of the pseudo-random matrix generator found for pro- + blem sizes being power of twos and larger than 2^15; Thanks + to Gregory Bauer. This problem has not been fixed. It is thus + currently recommended to HPL users willing to test matrices + of size larger than 2^15 to not use power twos. + + When the matrix size is such that one needs > 16 GB per MPI + rank, the intermediate calculation (mat.ld+1) * mat.nq in + HPL_pdtest.c ends up overflowing because it is done using + 32-bit arithmetic. This issue has been fixed by typecasting + to size_t; Thanks to John Baron. 
+ + - 09/10/08 Version 2.0 + + Piotr Luszczek changed to 64-bit RNG, modified files: + -- [M] include/hpl_matgen.h + -- [M] testing/matgen/HPL_ladd.c + -- [M] testing/matgen/HPL_lmul.c + -- [M] testing/matgen/HPL_rand.c + -- [M] testing/ptest/HPL_pdinfo.c + + For a motivation for the change, see: + Dongarra and Langou, ``The Problem with the Linpack + Benchmark Matrix Generator'', LAWN 206, June 2008. + + -- [M] testing/ptest/HPL_pdtest.c -- + + Julien Langou changed the test for correctness from + ||Ax-b||_oo / ( eps * ||A||_1 * N ) + ||Ax-b||_oo / ( eps * ||A||_1 * ||x||_1 ) + ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N ) + to the normwise backward error + || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) + See: + Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'', + Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, + Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002. + + Note that in our case || b ||_oo is almost for sure + 1/2, we compute it anyway. + + - 10/26/2012 Version 2.1 + + Piotr Luszczek introduced exact time stamping for HPL_pdgesv(): + -- [M] dist/include/hpl_misc.h + -- [M] dist/testing/ptest/HPL_pdtest.c + + Piotr Luszczek fixed out-of-bounds access in data spreading functions + and exact time stamping for HPL_pdgesv(): + -- [M] dist/src/pgesv/HPL_spreadN.c + -- [M] dist/src/pgesv/HPL_spreadT.c + Thanks to Stephen Whalen from Cray. + + - 02/24/2016 Version 2.2 + + Piotr Luszczek added continuous reporting of factorization progress + submitted by Intel and make scripts that uses Intel software tools and + libraries and their Apple's Mac OS X equivalents. 
+ + - 12/02/2018 Version 2.3 + + Piotr Luszczek removed deprecated MPI functions that are no longer + supported in some MPI implementations (for example Open MPI 4.0) and + replaced them with + modern equivalents in HPL_packL(): + -- [M] src/comm/HPL_packL.c + + Piotr Luszczek added one digit to the display of performance result + and changed display of scaled residual to scientific notation with + extra digits in HPL_pdtest(): + -- [M] testing/ptest/HPL_pdtest.c + + Piotr Luszczek added support for Autotools configuration packages + autoconf and automake: + -- [A] Makefile.am + -- [A] configure.ac + -- [A] acinclude.m4 + -- [A] src/Makefile.am + -- [A] testing/Makefile.am diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/INSTALL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/INSTALL new file mode 100644 index 000000000..fec266c49 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/INSTALL @@ -0,0 +1,81 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + 1) Retrieve the tar file, then + + gunzip hpl.tgz; tar -xvf hpl.tar + + this will create an hpl directory, that we call below the + top-level directory. + + 2) Create a file Make. in the top-level directory. For + this purpose, you may want to re-use one contained in the + setup directory. This file essentially contains the compilers + and librairies with their paths to be used. + + 3) Type "make arch=". This should create an executable + in the bin/ directory called xhpl. + + For example, on our Linux PII cluster, I create a file called + Make.Linux_PII in the top-level directory. Then, I type + "make arch=Linux_PII" + This creates the executable file bin/Linux_PII/xhpl. 
+ + 4) Quick check: run a few tests: + + cd bin/ + mpirun -np 4 xhpl + + 5) Tuning: Most of the performance parameters can be tuned, + by modifying the input file bin/HPL.dat. See the file TUNING + in the top-level directory. + +============================================================== + + Compile time options: At the end of the "model" Make., + --------------------- the user is given the opportunity to + compile the software with some specific compile options. The + list of this options and their meaning are: + + -DHPL_COPY_L + force the copy of the panel L before bcast; + + -DHPL_CALL_CBLAS + call the cblas interface; + + -DHPL_CALL_VSIPL + call the vsip library; + + -DHPL_DETAILED_TIMING + enables detail timers; + + The user must choose between either the BLAS Fortran 77 + interface, or the BLAS C interface, or the VSIPL library + depending on which computational kernels are available on his + system. Only one of these options should be selected. If you + choose the BLAS Fortran 77 interface, it is necessary to fill + out the machine-specific C to Fortran 77 interface section of + the Make. file. To do this, please refer to the + Make. examples contained in the setup directory. + + By default HPL will: + *) not copy L before broadcast, + *) call the BLAS Fortran 77 interface, + *) not display detailed timing information. + + As an example, suppose one wants HPL to copy the panel of + columns into a contiguous buffer before broadcasting. In + theory, it would be more efficient to let HPL create the + appropriate MPI user-defined data type since this may avoid + the data copy. So, it is a strange idea, but one insists. To + achieve this one would add -DHPL_COPY_L to the definition of + HPL_OPTS at the end of the file Make.. Issue then a + "make clean arch=; make build arch=" and the xhpl + executable will be re-build with that feature in. 
+============================================================== + + Check out the website www.netlib.org/benchmark/hpl for the + latest information. +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 new file mode 100644 index 000000000..492ed42ca --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 @@ -0,0 +1,236 @@ + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright (C) 2023 Intel Corporation​ + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. + # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = intel64 +export ARCH = intel64 +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +# Set TOPdir to the location of where this is being built +TOPdir = $(CURDIR) +INCdir = $(TOPdir)/include +BINdir =$(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a + +# +# ---------------------------------------------------------------------- +# - 
Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +OneAPIdir = $(ONEAPI_ROOT) +MPdir = $(OneAPIdir)/mpi/latest/ +MPinc = -I$(MPdir)/include/ +MPlib = -lmpi #$(MPdir)/lib/release/libmpi.so +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(OneAPIdir)/mkl/latest/lib/intel64/ +LAinc = -I$(OneAPIdir)/mkl/latest/include/intel64/ +LAlib = -L$(TOPdir)/src/cuda/ -ldgemm -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -lstdc++ -I$(TOPdir)/src/cuda/ +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. 
+# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) #$(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# -DASYOUGO enable timing information as you go (nonintrusive) +# -DASYOUGO2 slightly intrusive timing information +# -DASYOUGO2_DISPLAY display detailed DGEMM information +# -DENDEARLY end the problem early +# -DFASTSWAP insert to use DLASWP instead of HPL code +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing 
information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- +MAKE = make VERBOSE=1 arch=$(ARCH) TOPdir=$(TOPdir) diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.top b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.top new file mode 100644 index 000000000..48967633b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.top @@ -0,0 +1,195 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +arch = UNKNOWN +# +include Make.$(arch) +# +## build ############################################################### +# +build_src : + ( $(CD) src/auxil/$(arch); $(MAKE) ) + ( $(CD) src/blas/$(arch); $(MAKE) ) + ( $(CD) src/comm/$(arch); $(MAKE) ) + ( $(CD) src/grid/$(arch); $(MAKE) ) + ( $(CD) src/panel/$(arch); $(MAKE) ) + ( $(CD) src/pauxil/$(arch); $(MAKE) ) + ( $(CD) src/pfact/$(arch); $(MAKE) ) + ( $(CD) src/pgesv/$(arch); $(MAKE) ) + ( $(CD) src/cuda/; $(MAKE) ) +# +build_tst : + ( $(CD) testing/matgen/$(arch); $(MAKE) ) + ( $(CD) testing/timer/$(arch); $(MAKE) ) + ( $(CD) testing/pmatgen/$(arch); $(MAKE) ) + ( $(CD) testing/ptimer/$(arch); $(MAKE) ) + ( $(CD) testing/ptest/$(arch); $(MAKE) ) +#( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' ) +# +## startup ############################################################# +# +startup_dir : + - $(MKDIR) include/$(arch) + - $(MKDIR) lib + - $(MKDIR) lib/$(arch) + - $(MKDIR) bin + - $(MKDIR) bin/$(arch) +# +startup_src : + - $(MAKE) -f Make.top leaf le=src/auxil arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/blas arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/comm arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/grid arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/panel arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pauxil arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pfact arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pgesv arch=$(arch) +# +startup_tst : + - $(MAKE) -f Make.top leaf le=testing/matgen arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/timer arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/pmatgen arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/ptimer arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/ptest arch=$(arch) +#- SPMS_make_make`' -f Make.top leaf le=testing/test arch=$(arch) +# +## refresh ############################################################# +# +refresh_src : + - $(CP) 
makes/Make.auxil src/auxil/$(arch)/Makefile + - $(CP) makes/Make.blas src/blas/$(arch)/Makefile + - $(CP) makes/Make.comm src/comm/$(arch)/Makefile + - $(CP) makes/Make.grid src/grid/$(arch)/Makefile + - $(CP) makes/Make.panel src/panel/$(arch)/Makefile + - $(CP) makes/Make.pauxil src/pauxil/$(arch)/Makefile + - $(CP) makes/Make.pfact src/pfact/$(arch)/Makefile + - $(CP) makes/Make.pgesv src/pgesv/$(arch)/Makefile +# +refresh_tst : + - $(CP) makes/Make.matgen testing/matgen/$(arch)/Makefile + - $(CP) makes/Make.timer testing/timer/$(arch)/Makefile + - $(CP) makes/Make.pmatgen testing/pmatgen/$(arch)/Makefile + - $(CP) makes/Make.ptimer testing/ptimer/$(arch)/Makefile + - $(CP) makes/Make.ptest testing/ptest/$(arch)/Makefile +#- SPMS_make_cp`' makes/Make.test testing/test/$(arch)/Makefile +# +## clean ############################################################### +# +clean_src : + - ( $(CD) src/auxil/$(arch); $(MAKE) clean ) + - ( $(CD) src/blas/$(arch); $(MAKE) clean ) + - ( $(CD) src/comm/$(arch); $(MAKE) clean ) + - ( $(CD) src/grid/$(arch); $(MAKE) clean ) + - ( $(CD) src/panel/$(arch); $(MAKE) clean ) + - ( $(CD) src/pauxil/$(arch); $(MAKE) clean ) + - ( $(CD) src/pfact/$(arch); $(MAKE) clean ) + - ( $(CD) src/pgesv/$(arch); $(MAKE) clean ) + - ( $(CD) src/cuda/; $(MAKE) clean) +# +clean_tst : + - ( $(CD) testing/matgen/$(arch); $(MAKE) clean ) + - ( $(CD) testing/timer/$(arch); $(MAKE) clean ) + - ( $(CD) testing/pmatgen/$(arch); $(MAKE) clean ) + - ( $(CD) testing/ptimer/$(arch); $(MAKE) clean ) + - ( $(CD) testing/ptest/$(arch); $(MAKE) clean ) +#- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' clean ) +# +## clean_arch ########################################################## +# +clean_arch_src : + - $(RM) -r src/auxil/$(arch) + - $(RM) -r src/blas/$(arch) + - $(RM) -r src/comm/$(arch) + - $(RM) -r src/grid/$(arch) + - $(RM) -r src/panel/$(arch) + - $(RM) -r src/pauxil/$(arch) + - $(RM) -r src/pfact/$(arch) + - $(RM) -r src/pgesv/$(arch) + - ( 
$(CD) src/cuda; $(MAKE) clean) +# +clean_arch_tst : + - $(RM) -r testing/matgen/$(arch) + - $(RM) -r testing/timer/$(arch) + - $(RM) -r testing/pmatgen/$(arch) + - $(RM) -r testing/ptimer/$(arch) + - $(RM) -r testing/ptest/$(arch) +#- SPMS_make_rm`' -r testing/test/$(arch) +# +## clean_arch_all ###################################################### +# +clean_arch_all : + - $(MAKE) -f Make.top clean_arch_src arch=$(arch) + - $(MAKE) -f Make.top clean_arch_tst arch=$(arch) + - $(RM) -r bin/$(arch) include/$(arch) lib/$(arch) +# +## clean_guard ######################################################### +# +clean_guard_src : + - ( $(CD) src/auxil/$(arch); $(RM) *.grd ) + - ( $(CD) src/blas/$(arch); $(RM) *.grd ) + - ( $(CD) src/comm/$(arch); $(RM) *.grd ) + - ( $(CD) src/grid/$(arch); $(RM) *.grd ) + - ( $(CD) src/panel/$(arch); $(RM) *.grd ) + - ( $(CD) src/pauxil/$(arch); $(RM) *.grd ) + - ( $(CD) src/pfact/$(arch); $(RM) *.grd ) + - ( $(CD) src/pgesv/$(arch); $(RM) *.grd ) +# +clean_guard_tst : + - ( $(CD) testing/matgen/$(arch); $(RM) *.grd ) + - ( $(CD) testing/timer/$(arch); $(RM) *.grd ) + - ( $(CD) testing/pmatgen/$(arch); $(RM) *.grd ) + - ( $(CD) testing/ptimer/$(arch); $(RM) *.grd ) + - ( $(CD) testing/ptest/$(arch); $(RM) *.grd ) +#- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_rm`' *.grd ) +# +## misc ################################################################ +# +leaf : + - ( $(CD) $(le) ; $(MKDIR) $(arch) ) + - ( $(CD) $(le)/$(arch) ; \ + $(LN_S) $(TOPdir)/Make.$(arch) Make.inc ) +# +######################################################################## diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile new file mode 100644 index 000000000..40b5585ae --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile @@ -0,0 +1,135 @@ + + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright (C) 
2023 Intel Corporation​ + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. + # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# +SHELL = /bin/sh +# +arch = intel64 +make = 'make VERBOSE=1' +# +## Targets ############################################################# +# +all : install +# +# ###################################################################### +# +install : startup refresh build +# +startup : + $(MAKE) -f Make.top startup_dir arch=$(arch) + $(MAKE) -f Make.top startup_src arch=$(arch) + $(MAKE) -f Make.top startup_tst arch=$(arch) + $(MAKE) -f Make.top refresh_src arch=$(arch) + $(MAKE) -f Make.top refresh_tst arch=$(arch) +# +refresh : + $(MAKE) -f Make.top refresh_src arch=$(arch) + $(MAKE) -f Make.top refresh_tst arch=$(arch) +# +build : + $(MAKE) -f Make.top build_src arch=$(arch) + $(MAKE) -f Make.top build_tst arch=$(arch) +# +clean : + $(MAKE) -f Make.top clean_src arch=$(arch) + $(MAKE) -f Make.top clean_tst arch=$(arch) +# +clean_arch : + $(MAKE) -f Make.top clean_arch_src arch=$(arch) + $(MAKE) -f Make.top clean_arch_tst arch=$(arch) +# +clean_arch_all : + $(MAKE) -f Make.top clean_arch_all arch=$(arch) +# +clean_guard : + $(MAKE) -f Make.top clean_guard_src arch=$(arch) + $(MAKE) -f Make.top clean_guard_tst arch=$(arch) +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.am new file mode 
100644 index 000000000..1ad8c1b17 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src testing + +AM_CPPFLAGS = -I$(top_srcdir)/include diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.in new file mode 100644 index 000000000..76f0e2dd6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/Makefile.in @@ -0,0 +1,772 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = . 
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ + $(am__configure_deps) $(am__DIST_COMMON) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + cscope distdir distdir-am dist dist-all distcheck +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print 
each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +CSCOPE = cscope +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/include/hplconfig.h.in AUTHORS COPYING ChangeLog \ + INSTALL NEWS README THANKS TODO compile config.guess \ + config.sub depcomp install-sh missing +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! 
-perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ 
+program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src testing +AM_CPPFLAGS = -I$(top_srcdir)/include +all: all-recursive + +.SUFFIXES: +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +include/hplconfig.h: include/stamp-h1 + @test -f $@ || rm -f include/stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h1 + +include/stamp-h1: $(top_srcdir)/include/hplconfig.h.in $(top_builddir)/config.status + @rm -f include/stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status include/hplconfig.h +$(top_srcdir)/include/hplconfig.h.in: $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f include/stamp-h1 + touch $@ + +distclean-hdr: + 
-rm -f include/hplconfig.h include/stamp-h1 + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case 
$$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. 
+distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build/sub \ + && ../../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=../.. --prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-hdr distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--refresh check check-am clean clean-cscope clean-generic \ + cscope cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \ + dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \ + distcheck distclean distclean-generic distclean-hdr \ + distclean-tags distcleancheck distdir distuninstallcheck dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + 
+.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/NEWS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/NEWS new file mode 100644 index 000000000..d6d59ee45 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/NEWS @@ -0,0 +1,103 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + History + + - 09/09/00 Public release of Version 1.0 + + - 09/27/00 A couple of mistakes in the VSIPL port have been + corrected. The tar file as well as the web site were updated + on September 27th, 2000. Note that these problems were not + affecting the BLAS version of the software in any way. + + - 01/01/04 Version 1.0a + The MPI process grid numbering scheme is now an run-time + option. + The inlined assembly timer routine that caused the compila- + tion to fail when using gcc version 3.3 and above has been + removed from the package. + Various building problems on the T3E have been fixed; Thanks + to Edward Anderson. + + - 15/12/04 Version 1.0b + Weakness of the pseudo-random matrix generator found for pro- + blem sizes being power of twos and larger than 2^15; Thanks + to Gregory Bauer. This problem has not been fixed. It is thus + currently recommended to HPL users willing to test matrices + of size larger than 2^15 to not use power twos. + + When the matrix size is such that one needs > 16 GB per MPI + rank, the intermediate calculation (mat.ld+1) * mat.nq in + HPL_pdtest.c ends up overflowing because it is done using + 32-bit arithmetic. This issue has been fixed by typecasting + to size_t; Thanks to John Baron. 
+ + - 09/10/08 Version 2.0 + + Piotr Luszczek changed to 64-bit RNG, modified files: + -- [M] include/hpl_matgen.h + -- [M] testing/matgen/HPL_ladd.c + -- [M] testing/matgen/HPL_lmul.c + -- [M] testing/matgen/HPL_rand.c + -- [M] testing/ptest/HPL_pdinfo.c + + For a motivation for the change, see: + Dongarra and Langou, ``The Problem with the Linpack + Benchmark Matrix Generator'', LAWN 206, June 2008. + + -- [M] testing/ptest/HPL_pdtest.c -- + + Julien Langou changed the test for correctness from + ||Ax-b||_oo / ( eps * ||A||_1 * N ) + ||Ax-b||_oo / ( eps * ||A||_1 * ||x||_1 ) + ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N ) + to the normwise backward error + || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) + See: + Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'', + Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, + Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002. + + Note that in our case || b ||_oo is almost for sure + 1/2, we compute it anyway. + + - 10/26/2012 Version 2.1 + + Piotr Luszczek introduced exact time stamping for HPL_pdgesv(): + -- [M] dist/include/hpl_misc.h + -- [M] dist/testing/ptest/HPL_pdtest.c + + Piotr Luszczek fixed out-of-bounds access in data spreading functions + and exact time stamping for HPL_pdgesv(): + -- [M] dist/src/pgesv/HPL_spreadN.c + -- [M] dist/src/pgesv/HPL_spreadT.c + Thanks to Stephen Whalen from Cray. + + - 02/24/2016 Version 2.2 + + Piotr Luszczek added continuous reporting of factorization progress + submitted by Intel and make scripts that uses Intel software tools and + libraries and their Apple's Mac OS X equivalents. 
+ + - 12/02/2018 Version 2.3 + + Piotr Luszczek removed deprecated MPI functions that are no longer + supported in some MPI implementations (for example Open MPI 4.0) and + replaced them with + modern equivalents in HPL_packL(): + -- [M] src/comm/HPL_packL.c + + Piotr Luszczek added one digit to the display of performance result + and changed display of scaled residual to scientific notation with + extra digits in HPL_pdtest(): + -- [M] testing/ptest/HPL_pdtest.c + + Piotr Luszczek added support for Autotools configuration packages + autoconf and automake: + -- [A] Makefile.am + -- [A] configure.ac + -- [A] acinclude.m4 + -- [A] src/Makefile.am + -- [A] testing/Makefile.am diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/README b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/README new file mode 100644 index 000000000..c3f79a877 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/README @@ -0,0 +1,32 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + HPL is a software package that solves a (random) dense linear + system in double precision (64 bits) arithmetic on + distributed-memory computers. It can thus be regarded as a + portable as well as freely available implementation of the + High Performance Computing Linpack Benchmark. + + The HPL software package requires the availability on your + system of an implementation of the Message Passing Interface + MPI (1.1 compliant). An implementation of either the Basic + Linear Algebra Subprograms BLAS or the Vector Signal Image + Processing Library VSIPL is also needed. Machine-specific as + well as generic implementations of MPI, the BLAS and VSIPL + are available for a large variety of systems. + + Install See the file INSTALL in this directory.
+ ------- + + Tuning See the file TUNING in this directory. + ------ + + Bugs Known problems and bugs with this release are documen- + ---- ted in the file hpl/BUGS. + + Check out the website www.netlib.org/benchmark/hpl for the + latest information. + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/THANKS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/THANKS new file mode 100644 index 000000000..1c5641ce4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/THANKS @@ -0,0 +1 @@ +This software was improved with contribution of external developers. diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TODO b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TODO new file mode 100644 index 000000000..1c2b36778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TODO @@ -0,0 +1,16 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + Done list in version 1.0b, December 15th, 2004 + - Fixed problem with 32-bit integer overflow. + Thanks to John Baron. + + Done list in version 1.0a, January 1st, 2004 + - Added Row- or Column-major process mapping in data file + - Fixed compilation error for gcc 3.3 in walltime. + - Fixed building problems on the T3E; + Thanks to Edward Anderson. 
+ +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TUNING b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TUNING new file mode 100644 index 000000000..24707f1fc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/TUNING @@ -0,0 +1,419 @@ +============================================================== + Performance Tuning and setting up the input data file HPL.dat + + Current as of release HPL - 2.3 - December 2, 2018 +============================================================== + Check out the website www.netlib.org/benchmark/hpl for the + latest information. + + After having built the executable hpl/bin/<arch>/xhpl, one + may want to modify the input data file HPL.dat. This file + should reside in the same directory as the executable + hpl/bin/<arch>/xhpl. An example HPL.dat file is provided by + default. This file contains information about the problem + sizes, machine configuration, and algorithm features to be + used by the executable. It is 31 lines long. All the selected + parameters will be printed in the output generated by the + executable. + + At the end of this file, there are a couple of experimental + guidelines that you may find useful. + +============================================================== + File HPL.dat (description): + + Line 1: (unused) Typically one would use this line for its + own good. For example, it could be used to summarize the con- + tent of the input file. By default this line reads: + + HPL Linpack benchmark input file + + Line 2: (unused) same as line 1. By default this line reads: + + Innovative Computing Laboratory, University of Tennessee + + Line 3: the user can choose where the output should be re- + directed to. In the case of a file, a name is necessary, and + this is the line where one wants to specify it. Only the + first name on this line is significant.
By default, the li- + ne reads: + + HPL.out output file name (if any) + + This means that if one chooses to redirect the output to a + file, the file will be called "HPL.out". The rest of the line + is unused, and this space can be used to put some informative comment on + the meaning of this line. + + Line 4: This line specifies where the output should go. The + line is formatted, it must be a positive integer, the rest is + insignificant. 3 choices are possible for the positive inte- + ger, 6 means that the output will go to the standard output, 7 + means that the output will go to the standard error. Any o- + ther integer means that the output should be redirected + to a file, whose name has been specified in the line above. + This line by default reads: + + 6 device out (6=stdout,7=stderr,file) + + which means that the output generated by the executable + should be redirected to the standard output. + + Line 5: This line specifies the number of problem sizes to be + executed. This number should be less than or equal to 20. The + first integer is significant, the rest is ignored. If the + line reads: + + 3 # of problems sizes (N) + + this means that the user is willing to run 3 problem sizes + that will be specified in the next line. + + Line 6: This line specifies the problem sizes one wants to + run. Assuming the line above started with 3, the 3 first + positive integers are significant, the rest is ignored. For + example: + + 3000 6000 10000 Ns + + means that one wants xhpl to run 3 (specified in line 5) pro- + blem sizes, namely 3000, 6000 and 10000. + + Line 7: This line specifies the number of block sizes to be + run. This number should be less than or equal to 20. + The first integer is significant, the rest is ignored. If the + line reads: + + 5 # of NBs + + this means that the user is willing to use 5 block sizes that + will be specified in the next line. + + Line 8: This line specifies the block sizes one wants to run.
+ Assuming the line above started with 5, the 5 first positive + integers are significant, the rest is ignored. For example: + + 80 100 120 140 160 NBs + + means that one wants xhpl to use 5 (specified in line 7) + block sizes, namely 80, 100, 120, 140 and 160. + + Line 9 specifies how the MPI processes should be mapped onto + the nodes of your platform. There are currently two possible + mappings, namely row- and column-major. This feature is main- + ly useful when these nodes are themselves multi-processor + computers. A row-major mapping is recommended. + + Line 10: This line specifies the number of process grids to + be run. This number should be less than or equal to 20. + The first integer is significant, the rest is ignored. If the + line reads: + + 2 # of process grids (P x Q) + + this means that you are willing to try 2 process grid sizes + that will be specified in the next line. + + Lines 11-12: These two lines specify the number of process + rows and columns of each grid you want to run on. Assuming + the line above (10) started with 2, the 2 first positive in- + tegers of those two lines are significant, the rest is igno- + red. For example: + + 1 2 Ps + 6 8 Qs + + means that one wants to run xhpl on 2 process grids (line + 10), namely 1 by 6 and 2 by 8. Note: In this example, it is + required then to start xhpl on at least 16 nodes (max of P_i + xQ_i). The runs on the two grids will be consecutive. If one + was starting xhpl on more than 16 nodes, say 52, only 6 would + be used for the first grid (1x6) and then 16 (2x8) would be + used for the second grid. The fact that you started the MPI + job on 52 nodes, will not make HPL use all of them. In this + example, only 16 would be used. If one wants to run xhpl with + 52 processes one needs to specify a grid of 52 processes, for + example the following lines would do the job: + + 4 2 Ps + 13 8 Qs + + Line 13: This line specifies the threshold the residuals + should be compared to.
The residuals should be of order 1, + but are in practice slightly less than this, typically 0.001. + This line is made of a real number, the rest is insignifi- + cant. For example: + + 16.0 threshold + + In practice, a value of 16.0 will cover most cases. For va- + rious reasons, it is possible that some of the residuals be- + come slightly larger, say for example 35.6. xhpl will flag + those runs as failed, however they can be considered as cor- + rect. A run can be considered as failed if the residual is a + few orders of magnitude bigger than 1 for example 10^6 or mo- + re. Note: if one was to specify a threshold of 0.0, all tests + would be flagged as failed, even though the answer is likely + to be correct. It is allowed to specify a negative value for + this threshold, in which case the checks will be by-passed, + no matter what the value is, as soon as it is negative. This + feature allows one to save time when performing a lot of experi- + ments, say for instance during the tuning phase. Example: + + -16.0 threshold + + The remaining lines allow one to specify algorithmic features. + xhpl will run all possible combinations of those for each + problem size, block size, process grid combination. This is + handy when one looks for an "optimal" set of parameters. To + understand a little bit better, let us say first a few words + about the algorithm implemented in HPL. Basically this is a + right-looking version with row-partial pivoting. The panel + factorization is matrix-matrix operation based and recursive, + dividing the panel into NDIV subpanels at each step. This + part of the panel factorization is denoted below by + "recursive panel fact. (RFACT)". The recursion stops when the + current panel is made of less than or equal to NBMIN columns. + At that point, xhpl uses a matrix-vector operation based + factorization denoted below by "PFACTs". Classic recursion + would then use NDIV=2, NBMIN=1.
There are essentially 3 + numerically equivalent LU factorization algorithm variants + (left-looking, Crout and right-looking). In HPL, one can + choose every one of those for the RFACT, as well as the + PFACT. The following lines of HPL.dat allow you to set those + parameters. + + Lines 14-21: (Example 1) + 3 # of panel fact + 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + 4 # of recursive stopping criterium + 1 2 4 8 NBMINs (>= 1) + 3 # of panels in recursion + 2 3 4 NDIVs + 3 # of recursive panel fact. + 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + + This example would try all variants of PFACT, 4 values for + NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 + and 4, and all variants for RFACT. Lines 14-21: (Example 2) + + 2 # of panel fact + 2 0 PFACTs (0=left, 1=Crout, 2=Right) + 2 # of recursive stopping criterium + 4 8 NBMINs (>= 1) + 1 # of panels in recursion + 2 NDIVs + 1 # of recursive panel fact. + 2 RFACTs (0=left, 1=Crout, 2=Right) + + This example would try 2 variants of PFACT namely right loo- + king and left looking, 2 values for NBMIN, namely 4 and 8, 1 + value for NDIV namely 2, and one variant for RFACT. + + In the main loop of the algorithm, the current panel of co- + lumns is broadcast in process rows using a virtual ring to- + pology. HPL offers various choices, and one most likely wants + to use the increasing ring modified encoded as 1. 4 is also + a good choice. Lines 22-23: (Example 1): + + 1 # of broadcast + 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + This will cause HPL to broadcast the current panel using the + increasing ring modified topology. Lines 22-23: (Example 2): + + 2 # of broadcast + 0 4 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + This will cause HPL to broadcast the current panel using the + increasing ring virtual topology and the long message algori- + thm. + + Lines 24-25 allow one to specify the look-ahead depth used by + HPL.
A depth of 0 means that the next panel is factorized af- + ter the update by the current panel is completely finished. A + depth of 1 means that the next panel is factorized immediate- + ly after being updated. The update by the current panel is + then finished. A depth of k means that the k next panels are + factorized immediately after being updated. The update by the + current panel is then finished. It turns out that a depth of + 1 seems to give the best results, but may need a large pro- + blem size before one can see the performance gain. So use 1, + if you do not know better, otherwise you may want to try 0. + Look-ahead of depths 2 and larger will probably not give you + better results. Lines 24-25: (Example 1): + + 1 # of lookahead depth + 1 DEPTHs (>=0) + + This will cause HPL to use a look-ahead of depth 1. + Lines 24-25: (Example 2): + + 2 # of lookahead depth + 0 1 DEPTHs (>=0) + + This will cause HPL to use a look-ahead of depths 0 and 1. + + Lines 26-27 allow one to specify the swapping algorithm used by + HPL for all tests. There are currently two swapping algo- + rithms available, one based on "binary exchange" and the + other one based on a "spread-roll" procedure (also called + "long" below). For large problem sizes, this last one is like- + ly to be more efficient. The user can also choose to mix both + variants, that is "binary-exchange" for a number of columns + less than a threshold value, and then the "spread-roll" al- + gorithm. This threshold value is then specified on Line 27. + Lines 26-27: (Example 1): + + 1 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + This will cause HPL to use the "long" or "spread-roll" swap- + ping algorithm. Note that a threshold is specified in that + example but not used by HPL. Lines 26-27: (Example 2): + + 2 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + This will cause HPL to use the "long" or "spread-roll" swap- + ping algorithm as soon as there are more than 60 columns in + the row panel.
Otherwise, the "binary-exchange" algorithm + will be used instead. + + Line 28 allows one to specify whether the upper triangle of the + panel of columns should be stored in no-transposed or + transposed form. Example: + + 0 L1 in (0=transposed,1=no-transposed) form + + Line 29 allows one to specify whether the panel of rows U should + be stored in no-transposed or transposed form. Example: + + 0 U in (0=transposed,1=no-transposed) form + + Line 30 enables/disables the equilibration phase. This option + will not be used unless you selected 1 or 2 in Line 26. Ex: + + 1 Equilibration (0=no,1=yes) + + + Line 31 allows one to specify the alignment in memory for the + memory space allocated by HPL. On modern machines, one proba- + bly wants to use 4, 8 or 16. This may result in a tiny amount + of memory wasted. Example: + + 4 memory alignment in double (> 0) + +============================================================== + Guidelines: + + 1) Figure out a good block size for the matrix-matrix + multiply routine. The best method is to try a few out. If you + happen to know the block size used by the matrix-matrix + multiply routine, a small multiple of that block size will do + fine. + + HPL uses the block size NB for the data distribution as well + as for the computational granularity. From a data + distribution point of view, the smaller NB, the better the + load balance. You definitely want to stay away from very + large values of NB. From a computation point of view, a too + small value of NB may limit the computational performance by + a large factor because almost no data reuse will occur in the + highest level of the memory hierarchy. The number of messages + will also increase. Efficient matrix-multiply routines are + often internally blocked. Small multiples of this blocking + factor are likely to be good block sizes for HPL. The bottom + line is that "good" block sizes are almost always in the + [32..256] interval.
The best values depend on the computation + / communication performance ratio of your system. To a much + lesser extent, the problem size matters as well. Say for + example, you empirically found that 44 was a good block size + with respect to performance. 88 or 132 are likely to give + slightly better results for large problem sizes because of a + slightly higher flop rate. + + 2) The process mapping should not matter if the nodes of + your platform are single processor computers. If these nodes + are multi-processors, a row-major mapping is recommended. + + 3) HPL likes "square" or slightly flat process grids. Unless + you are using a very small process grid, stay away from the + 1-by-Q and P-by-1 process grids. + + 4) Panel factorization parameters: a good start are the fol- + lowing for the lines 14-21: + + 1 # of panel fact + 1 PFACTs (0=left, 1=Crout, 2=Right) + 2 # of recursive stopping criterium + 4 8 NBMINs (>= 1) + 1 # of panels in recursion + 2 NDIVs + 1 # of recursive panel fact. + 2 RFACTs (0=left, 1=Crout, 2=Right) + + 5) Broadcast parameters: at this time, it is far from obvious + to me what the best setting is, so I would probably try them + all. If I had to guess I would probably start with the follo- + wing for the lines 22-23: + + 2 # of broadcast + 1 3 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + The best broadcast depends on your problem size and hardware + performance. My take is that 4 or 5 may be competitive for + machines featuring very fast nodes compared to the + network. + + 6) Look-ahead depth: as mentioned above 0 or 1 are likely to + be the best choices. This also depends on the problem size + and machine configuration, so I would try "no look-ahead (0)" + and "look-ahead of depth 1 (1)". That is for lines 24-25: + + 2 # of lookahead depth + 0 1 DEPTHs (>=0) + + 7) Swapping: one can select only one of the three algorithms + in the input file. Theoretically, mix (2) should win, however + long (1) might just be good enough.
The difference should be + small between those two assuming a swapping threshold of the + order of the block size (NB) selected. If this threshold is + very large, HPL will use bin_exch (0) most of the time and if + it is very small (< NB) long (1) will always be used. In + short and assuming the block size (NB) used is say 60, I + would choose for the lines 26-27: + + 2 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + I would also try the long variant. For a very small number + of processes in every column of the process grid (say < 4), + very little performance difference should be observable. + + 8) Local storage: I do not think Line 28 matters. Pick 0 in + doubt. Line 29 is more important. It controls how the panel + of rows should be stored. No doubt 0 is better. The caveat is + that in that case the matrix-multiply function is called with + ( Notrans, Trans, ... ), that is C := C - A B^T. Unless the + computational kernel you are using has a very poor (with + respect to performance) implementation of that case, and is + much more efficient with ( Notrans, Notrans, ... ) just pick + 0 as well. So, my choice: + + 0 L1 in (0=transposed,1=no-transposed) form + 0 U in (0=transposed,1=no-transposed) form + + 9) Equilibration: It is hard to tell whether equilibration + should always be performed or not. Not knowing much about the + random matrix generated and because the overhead is so small + compared to the possible gain, I turn it on all the time. + + 1 Equilibration (0=no,1=yes) + + 10) For alignment, 4 should be plenty, but just to be safe, + one may want to pick 8 instead. 
+ + 8 memory alignment in double (> 0) + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/acinclude.m4 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/acinclude.m4 new file mode 100644 index 000000000..4072a950f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/acinclude.m4 @@ -0,0 +1,90 @@ +dnl HPL_BLAS(ACTION-IF-FOUND, ACTION-IF-NOT-FOUND): try a fixed list of BLAS link lines until one resolves dgemm_, then export the winning link line as BLAS_LIBS. +AC_DEFUN([HPL_BLAS], [ + +AC_PREREQ(2.69) + +hpl_blas_ok=no + +dnl FIXME: add --with-blas="" + +current_LIBS="$LIBS" +dnl Write the candidate table to a scratch file with a here-document; the loop below reads each entry back with grep and sed. +cat <<HPLEOF > hplvars.txt +name1=OpenBLAS +rout1=dgemm_ +libs1=-lopenblas -lm + +name2=Atlas Fortran BLAS +rout2=dgemm_ +libs2=-lf77blas -latlas + +name3=Sequential Intel MKL LP64 (group) +rout3=dgemm_ +libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread + +name4=Sequential Intel MKL LP64 +rout4=dgemm_ +libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +name5=AMD's ACML +rout5=dgemm_ +libs5=-lacml -lm + +name6=Accelerate +rout6=dgemm_ +libs6=-framework Accelerate + +name7=Apple VecLib +rout7=dgemm_ +libs7=-framework vecLib + +name8=IBM ESSL +rout8=dgemm_ +libs8=-lessl + +name9=NVIDIA nvblas +rout9=dgemm_ +libs9=-lnvblas + +name10=Generic BLAS +rout10=dgemm_ +libs10=-lblas + +HPLEOF +for hpl_i in 1 2 3 4 5 6 7 8 9 10; +do +if test x$hpl_blas_ok = xno; then + name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`" + rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`" + libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`" + AC_MSG_CHECKING([for [$]rout in [$]name]) + + LIBS="[$]libs" + AC_TRY_LINK_FUNC([$]rout, [hpl_blas_ok=yes;BLAS_LIBS="[$]libs"]) + LIBS="$current_LIBS" + + AC_MSG_RESULT($hpl_blas_ok) +fi +done +rm hplvars.txt + +if test x$hpl_blas_ok = xno; then +dnl +AC_MSG_CHECKING([for dgemm_ in OpenBLAS]) +AC_CHECK_LIB(openblas, dgemm_, [hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"]) +AC_MSG_RESULT($hpl_blas_ok) +dnl +fi + +AC_SUBST(BLAS_LIBS) + +#
If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$hpl_blas_ok" = xyes; then + ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1]) + : +else + hpl_blas_ok=no + $2 +fi + +])dnl HPL_BLAS diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/aclocal.m4 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/aclocal.m4 new file mode 100644 index 000000000..56c6bd753 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/aclocal.m4 @@ -0,0 +1,1308 @@ +# generated automatically by aclocal 1.16.1 -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely.
+To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_prog_cc_mpi.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PROG_CC_MPI([MPI-WANTED-TEST[, ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]]) +# +# DESCRIPTION +# +# This macro tries to find out how to compile C programs that use MPI +# (Message Passing Interface), a standard API for parallel process +# communication (see http://www-unix.mcs.anl.gov/mpi/). The macro has to +# be used instead of the standard macro AC_PROG_CC and will replace the +# standard variable CC with the found compiler. +# +# MPI-WANTED-TEST is used to test whether MPI is actually wanted by the +# user. If MPI-WANTED_TEST is omitted or if it succeeds, the macro will +# try to find out how to use MPI, if it fails, the macro will call +# AC_PROG_CC to find a standard C compiler instead. +# +# When MPI is found, ACTION-IF-FOUND will be executed, if MPI is not found +# (or MPI-WANTED-TEST fails) ACTION-IF-NOT-FOUND is executed. If +# ACTION-IF-FOUND is not set, the macro will define HAVE_MPI. +# +# The following example demonstrates usage of the macro: +# +# # If --with-mpi=auto is used, try to find MPI, but use standard C compiler if it is not found. +# # If --with-mpi=yes is used, try to find MPI and fail if it isn't found. +# # If --with-mpi=no is used, use a standard C compiler instead. +# AC_ARG_WITH(mpi, [AS_HELP_STRING([--with-mpi], +# [compile with MPI (parallelization) support. If none is found, +# MPI is not used. 
Default: auto]) +# ],,[with_mpi=auto]) +# # +# AX_PROG_CC_MPI([test x"$with_mpi" != xno],[use_mpi=yes],[ +# use_mpi=no +# if test x"$with_mpi" = xyes; then +# AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.]) +# else +# AC_MSG_WARN([No MPI compiler found, won't use MPI.]) +# fi +# ]) +# +# LICENSE +# +# Copyright (c) 2010,2011 Olaf Lenz +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 2 + +AC_DEFUN([AX_PROG_CC_MPI], [ +AC_PREREQ(2.50) + +# Check for compiler +# Needs to be split off into an extra macro to ensure right expansion +# order.
+AC_REQUIRE([_AX_PROG_CC_MPI],[_AX_PROG_CC_MPI([$1])]) + +AS_IF([test x"$_ax_prog_cc_mpi_mpi_wanted" = xno], + [ _ax_prog_cc_mpi_mpi_found=no ], + [ + AC_LANG_PUSH([C]) + # test whether MPI_Init is available + # We do not use AC_SEARCH_LIBS here, as it caches its outcome and + # thus disallows corresponding calls in the other AX_PROG_*_MPI + # macros. + for lib in NONE mpi mpich; do + save_LIBS=$LIBS + if test x"$lib" = xNONE; then + AC_MSG_CHECKING([for function MPI_Init]) + else + AC_MSG_CHECKING([for function MPI_Init in -l$lib]) + LIBS="-l$lib $LIBS" + fi + AC_LINK_IFELSE([AC_LANG_CALL([],[MPI_Init])], + [ _ax_prog_cc_mpi_mpi_found=yes ], + [ _ax_prog_cc_mpi_mpi_found=no ]) + AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_found) + if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then + break; + fi + LIBS=$save_LIBS + done + + # Check for header + AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [ + AC_MSG_CHECKING([for mpi.h]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <mpi.h>])], + [ AC_MSG_RESULT(yes)], + [ AC_MSG_RESULT(no) + _ax_prog_cc_mpi_mpi_found=no + ]) + ]) + AC_LANG_POP([C]) +]) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [ + ifelse([$2],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$2]) + : +],[ + $3 + : +]) + +])dnl AX_PROG_CC_MPI + +dnl _AX_PROG_CC_MPI is an internal macro required by AX_PROG_CC_MPI. +dnl To ensure the right expansion order, the main function AX_PROG_CC_MPI +dnl has to be split into two parts.
+dnl +dnl Known MPI C compilers: +dnl mpicc +dnl mpixlc_r +dnl mpixlc +dnl hcc +dnl mpxlc_r +dnl mpxlc +dnl sxmpicc NEC SX +dnl mpifcc Fujitsu +dnl mpgcc +dnl mpcc +dnl cmpicc +dnl cc +dnl +AC_DEFUN([_AX_PROG_CC_MPI], [ + AC_ARG_VAR(MPICC,[MPI C compiler command]) + ifelse([$1],,[_ax_prog_cc_mpi_mpi_wanted=yes],[ + AC_MSG_CHECKING([whether to compile using MPI]) + if $1; then + _ax_prog_cc_mpi_mpi_wanted=yes + else + _ax_prog_cc_mpi_mpi_wanted=no + fi + AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_wanted) + ]) + if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then + if test -z "$CC" && test -n "$MPICC"; then + CC="$MPICC" + else + AC_CHECK_TOOLS([CC], [mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc]) + fi + fi + AC_PROG_CC +])dnl _AX_PROG_CC_MPI + +# Copyright (C) 2002-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.1], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. 
+m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.1])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. 
+# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. 
+Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all it's +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. 
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. 
It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. 
+ AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. 
+dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. 
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. 
+_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check whether make has an 'include' directive that can support all +# the idioms we need for our automatic dependency tracking code. 
+AC_DEFUN([AM_MAKE_INCLUDE], +[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) + AS_CASE([$?:`cat confinc.out 2>/dev/null`], + ['0:this is the am__doit target'], + [AS_CASE([$s], + [BSD], [am__include='.include' am__quote='"'], + [am__include='include' am__quote=''])]) + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +AC_MSG_RESULT([${_am_result}]) +AC_SUBST([am__include])]) +AC_SUBST([am__quote])]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. 
+AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. 
+AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! 
+Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. 
+am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works. 
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar /dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([acinclude.m4]) diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/HPL.dat new file mode 100644 index 000000000..19a956783 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/HPL.dat @@ -0,0 +1,32 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out (6=stdout,7=stderr,file) +2 # of problems sizes (N) +24576 24576 12288 Ns +1 # of NBs +2048 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256 960 512 768 1152 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +1 # of process grids (P x Q) +1 Ps +1 Qs +16.0 threshold +1 # of panel fact +0 1 2 PFACTs (0=left, 1=Crout, 2=Right) +1 # of 
recursive stopping criterium +2 8 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +1 # of recursive panel fact. +0 1 2 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +0 2 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +1 0 DEPTHs (>=0) +1 SWAP (0=bin-exch,1=long,2=mix) +192 swapping threshold +1 L1 in (0=transposed,1=no-transposed) form +1 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl new file mode 100755 index 000000000..f192f33f4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile new file mode 100755 index 000000000..99e50524b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile @@ -0,0 +1,348 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# Written by Tom Tromey . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f 
"$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. 
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) + func_cl_wrapper "$@" # Doesn't return... + ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? 
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile_commands.json b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile_commands.json new file mode 100644 index 000000000..c59b36a79 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/compile_commands.json @@ -0,0 +1,724 @@ +[ + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dlacpy.o HPL_dlatcpy.o HPL_fprintf.o HPL_warn.o HPL_abort.o HPL_dlaprnt.o HPL_dlange.o HPL_dlamch.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64" + }, + { + "command": "cc -c -o HPL_dlacpy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlacpy.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c" + }, + { + "command": "cc -c -o HPL_dlatcpy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 
-funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlatcpy.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c" + }, + { + "command": "cc -c -o HPL_fprintf.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_fprintf.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c" + }, + { + "command": "cc -c -o HPL_warn.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_warn.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c" + }, + { + "command": "cc -c -o HPL_abort.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_abort.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c" + }, + { + "command": "cc -c -o HPL_dlaprnt.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaprnt.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c" + }, + { + "command": "cc -c -o HPL_dlange.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlange.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c" + }, + { + "command": "cc -c -o HPL_dlamch.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlamch.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlamch.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dcopy.o HPL_daxpy.o HPL_dscal.o HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o HPL_dger.o HPL_dgemm.o HPL_dtrsm.o", + "directory": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64" + }, + { + "command": "cc -c -o HPL_dcopy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dcopy.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c" + }, + { + "command": "cc -c -o HPL_daxpy.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_daxpy.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c" + }, + { + "command": "cc -c -o HPL_dscal.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dscal.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c" + }, + { + "command": "cc -c -o HPL_idamax.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_idamax.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c" + }, + { + "command": "cc -c -o HPL_dgemv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dgemv.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c" + }, + { + "command": "cc -c -o HPL_dtrsv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dtrsv.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c" + }, + { + "command": "cc -c -o HPL_dger.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dger.c", + "directory": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c" + }, + { + "command": "cc -c -o HPL_dgemm.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dgemm.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c" + }, + { + "command": "cc -c -o HPL_dtrsm.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dtrsm.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_1ring.o HPL_1rinM.o HPL_2ring.o HPL_2rinM.o HPL_blong.o HPL_blonM.o HPL_packL.o HPL_copyL.o HPL_binit.o HPL_bcast.o HPL_bwait.o HPL_send.o HPL_recv.o HPL_sdrv.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64" + }, + { + "command": "cc -c -o HPL_1ring.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall 
-fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_1ring.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c" + }, + { + "command": "cc -c -o HPL_1rinM.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_1rinM.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c" + }, + { + "command": "cc -c -o HPL_2ring.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_2ring.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c" + }, + { + "command": "cc -c -o HPL_2rinM.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_2rinM.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c" + }, + { + "command": "cc -c 
-o HPL_blong.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_blong.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c" + }, + { + "command": "cc -c -o HPL_blonM.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_blonM.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c" + }, + { + "command": "cc -c -o HPL_packL.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_packL.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c" + }, + { + "command": "cc -c -o HPL_copyL.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp 
-I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_copyL.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c" + }, + { + "command": "cc -c -o HPL_binit.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_binit.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c" + }, + { + "command": "cc -c -o HPL_bcast.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_bcast.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c" + }, + { + "command": "cc -c -o HPL_bwait.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_bwait.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c" + }, + { + "command": "cc -c -o 
HPL_send.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_send.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c" + }, + { + "command": "cc -c -o HPL_recv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_recv.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c" + }, + { + "command": "cc -c -o HPL_sdrv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_sdrv.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_grid_init.o HPL_pnum.o HPL_grid_info.o HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o HPL_all_reduce.o HPL_barrier.o HPL_min.o HPL_max.o HPL_sum.o", + "directory": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64" + }, + { + "command": "cc -c -o HPL_grid_init.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_grid_init.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c" + }, + { + "command": "cc -c -o HPL_pnum.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pnum.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c" + }, + { + "command": "cc -c -o HPL_grid_info.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_grid_info.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c" + }, + { + "command": "cc -c -o HPL_grid_exit.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_grid_exit.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c" + }, + { + "command": "cc -c -o HPL_broadcast.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_broadcast.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c" + }, + { + "command": "cc -c -o HPL_reduce.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_reduce.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c" + }, + { + "command": "cc -c -o HPL_all_reduce.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_all_reduce.c", + "directory": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c" + }, + { + "command": "cc -c -o HPL_barrier.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_barrier.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c" + }, + { + "command": "cc -c -o HPL_min.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_min.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c" + }, + { + "command": "cc -c -o HPL_max.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_max.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c" + }, + { + "command": "cc -c -o HPL_sum.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_sum.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o HPL_pdpanel_free.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64" + }, + { + "command": "cc -c -o HPL_pdpanel_new.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_new.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c" + }, + { + "command": "cc -c -o HPL_pdpanel_init.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_init.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c" + }, + { + "command": "cc -c -o HPL_pdpanel_disp.o -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_disp.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c" + }, + { + "command": "cc -c -o HPL_pdpanel_free.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanel_free.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o HPL_indxl2g.o HPL_infog2l.o HPL_numroc.o HPL_numrocI.o HPL_dlaswp00N.o HPL_dlaswp10N.o HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o HPL_dlaswp03N.o HPL_dlaswp03T.o HPL_dlaswp04N.o HPL_dlaswp04T.o HPL_dlaswp05N.o HPL_dlaswp05T.o HPL_dlaswp06N.o HPL_dlaswp06T.o HPL_pwarn.o HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o HPL_pdlange.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64" + }, + { + "command": "cc -c -o HPL_indxg2l.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 
-funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxg2l.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c" + }, + { + "command": "cc -c -o HPL_indxg2lp.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxg2lp.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c" + }, + { + "command": "cc -c -o HPL_indxg2p.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxg2p.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c" + }, + { + "command": "cc -c -o HPL_indxl2g.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_indxl2g.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c" + }, + { + "command": "cc -c -o HPL_infog2l.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_infog2l.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c" + }, + { + "command": "cc -c -o HPL_numroc.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_numroc.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c" + }, + { + "command": "cc -c -o HPL_numrocI.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_numrocI.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c" + }, + { + "command": "cc -c -o HPL_dlaswp00N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp00N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c" + }, + { + "command": "cc -c -o HPL_dlaswp10N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp10N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c" + }, + { + "command": "cc -c -o HPL_dlaswp01N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp01N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c" + }, + { + "command": "cc -c -o HPL_dlaswp01T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp 
-I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp01T.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c" + }, + { + "command": "cc -c -o HPL_dlaswp02N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp02N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c" + }, + { + "command": "cc -c -o HPL_dlaswp03N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp03N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c" + }, + { + "command": "cc -c -o HPL_dlaswp03T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp03T.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c" + }, + { + "command": "cc -c -o HPL_dlaswp04N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp04N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c" + }, + { + "command": "cc -c -o HPL_dlaswp04T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp04T.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c" + }, + { + "command": "cc -c -o HPL_dlaswp05N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp05N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c" + }, + { + "command": "cc -c -o HPL_dlaswp05T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp05T.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c" + }, + { + "command": "cc -c -o HPL_dlaswp06N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp06N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c" + }, + { + "command": "cc -c -o HPL_dlaswp06T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlaswp06T.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c" + }, + { + "command": "cc -c -o HPL_pwarn.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp 
-I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pwarn.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c" + }, + { + "command": "cc -c -o HPL_pabort.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pabort.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c" + }, + { + "command": "cc -c -o HPL_pdlaprnt.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaprnt.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c" + }, + { + "command": "cc -c -o HPL_pdlamch.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlamch.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c" + }, 
+ { + "command": "cc -c -o HPL_pdlange.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlange.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o HPL_pdrpanrlT.o HPL_pdfact.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64" + }, + { + "command": "cc -c -o HPL_dlocmax.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlocmax.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c" + }, + { + "command": "cc -c -o HPL_dlocswpN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include 
../HPL_dlocswpN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c" + }, + { + "command": "cc -c -o HPL_dlocswpT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dlocswpT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c" + }, + { + "command": "cc -c -o HPL_pdmxswp.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdmxswp.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c" + }, + { + "command": "cc -c -o HPL_pdpancrN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpancrN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c" + }, + { + "command": "cc -c -o 
HPL_pdpancrT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpancrT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c" + }, + { + "command": "cc -c -o HPL_pdpanllN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanllN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c" + }, + { + "command": "cc -c -o HPL_pdpanllT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanllT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c" + }, + { + "command": "cc -c -o HPL_pdpanrlN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 
-funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanrlN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c" + }, + { + "command": "cc -c -o HPL_pdpanrlT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdpanrlT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c" + }, + { + "command": "cc -c -o HPL_pdrpanllN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanllN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c" + }, + { + "command": "cc -c -o HPL_pdrpanllT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanllT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c" + }, + { + "command": "cc -c -o HPL_pdrpancrN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpancrN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c" + }, + { + "command": "cc -c -o HPL_pdrpancrT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpancrT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c" + }, + { + "command": "cc -c -o HPL_pdrpanrlN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanrlN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c" + }, + { + "command": "cc -c -o HPL_pdrpanrlT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdrpanrlT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c" + }, + { + "command": "cc -c -o HPL_pdfact.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdfact.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o HPL_spreadT.o HPL_rollN.o HPL_rollT.o HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64" + }, + { + "command": "cc -c -o HPL_pipid.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pipid.c", + 
"directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c" + }, + { + "command": "cc -c -o HPL_plindx0.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_plindx0.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c" + }, + { + "command": "cc -c -o HPL_pdlaswp00N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp00N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c" + }, + { + "command": "cc -c -o HPL_pdlaswp00T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp00T.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c" + }, + { + "command": "cc -c -o HPL_perm.o -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_perm.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c" + }, + { + "command": "cc -c -o HPL_logsort.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_logsort.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c" + }, + { + "command": "cc -c -o HPL_plindx10.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_plindx10.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c" + }, + { + "command": "cc -c -o HPL_plindx1.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp 
-I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_plindx1.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c" + }, + { + "command": "cc -c -o HPL_spreadN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_spreadN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c" + }, + { + "command": "cc -c -o HPL_spreadT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_spreadT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c" + }, + { + "command": "cc -c -o HPL_rollN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_rollN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c" + }, + { + 
"command": "cc -c -o HPL_rollT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_rollT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c" + }, + { + "command": "cc -c -o HPL_equil.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_equil.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c" + }, + { + "command": "cc -c -o HPL_pdlaswp01N.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp01N.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c" + }, + { + "command": "cc -c -o HPL_pdlaswp01T.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 
-fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdlaswp01T.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c" + }, + { + "command": "cc -c -o HPL_pdupdateNN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateNN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c" + }, + { + "command": "cc -c -o HPL_pdupdateNT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateNT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c" + }, + { + "command": "cc -c -o HPL_pdupdateTN.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateTN.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c" + }, + { + "command": "cc -c -o HPL_pdupdateTT.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdupdateTT.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c" + }, + { + "command": "cc -c -o HPL_pdtrsv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdtrsv.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c" + }, + { + "command": "cc -c -o HPL_pdgesv0.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesv0.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c" + }, + { + "command": "cc -c -o HPL_pdgesvK1.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesvK1.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c" + }, + { + "command": "cc -c -o HPL_pdgesvK2.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesvK2.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c" + }, + { + "command": "cc -c -o HPL_pdgesv.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdgesv.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv.c" + }, + { + "command": "cc -c -O0 -fPIC -DMPI -o cuda_dgemm.o -I/usr/local/cuda/include -I/opt/intel/oneapi/mpi/2021.10.0/include cuda_dgemm.cpp", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp" + }, + { + "command": "c++ -O3 
-shared -o libdgemm.so.1.0.1 cuda_dgemm.o -I/opt/intel/oneapi/mpi/2021.10.0/include -lmpi", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda" + }, + { + "command": "c++ -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccevCa71.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -shared -o libdgemm.so.1.0.1 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -soname libdgemm.so.1 cuda_dgemm.o --enable-new-dtags -rpath /opt/intel/oneapi/mpi/2021.10.0/lib/release -rpath /opt/intel/oneapi/mpi/2021.10.0/lib -lmpi --enable-new-dtags --push-state --as-needed --pop-state --push-state --as-needed --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_dmatgen.o HPL_ladd.o HPL_lmul.o HPL_xjumpm.o HPL_jumpit.o HPL_rand.o HPL_setran.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64" + }, + { + "command": "cc -c -o HPL_dmatgen.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_dmatgen.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64", + "file": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c" + }, + { + "command": "cc -c -o HPL_ladd.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ladd.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c" + }, + { + "command": "cc -c -o HPL_lmul.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_lmul.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c" + }, + { + "command": "cc -c -o HPL_xjumpm.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_xjumpm.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c" + }, + { + "command": "cc -c -o HPL_jumpit.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_jumpit.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c" + }, + { + "command": "cc -c -o HPL_rand.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_rand.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c" + }, + { + "command": "cc -c -o HPL_setran.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_setran.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64" + }, + { + "command": "cc -c -o HPL_timer.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_timer.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c" + }, + { + "command": "cc -c -o HPL_timer_cputime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_timer_cputime.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c" + }, + { + "command": "cc -c -o HPL_timer_walltime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_timer_walltime.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_pdmatgen.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64" + }, + { + "command": "cc -c -o HPL_pdmatgen.o -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdmatgen.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c" + }, + { + "command": "ar r /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64" + }, + { + "command": "cc -c -o HPL_ptimer.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ptimer.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c" + }, + { + "command": "cc -c -o HPL_ptimer_cputime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ptimer_cputime.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64", + "file": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c" + }, + { + "command": "cc -c -o HPL_ptimer_walltime.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_ptimer_walltime.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c" + }, + { + "command": "c++ -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/ -lmpi", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64" + }, + { + "command": "cc -c -o HPL_pddriver.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pddriver.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c" + }, + 
{ + "command": "cc -c -o HPL_pdinfo.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdinfo.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c" + }, + { + "command": "cc -c -o HPL_pdtest.o -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -I/opt/intel/oneapi/mpi/2021.10.0/include ../HPL_pdtest.c", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64", + "file": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c" + }, + { + "command": "c++ -cc=gcc -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/ -lmpi", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64" + }, + { + "command": "c++ -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/intel64 -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -I/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/ -lmpi -I/opt/intel/oneapi/mpi/2021.10.0/include -lmpi", + "directory": "/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64" + }, + { + "command": "c++ -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccANGnqv.res -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lpthread -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lgcc_s --build-id --eh-frame-hdr -melf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o /usr/lib/gcc/x86_64-linux-gnu/11/crtoffloadbegin.o HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a -lmpi --enable-new-dtags -rpath /opt/intel/oneapi/mpi/2021.10.0/lib/release -rpath /opt/intel/oneapi/mpi/2021.10.0/lib -lmpi --enable-new-dtags --push-state --as-needed --pop-state --push-state --as-needed --pop-state /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o /usr/lib/gcc/x86_64-linux-gnu/11/crtoffloadend.o", + "directory": 
"/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64" + } +] \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.guess b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.guess new file mode 100755 index 000000000..256083a70 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.guess @@ -0,0 +1,1476 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-03-08' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. +# +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# +# Please send patches to . + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. 
+ +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2018 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > "$dummy.c" ; + for c in cc gcc c89 c99 ; do + if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "$UNAME_SYSTEM" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. 
+ LIBC=gnu + + eval "$set_cc_for_build" + cat <<-EOF > "$dummy.c" + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`" + + # If ldd exists, use it to detect musl libc. + if command -v ldd >/dev/null && \ + ldd --version 2>&1 | grep -q ^musl + then + LIBC=musl + fi + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)` + case "$UNAME_MACHINE_ARCH" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine="${arch}${endian}"-unknown + ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. 
+ case "$UNAME_MACHINE_ARCH" in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval "$set_cc_for_build" + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. + case "$UNAME_MACHINE_ARCH" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "$UNAME_VERSION" in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "$machine-${os}${release}${abi}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" + exit ;; + *:ekkoBSD:*:*) + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" + exit ;; + *:SolidBSD:*:*) + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:MirBSD:*:*) + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:Sortix:*:*) + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`" + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? 
+ trap '' 0 + exit $exitcode ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix"$UNAME_RELEASE" + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux"$UNAME_RELEASE" + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval "$set_cc_for_build" + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`" + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos"$UNAME_RELEASE" + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos"$UNAME_RELEASE" + ;; + sun4) + echo sparc-sun-sunos"$UNAME_RELEASE" + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos"$UNAME_RELEASE" + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint"$UNAME_RELEASE" + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint"$UNAME_RELEASE" + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint"$UNAME_RELEASE" + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten"$UNAME_RELEASE" + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten"$UNAME_RELEASE" + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix"$UNAME_RELEASE" + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix"$UNAME_RELEASE" + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix"$UNAME_RELEASE" + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos"$UNAME_RELEASE" + exit ;; + Motorola:PowerMAX_OS:*:*) + 
echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ] + then + if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \ + [ "$TARGET_BINARY_INTERFACE"x = x ] + then + echo m88k-dg-dgux"$UNAME_RELEASE" + else + echo m88k-dg-dguxbcs"$UNAME_RELEASE" + fi + else + echo i586-dg-dgux"$UNAME_RELEASE" + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`" + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) 
HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "$sc_cpu_version" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "$sc_kernel_bits" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "$HP_ARCH" = "" ]; then + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ "$HP_ARCH" = hppa2.0w ] + then + eval "$set_cc_for_build" + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux"$HPUX_REV" + exit ;; + 3050*:HI-UX:*:*) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo "$UNAME_MACHINE"-unknown-osf1mk + else + echo "$UNAME_MACHINE"-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + 
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:BSD/OS:*:*) + echo 
"$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case "$UNAME_PROCESSOR" in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + i*:CYGWIN*:*) + echo "$UNAME_MACHINE"-pc-cygwin + exit ;; + *:MINGW64*:*) + echo "$UNAME_MACHINE"-pc-mingw64 + exit ;; + *:MINGW*:*) + echo "$UNAME_MACHINE"-pc-mingw32 + exit ;; + *:MSYS*:*) + echo "$UNAME_MACHINE"-pc-msys + exit ;; + i*:PW*:*) + echo "$UNAME_MACHINE"-pc-pw32 + exit ;; + *:Interix*:*) + case "$UNAME_MACHINE" in + x86) + echo i586-pc-interix"$UNAME_RELEASE" + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix"$UNAME_RELEASE" + exit ;; + IA64) + echo ia64-unknown-interix"$UNAME_RELEASE" + exit ;; + esac ;; + i*:UWIN*:*) + echo "$UNAME_MACHINE"-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + *:GNU:*:*) + # the GNU system + echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`" + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC" + exit ;; + i*86:Minix:*:*) + echo "$UNAME_MACHINE"-pc-minix + exit ;; + aarch64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + 
EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arm*:Linux:*:*) + eval "$set_cc_for_build" + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi + else + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + cris:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + crisv32:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + e2k:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + frv:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + hexagon:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + ia64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + k1om:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m32r*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m68*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E 
"$dummy.c" 2>/dev/null | grep '^CPU'`" + test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; } + ;; + mips64el:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + openrisc*:Linux:*:*) + echo or1k-unknown-linux-"$LIBC" + exit ;; + or32:Linux:*:* | or1k*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-"$LIBC" + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-"$LIBC" + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-"$LIBC" + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-"$LIBC" + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-"$LIBC" + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-"$LIBC" + exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" + exit ;; + sh64*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sh*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + tile*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + vax:Linux:*:*) + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" + exit ;; + x86_64:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + xtensa*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. 
+ echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo "$UNAME_MACHINE"-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo "$UNAME_MACHINE"-unknown-stop + exit ;; + i*86:atheos:*:*) + echo "$UNAME_MACHINE"-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo "$UNAME_MACHINE"-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos"$UNAME_RELEASE" + exit ;; + i*86:*DOS:*:*) + echo "$UNAME_MACHINE"-pc-msdosdjgpp + exit ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. 
+ case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}{$UNAME_VERSION}" + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. + echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. 
+ echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos"$UNAME_RELEASE" + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos"$UNAME_RELEASE" + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv"$UNAME_RELEASE" + exit ;; + 
RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo "$UNAME_MACHINE"-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says <Richard.M.Bartel@ccMail.Census.GOV> + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes <hewes@openmarket.com>. + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo "$UNAME_MACHINE"-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux"$UNAME_RELEASE" + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv"$UNAME_RELEASE" + else + echo mips-unknown-sysv"$UNAME_RELEASE" + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux"$UNAME_RELEASE" + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux"$UNAME_RELEASE" + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux"$UNAME_RELEASE" + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux"$UNAME_RELEASE" + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux"$UNAME_RELEASE" + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Rhapsody:*:*) + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + eval "$set_cc_for_build" + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. 
+ UNAME_PROCESSOR=x86_64 + fi + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = 386; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo "$UNAME_MACHINE"-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux"$UNAME_RELEASE" + exit ;; + *:DragonFly:*:*) + echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "$UNAME_MACHINE" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`" + exit ;; + i*86:rdos:*:*) + echo "$UNAME_MACHINE"-pc-rdos + exit ;; + i*86:AROS:*:*) + echo "$UNAME_MACHINE"-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo "$UNAME_MACHINE"-unknown-esx + exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; +esac + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.sub b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.sub new file mode 100755 index 000000000..9ccf09a7a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/config.sub @@ -0,0 +1,1801 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-03-08' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/>.
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to <config-patches@gnu.org>. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ."
+ +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2018 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo "$1" + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + android-linux) + os=-linux-android + basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + ;; + *) + basic_machine=`echo "$1" | sed 's/-[^-]*$//'` + if [ "$basic_machine" != "$1" ] + then os=`echo "$1" | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. 
We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze*) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*178) + os=-lynxos178 + ;; + -lynx*5) + os=-lynxos5 + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arceb \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | ba \ + | be32 | be64 \ + | bfin \ + | c4x | c8051 | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i860 | i960 | ia16 | ia64 \ + | ip2k | iq2000 \ + | k1om \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 
| mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 | nios2eb | nios2el \ + | ns16k | ns32k \ + | open8 | or1k | or1knd | or32 \ + | pdp10 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ + | pyramid \ + | riscv32 | riscv64 \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | visium \ + | wasm32 \ + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + leon|leon[3-9]) + basic_machine=sparc-$basic_machine + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65) + ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. 
+ i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | ba-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | c8051-* | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | e2k-* | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ + | i*86-* | i860-* | i960-* | ia16-* | ia64-* \ + | ip2k-* | iq2000-* \ + | k1om-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa32r6-* | mipsisa32r6el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64r6-* | mipsisa64r6el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* | 
mipsr5900el-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | or1k*-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ + | pyramid-* \ + | riscv32-* | riscv64-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ + | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ + | visium-* \ + | wasm32-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-pc + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + asmjs) + basic_machine=asmjs-unknown + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd 
+ ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2*) + basic_machine=m68k-bull + os=-sysv3 + ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=$os"spe" + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + 
hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; + i*86v32) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'` + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze*) + basic_machine=microblaze-xilinx + ;; + 
mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; + mingw32) + basic_machine=i686-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i686-pc + os=-msys + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + nsv-tandem) + 
basic_machine=nsv-tandem + ;; + nsx-tandem) + basic_machine=nsx-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc | ppcbe) basic_machine=powerpc-unknown + ;; + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppc64) 
basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + 
basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + x64) + basic_machine=x86_64-pc + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'` + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases that might get confused + # with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # es1800 is here to avoid being matched by es* (a different OS) + -es1800*) + os=-ose + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. + # -sysv* is not here because it comes later, after sysvr4. 
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* | -plan9* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \ + | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \ + | -midnightbsd*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -xray | -os68k* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo "$os" | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo "$os" | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo "$os" | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4*) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -pikeos*) + # Until real need of OS specific support for + # particular features comes up, bare metal + # configurations are quite functional. + case $basic_machine in + arm*) + os=-eabi + ;; + *) + os=-elf + ;; + esac + ;; + -nacl*) + ;; + -ios) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. 
+ os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + c8051-*) + os=-elf + ;; + hexagon-*) + os=-elf + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + pru-*) + os=-elf + ;; + *-be) + os=-beos + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"` + ;; +esac + +echo "$basic_machine$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure new file mode 100755 index 000000000..ed0b4faa0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure @@ -0,0 +1,6161 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for hpl 2.3. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. 
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and hpl@icl.utk.edu +$0: about your system, including any error possibly output +$0: before this message. Then install a modern shell, or +$0: manually run the script under such a shell if you do +$0: have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='hpl' +PACKAGE_TARNAME='hpl' +PACKAGE_VERSION='2.3' +PACKAGE_STRING='hpl 2.3' +PACKAGE_BUGREPORT='hpl@icl.utk.edu' +PACKAGE_URL='' + +ac_unique_file="include/hpl.h" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +EGREP +GREP +CPP +BLAS_LIBS +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__include +DEPDIR +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +RANLIB +OBJEXT +EXEEXT +CPPFLAGS +LDFLAGS +CFLAGS +ac_ct_CC +CC +MPICC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL +am__quote' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_dependency_tracking +enable_silent_rules +' + ac_precious_vars='build_alias +host_alias +target_alias +MPICC +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP' + + +# Initialize some variables set by options. 
+ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures hpl 2.3 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/hpl] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of hpl 2.3:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make 
V=0") + +Some influential environment variables: + MPICC MPI C compiler command + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. 
+ if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +hpl configure 2.3 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. 
+ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## ------------------------------ ## +## Report this to hpl@icl.utk.edu ## +## ------------------------------ ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. 
+ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by hpl $as_me 2.3, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. 
+# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers include/hplconfig.h" + + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. 
+ + +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` + + + + _ax_prog_cc_mpi_mpi_wanted=yes + if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then + if test -z "$CC" && test -n "$MPICC"; then + CC="$MPICC" + else + if test -n "$ac_tool_prefix"; then + for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + fi + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdio.h> +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdarg.h> +#include <stdio.h> +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + +# Check for compiler +# Needs to be split off into an extra macro to ensure right expansion +# order. 
+ + +if test x"$_ax_prog_cc_mpi_mpi_wanted" = xno; then : + _ax_prog_cc_mpi_mpi_found=no +else + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + # test whether MPI_Init is available + # We do not use AC_SEARCH_LIBS here, as it caches its outcome and + # thus disallows corresponding calls in the other AX_PROG_*_MPI + # macros. + for lib in NONE mpi mpich; do + save_LIBS=$LIBS + if test x"$lib" = xNONE; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init" >&5 +$as_echo_n "checking for function MPI_Init... " >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init in -l$lib" >&5 +$as_echo_n "checking for function MPI_Init in -l$lib... " >&6; } + LIBS="-l$lib $LIBS" + fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char MPI_Init (); +int +main () +{ +return MPI_Init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + _ax_prog_cc_mpi_mpi_found=yes +else + _ax_prog_cc_mpi_mpi_found=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ax_prog_cc_mpi_mpi_found" >&5 +$as_echo "$_ax_prog_cc_mpi_mpi_found" >&6; } + if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then + break; + fi + LIBS=$save_LIBS + done + + # Check for header + if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5 +$as_echo_n "checking for mpi.h... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <mpi.h> +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + _ax_prog_cc_mpi_mpi_found=no + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then : + + +$as_echo "#define HAVE_MPI 1" >>confdefs.h + + : + +else + + + : + +fi + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... 
" >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + + +am__api_version='1.16' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. 
This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. 
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +$as_echo "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... 
" >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='hpl' + VERSION='2.3' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. 
For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. 
+ am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. 
+# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." 
"$LINENO" 5 + fi +fi + + + + + + + + + +hpl_blas_ok=no + + +current_LIBS="$LIBS" + +cat < hplvars.txt +name1=OpenBLAS +rout1=dgemm_ +libs1=-lopenblas -lm + +name2=Atlas Fortran BLAS +rout2=dgemm_ +libs2=-lf77blas -latlas + +name3=Sequential Intel MKL LP64 (group) +rout3=dgemm_ +libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread + +name4=Sequential Intel MKL LP64 +rout4=dgemm_ +libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +name5=AMD's ACML +rout5=dgemm_ +libs5=-lacml -lm + +name6=Accelerate +rout6=dgemm_ +libs6=-framework Accelerate + +name7=Apple VecLib +rout7=dgemm_ +libs7=-framework vecLib + +name8=IBM ESSL +rout8=dgemm_ +libs8=-lessl + +name9=NVIDIA nvblas +rout9=dgemm_ +libs9=-lnvblas + +name10=Generic BLAS +rout10=dgemm_ +libs10=-lblas + +HPLEOF +for hpl_i in 1 2 3 4 5 6 7 8 9 10; +do +if test x$hpl_blas_ok = xno; then + name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`" + rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`" + libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $rout in $name" >&5 +$as_echo_n "checking for $rout in $name... " >&6; } + + LIBS="$libs" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char $rout (); +int +main () +{ +return $rout (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + hpl_blas_ok=yes;BLAS_LIBS="$libs" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS="$current_LIBS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5 +$as_echo "$hpl_blas_ok" >&6; } +fi +done +rm hplvars.txt + +if test x$hpl_blas_ok = xno; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in OpenBLAS" >&5 +$as_echo_n "checking for dgemm_ in OpenBLAS... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in -lopenblas" >&5 +$as_echo_n "checking for dgemm_ in -lopenblas... " >&6; } +if ${ac_cv_lib_openblas_dgemm_+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lopenblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dgemm_ (); +int +main () +{ +return dgemm_ (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_openblas_dgemm_=yes +else + ac_cv_lib_openblas_dgemm_=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_dgemm_" >&5 +$as_echo "$ac_cv_lib_openblas_dgemm_" >&6; } +if test "x$ac_cv_lib_openblas_dgemm_" = xyes; then : + hpl_blas_ok=yes;BLAS_LIBS="-lopenblas" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5 +$as_echo "$hpl_blas_ok" >&6; } +fi + + + +# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$hpl_blas_ok" = xyes; then + LIBS="$BLAS_LIBS $LIBS" + : +else + hpl_blas_ok=no + as_fn_error $? "BLAS not found" "$LINENO" 5 +fi + + + + +for ac_func in dgemm_ +do : + ac_fn_c_check_func "$LINENO" "dgemm_" "ac_cv_func_dgemm_" +if test "x$ac_cv_func_dgemm_" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_DGEMM_ 1 +_ACEOF + +fi +done + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. 
+ # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. 
Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in mpi.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "mpi.h" "ac_cv_header_mpi_h" "$ac_includes_default" +if test "x$ac_cv_header_mpi_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MPI_H 1 +_ACEOF + +fi + +done + + +ac_config_files="$ac_config_files Makefile src/Makefile testing/Makefile" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. 
+# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! 
-f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. 
Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
+ xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by hpl $as_me 2.3, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. 
Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +hpl config.status 2.3 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. 
## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "include/hplconfig.h") CONFIG_HEADERS="$CONFIG_HEADERS include/hplconfig.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; + "testing/Makefile") CONFIG_FILES="$CONFIG_FILES testing/Makefile" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? + done + if test $am_rc -ne 0; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). +See \`config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? 
"write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure.ac b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure.ac new file mode 100644 index 000000000..eb91dc590 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/configure.ac @@ -0,0 +1,34 @@ +AC_PREREQ([2.69]) + +AC_INIT(hpl, 2.3, hpl@icl.utk.edu) +AC_CONFIG_SRCDIR([include/hpl.h]) +AC_CONFIG_HEADERS([include/hplconfig.h]) + +AX_PROG_CC_MPI + +AC_PROG_RANLIB + +AC_PROG_INSTALL + +AM_INIT_AUTOMAKE([subdir-objects]) + +AM_PROG_CC_C_O + +dnl +dnl AX_BLAS requires Fortran compiler and detects fortran libraries in $FLIBS +dnl +dnl AX_BLAS(LIBS="$BLAS_LIBS $LIBS $FLIBS") +dnl + 
+HPL_BLAS(LIBS="$BLAS_LIBS $LIBS",AC_MSG_ERROR([BLAS not found])) + +dnl FIXME: test for CBLAS: Atlas, MKL, OpenBLAS, ESSL, ... +dnl FIXME: test for GSL CBLAS + +AC_CHECK_FUNCS([dgemm_]) + +AC_CHECK_HEADERS([mpi.h]) + +AC_CONFIG_FILES([Makefile src/Makefile testing/Makefile]) + +AC_OUTPUT diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/depcomp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/depcomp new file mode 100755 index 000000000..65cbf7093 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/depcomp @@ -0,0 +1,791 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. 
+ +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. 
+nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. 
+ gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. +## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. 
+ sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. 
+ tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). 
+ "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. 
+ "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. + while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? 
+ if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. + sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. 
+ aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? + grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. 
+ IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. + "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. 
+ sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. 
+ if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl.h new file mode 100644 index 000000000..6d131963f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl.h @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_H +#define HPL_H +/* + * --------------------------------------------------------------------- + * HPL default compile options that can be overridden in the Make. + * --------------------------------------------------------------------- + */ +#ifndef HPL_NO_MPI_DATATYPE /* Use MPI user-defined data type */ +#define HPL_USE_MPI_DATATYPE +#endif + +#ifndef HPL_COPY_L /* do not copy L, use MPI user-defined data types */ +#define HPL_NO_COPY_L +#endif + +#ifndef HPL_DETAILED_TIMING /* Do not enable detailed timings */ +#define HPL_NO_DETAILED_TIMING +#endif + +#ifndef HPL_CALL_VSIPL /* Call the Fortran 77 BLAS interface */ +#ifndef HPL_CALL_CBLAS /* there can be only one */ +#define HPL_CALL_FBLAS /* fallback when neither HPL_CALL_VSIPL nor HPL_CALL_CBLAS is defined */ +#endif +#endif +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +#include "hpl_pgesv.h" + +#include "hpl_timer.h" +#include "hpl_matgen.h" +#include "hpl_test.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +#include "hpl_ptest.h" + +#endif +/* + * End of hpl.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_auxil.h 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_auxil.h new file mode 100644 index 000000000..861caf380 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_auxil.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_AUXIL_H +#define HPL_AUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM; + +typedef enum +{ + HPL_MACH_EPS = 900, /* relative machine precision */ + HPL_MACH_SFMIN = 901, /* safe minimum such that 1/sfmin does not overflow */ + HPL_MACH_BASE = 902, /* base = base of the machine */ + HPL_MACH_PREC = 903, /* prec = eps*base */ + HPL_MACH_MLEN = 904, /* number of (base) digits in the mantissa */ + HPL_MACH_RND = 905, /* 1.0 if rounding occurs in addition */ + HPL_MACH_EMIN = 906, /* min exponent before (gradual) underflow */ + HPL_MACH_RMIN = 907, /* underflow threshold base**(emin-1) */ + HPL_MACH_EMAX = 908, /* largest exponent before overflow */ + HPL_MACH_RMAX = 909 /* overflow threshold - (base**emax)*(1-eps) */ + +} HPL_T_MACH; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_fprintf +STDC_ARGS( ( + FILE *, + const char *, + ... 
+) ); +void HPL_warn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_abort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_dlacpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlatcpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlaprnt +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_dlange +STDC_ARGS( ( + const HPL_T_NORM, + const int, + const int, + const double *, + const int +) ); +double HPL_dlamch +STDC_ARGS( ( + const HPL_T_MACH +) ); + +#endif +/* + * End of hpl_auxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_blas.h new file mode 100644 index 000000000..2a510471a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_blas.h @@ -0,0 +1,630 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_BLAS_H +#define HPL_BLAS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" + + +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +enum HPL_ORDER +{ HplRowMajor = 101, HplColumnMajor = 102 }; +enum HPL_TRANS +{ HplNoTrans = 111, HplTrans = 112, HplConjTrans = 113 }; +enum HPL_UPLO +{ HplUpper = 121, HplLower = 122 }; +enum HPL_DIAG +{ HplNonUnit = 131, HplUnit = 132 }; +enum HPL_SIDE +{ HplLeft = 141, HplRight = 142 }; + + +#ifdef HPL_CALL_CBLAS + + +/* + * --------------------------------------------------------------------- + * The C interface of the BLAS is available ... + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define CBLAS_INDEX int + +#define CBLAS_ORDER HPL_ORDER +#define CblasRowMajor HplRowMajor +#define CblasColMajor HplColumnMajor /* enum HPL_ORDER declares HplColumnMajor; there is no HplColMajor */ + +#define CBLAS_TRANSPOSE HPL_TRANS +#define CblasNoTrans HplNoTrans +#define CblasTrans HplTrans +#define CblasConjTrans HplConjTrans + +#define CBLAS_UPLO HPL_UPLO +#define CblasUpper HplUpper +#define CblasLower HplLower + +#define CBLAS_DIAG HPL_DIAG +#define CblasNonUnit HplNonUnit +#define CblasUnit HplUnit + +#define CBLAS_SIDE HPL_SIDE +#define CblasLeft HplLeft +#define CblasRight HplRight +/* + * --------------------------------------------------------------------- + * CBLAS Function prototypes + * --------------------------------------------------------------------- + */ +CBLAS_INDEX cblas_idamax +STDC_ARGS( +( const int, const double *, const int ) ); +void cblas_dswap +STDC_ARGS( +( const int, double *, const int, double *, + const int ) ); +void cblas_dcopy +STDC_ARGS( +( const int, 
const double *, const int, double *, + const int ) ); +void cblas_daxpy +STDC_ARGS( +( const int, const double, const double *, const int, + double *, const int ) ); +void cblas_dscal +STDC_ARGS( +( const int, const double, double *, const int ) ); + +void cblas_dgemv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const int, const int, const double, const double *, + const int, const double *, const int, const double, + double *, const int ) ); + +void cblas_dger +STDC_ARGS( +( const enum CBLAS_ORDER, const int, const int, + const double, const double *, const int, const double *, + const int, double *, const int ) ); +void cblas_dtrsv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_UPLO, + const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, + const int, const double *, const int, double *, + const int ) ); + +void cblas_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void cblas_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +void dpcpp_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void dpcpp_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +/* + * --------------------------------------------------------------------- + * HPL C BLAS macro definition + * 
--------------------------------------------------------------------- + */ +#define HPL_dswap cblas_dswap +#define HPL_dcopy cblas_dcopy +#define HPL_daxpy cblas_daxpy +#define HPL_dscal cblas_dscal +#define HPL_idamax cblas_idamax + +#define HPL_dgemv cblas_dgemv +#define HPL_dtrsv cblas_dtrsv +#define HPL_dger cblas_dger + +//#define HPL_dgemm cblas_dgemm +//#define HPL_dtrsm cblas_dtrsm +#define HPL_dgemm dpcpp_dgemm +#define HPL_dtrsm dpcpp_dtrsm + +#endif + +//#define HPL_hello sss_gemm + +#ifdef HPL_CALL_FBLAS +/* + * --------------------------------------------------------------------- + * Use the Fortran 77 interface of the BLAS ... + * --------------------------------------------------------------------- + * Defaults: Add_, F77_INTEGER=int, StringSunStyle + * --------------------------------------------------------------------- + */ +#ifndef NoChange +#ifndef UpCase +#ifndef Add__ +#ifndef Add_ + +#define Add_ + +#endif +#endif +#endif +#endif + +#ifndef F77_INTEGER +#define F77_INTEGER int +#else +#define HPL_USE_F77_INTEGER_DEF +#endif + +#ifndef StringCrayStyle +#ifndef StringStructVal +#ifndef StringStructPtr +#ifndef StringSunStyle + +#define StringSunStyle + +#endif +#endif +#endif +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 <-> C interface + * --------------------------------------------------------------------- + * + * These macros identifies how Fortran routines will be called. + * + * Add_ : the Fortran compiler expects the name of C functions to be + * in all lower case and to have an underscore postfixed it (Suns, Intel + * compilers expect this). + * + * NoChange : the Fortran compiler expects the name of C functions to be + * in all lower case (IBM RS6K compilers do this). + * + * UpCase : the Fortran compiler expects the name of C functions to be + * in all upcase. (Cray compilers expect this). + * + * Add__ : the Fortran compiler in use is f2c, a Fortran to C conver- + * ter. 
+ */ +#ifdef NoChange +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm(...) + */ +#define F77dswap dswap +#define F77dscal dscal +#define F77dcopy dcopy +#define F77daxpy daxpy +#define F77idamax idamax + +#define F77dgemv dgemv +#define F77dtrsv dtrsv +#define F77dger dger + +#define F77dgemm dgemm +#define F77dtrsm dtrsm + +#endif + +#ifdef UpCase +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) DGEMM(...) + */ +#ifdef CRAY_BLAS + +#define F77dswap SSWAP +#define F77dscal SSCAL +#define F77dcopy SCOPY +#define F77daxpy SAXPY +#define F77idamax ISAMAX + +#define F77dgemv SGEMV +#define F77dtrsv STRSV +#define F77dger SGER + +#define F77dgemm SGEMM +#define F77dtrsm STRSM + +#else + +#define F77dswap DSWAP +#define F77dscal DSCAL +#define F77dcopy DCOPY +#define F77daxpy DAXPY +#define F77idamax IDAMAX + +#define F77dgemv DGEMV +#define F77dtrsv DTRSV +#define F77dger DGER + +#define F77dgemm DGEMM +#define F77dtrsm DTRSM + +#endif + +#endif + +#ifdef Add_ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) 
+ */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ + +#endif + +#ifdef Add__ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) + */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ +//#define F77hello sss_gemm + +#endif +//#define F77hello sss_gemm +/* + * --------------------------------------------------------------------- + * Typedef definitions and conversion utilities + * --------------------------------------------------------------------- + */ +#ifdef StringCrayStyle + +#include + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR _fcd + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(_fcdtocp(c) )) +#define HPL_C2F_CHAR(c) (_cptofcd(&(c), 1)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructVal + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c.cp)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructPtr + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c->cp)) + +#define 
F77_CHAR_DECL F77_CHAR * /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringSunStyle + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR char * + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c)) +#define HPL_C2F_CHAR(c) (&(c)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ +#define F77_1_CHAR , F77_INTEGER +#define F77_2_CHAR F77_1_CHAR F77_1_CHAR +#define F77_3_CHAR F77_2_CHAR F77_1_CHAR +#define F77_4_CHAR F77_3_CHAR F77_1_CHAR + +#endif +/* ------------------------------------------------------------------ */ + +#ifndef F77_1_CHAR +#define F77_1_CHAR +#define F77_2_CHAR +#define F77_3_CHAR +#define F77_4_CHAR +#endif + +#define F77_INT_DECL const F77_INTEGER * /* input integer */ +#define F77_SIN_DECL const double * /* input scalar */ +#define F77_VIN_DECL const double * /* input vector */ +#define F77_VINOUT_DECL double * /* input/output matrix */ +#define F77_MIN_DECL const double * /* input matrix */ +#define F77_MINOUT_DECL double * /* input/output matrix */ + +#ifdef CRAY_PVP_ENV /* Type of FORTRAN functions */ +#define F77_VOID_FUN extern fortran void /* subroutine */ +#define F77_INT_FUN extern fortran int /* integer function */ +#else +#define F77_VOID_FUN extern void /* subroutine */ +#define F77_INT_FUN extern int /* integer function */ +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 BLAS function prototypes + * --------------------------------------------------------------------- + */ +F77_VOID_FUN F77dswap +STDC_ARGS( +( F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dscal +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_VOID_FUN F77dcopy +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77daxpy +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, 
F77_VIN_DECL, F77_INT_DECL, + F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_INT_FUN F77idamax +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL ) ); + +F77_VOID_FUN F77dgemv +STDC_ARGS( +( F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, + F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL F77_1_CHAR ) ); +F77_VOID_FUN F77dger +STDC_ARGS( +( F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_VIN_DECL, + F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_MINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dtrsv +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL + F77_3_CHAR ) ); + +F77_VOID_FUN F77dgemm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, + F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MINOUT_DECL, + F77_INT_DECL F77_2_CHAR ) ); +F77_VOID_FUN F77dtrsm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, + F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, + F77_INT_DECL, F77_MINOUT_DECL, F77_INT_DECL F77_4_CHAR ) ); + +#endif +/* + * --------------------------------------------------------------------- + * HPL BLAS Function prototypes + * --------------------------------------------------------------------- + */ +#ifndef HPL_CALL_CBLAS + +int HPL_idamax +STDC_ARGS( ( + const int, + const double *, + const int +) ); +void HPL_daxpy +STDC_ARGS( ( + const int, + const double, + const double *, + const int, + double *, + const int +) ); +void HPL_dcopy +STDC_ARGS( ( + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dscal +STDC_ARGS( ( + const int, + const double, + double *, + const int +) ); +void HPL_dswap +STDC_ARGS( ( + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dgemv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const int, + const 
int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_dger +STDC_ARGS( ( + const enum HPL_ORDER, + const int, + const int, + const double, + const double *, + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dtrsv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dgemm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const enum HPL_TRANS, + const int, + const int, + const int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_hello +STDC_ARGS( ( +) ); +#endif +void HPL_dtrsm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_SIDE, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int +) ); + +//#endif + +#endif +/* + * hpl_blas.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_comm.h new file mode 100644 index 000000000..e3ba51a57 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_comm.h @@ -0,0 +1,161 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_COMM_H +#define HPL_COMM_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_1RING = 401, /* Increasing ring */ + HPL_1RING_M = 402, /* Increasing ring (modified) */ + HPL_2RING = 403, /* Increasing 2-ring */ + HPL_2RING_M = 404, /* Increasing 2-ring (modified) */ + HPL_BLONG = 405, /* long broadcast */ + HPL_BLONG_M = 406 /* long broadcast (modified) */ +} HPL_T_TOP; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_FAILURE 0 +#define HPL_SUCCESS 1 +#define HPL_KEEP_TESTING 2 +/* + * --------------------------------------------------------------------- + * comm function prototypes + * --------------------------------------------------------------------- + */ +int HPL_send +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_recv +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_sdrv +STDC_ARGS( ( + double *, + int, + int, + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_binit +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_bcast +STDC_ARGS( ( + HPL_T_panel *, + int * +) ); +int HPL_bwait +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_packL +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int +) ); +void HPL_copyL +STDC_ARGS( ( + HPL_T_panel * +) ); + +int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_1rinM STDC_ARGS( ( 
HPL_T_panel * ) ); +int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blong STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel * ) ); + +#endif +/* + * End of hpl_comm.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_gesv.h new file mode 100644 index 000000000..ce671cf2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_gesv.h @@ -0,0 +1,87 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_GESV_H +#define HPL_GESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_LEFT_LOOKING = 301, /* Left looking lu fact variant */ + HPL_CROUT = 302, /* Crout lu fact variant */ + HPL_RIGHT_LOOKING = 303 /* Right looking lu fact variant */ +} HPL_T_FACT; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dgesv +STDC_ARGS( +( const int, const int, const int, const HPL_T_FACT, + const HPL_T_FACT, const int, double *, + const int, int * ) ); +void HPL_ipid +STDC_ARGS( +( const int, double *, int *, int *, + int *, int *, int *, int *, + const int, const int, const int, const int, + const int ) ); + +#endif +/* + * End of hpl_gesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_grid.h new file mode 100644 index 000000000..1895a5ed4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_grid.h @@ -0,0 +1,212 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_GRID_H +#define HPL_GRID_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum { HPL_INT = 100, HPL_DOUBLE = 101 } HPL_T_TYPE; + +typedef enum +{ + HPL_ROW_MAJOR = 201, + HPL_COLUMN_MAJOR = 202 +} HPL_T_ORDER; + +typedef struct HPL_S_grid +{ + MPI_Comm all_comm; /* grid communicator */ + MPI_Comm row_comm; /* row communicator */ + MPI_Comm col_comm; /* column communicator */ + HPL_T_ORDER order; /* ordering of the procs in the grid */ + int iam; /* my rank in the grid */ + int myrow; /* my row number in the grid */ + int mycol; /* my column number in the grid */ + int nprow; /* the total # of rows in the grid */ + int npcol; /* the total # of columns in the grid */ + int nprocs; /* the total # of procs in the grid */ + int row_ip2; /* largest power of two <= nprow */ + int row_hdim; /* row_ip2 procs hypercube dimension */ + int row_ip2m1; /* largest power of two <= nprow-1 */ + int row_mask; /* row_ip2m1 procs hypercube mask */ + int col_ip2; /* largest power of two <= npcol */ + int col_hdim; /* col_ip2 procs hypercube dimension */ + int col_ip2m1; /* largest power of two <= 
npcol-1 */ + int col_mask; /* col_ip2m1 procs hypercube mask */ +} HPL_T_grid; + +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_OP) +( const int, const void *, void *, const HPL_T_TYPE ); +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define HPL_2_MPI_TYPE( typ ) \ + ( ( typ == HPL_INT ? MPI_INT : MPI_DOUBLE ) ) +/* + * The following macros perform common modulo operations; All functions + * except MPosMod assume arguments are < d (i.e., arguments are themsel- + * ves within modulo range). + */ + /* increment with mod */ +#define MModInc(I, d) if(++(I) == (d)) (I) = 0 + /* decrement with mod */ +#define MModDec(I, d) if(--(I) == -1) (I) = (d)-1 + /* positive modulo */ +#define MPosMod(I, d) ( (I) - ((I)/(d))*(d) ) + /* add two numbers */ +#define MModAdd(I1, I2, d) \ + ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) ) + /* add 1 to # */ +#define MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 ) + /* subtract two numbers */ +#define MModSub(I1, I2, d) \ + ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) ) + /* sub 1 from # */ +#define MModSub1(I, d) ( ((I)!=0) ? 
(I)-1 : (d)-1 ) +/* + * --------------------------------------------------------------------- + * grid function prototypes + * --------------------------------------------------------------------- + */ +int HPL_grid_init +STDC_ARGS( ( + MPI_Comm, + const HPL_T_ORDER, + const int, + const int, + HPL_T_grid * +) ); +int HPL_grid_exit +STDC_ARGS( ( + HPL_T_grid * +) ); + +int HPL_grid_info +STDC_ARGS( ( + const HPL_T_grid *, + int *, + int *, + int *, + int * +) ); +int HPL_pnum +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int +) ); + +int HPL_barrier +STDC_ARGS( ( + MPI_Comm +) ); +int HPL_broadcast +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const int, + MPI_Comm +) ); +int HPL_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + const int, + MPI_Comm +) ); +int HPL_all_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + MPI_Comm +) ); + +void HPL_max +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_min +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_sum +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); + +#endif +/* + * End of hpl_grid.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_matgen.h new file mode 100644 index 000000000..de6503eea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_matgen.h @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MATGEN_H +#define HPL_MATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MULT0 1284865837 +#define HPL_MULT1 1481765933 +#define HPL_IADD0 1 +#define HPL_IADD1 0 +#define HPL_DIVFAC 2147483648.0 +#define HPL_POW16 65536.0 +#define HPL_HALF 0.5 +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dmatgen +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int +) ); +void HPL_lmul +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_ladd +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_xjumpm +STDC_ARGS( ( + const int, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_setran +STDC_ARGS( ( + const int, + int * +) ); +void HPL_jumpit +STDC_ARGS( ( + int *, + int *, + int *, + int * +) ); +double HPL_rand STDC_ARGS( ( void ) ); + +#endif +/* + * End of hpl_matgen.h + */ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_misc.h new file mode 100644 index 000000000..ea421a403 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_misc.h @@ -0,0 +1,110 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MISC_H +#define HPL_MISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#ifdef __STDC__ +#define STDC_HEADERS +#endif + +#include +#include +#include +#include + +#ifdef STDC_HEADERS +#include +#define STDC_ARGS(p) p +#else +#include +#define STDC_ARGS(p) () +#endif + +#ifdef HPL_CALL_VSIPL +#include +#endif +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_rone 1.0 +#define HPL_rtwo 2.0 +#define HPL_rzero 0.0 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define Mabs( a_ ) ( ( (a_) < 0 ) ? -(a_) : (a_) ) +#define Mmin( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) ) +#define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) ) + +#define Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b)))) +#define Mceil(a,b) ( ( (a)+(b)-1 ) / (b) ) +#define Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b)))) + +#define Mupcase(C) (((C)>96 && (C)<123) ? (C) & 0xDF : (C)) +#define Mlowcase(C) (((C)>64 && (C)< 91) ? 
(C) | 32 : (C)) +/* + * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and + * also less silly errors ... + */ +#define Mptr( a_, i_, j_, lda_ ) \ + ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) ) +/* + * Align pointer + */ +#define HPL_PTR( ptr_, al_ ) \ + ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) +#endif +/* + * End of hpl_misc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_panel.h new file mode 100644 index 000000000..d5ba2939c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_panel.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PANEL_H +#define HPL_PANEL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_panel +{ + struct HPL_S_grid * grid; /* ptr to the process grid */ + struct HPL_S_palg * algo; /* ptr to the algo parameters */ + struct HPL_S_pmat * pmat; /* ptr to the local array info */ + double * A; /* ptr to trailing part of A */ + double * WORK; /* work space */ + double * L2; /* ptr to L */ + double * L1; /* ptr to jb x jb upper block of A */ + double * DPIV; /* ptr to replicated jb pivot array */ + double * DINFO; /* ptr to replicated scalar info */ + double * U; /* ptr to U */ + int * IWORK; /* integer workspace for swapping */ + void * * * buffers[2]; /* buffers for panel bcast */ + int counts [2]; /* counts for panel bcast */ + MPI_Datatype dtypes [2]; /* data types for panel bcast */ + 
MPI_Request request[1]; /* requests for panel bcast */ + MPI_Status status [1]; /* status for panel bcast */ + int nb; /* distribution blocking factor */ + int jb; /* panel width */ + int m; /* global # of rows of trailing part of A */ + int n; /* global # of cols of trailing part of A */ + int ia; /* global row index of trailing part of A */ + int ja; /* global col index of trailing part of A */ + int mp; /* local # of rows of trailing part of A */ + int nq; /* local # of cols of trailing part of A */ + int ii; /* local row index of trailing part of A */ + int jj; /* local col index of trailing part of A */ + int lda; /* local leading dim of array A */ + int prow; /* proc. row owning 1st row of trail. A */ + int pcol; /* proc. col owning 1st col of trail. A */ + int msgid; /* message id for panel bcast */ + int ldl2; /* local leading dim of array L2 */ + int len; /* length of the buffer to broadcast */ +#ifdef HPL_CALL_VSIPL + vsip_block_d * Ablock; /* A block */ + vsip_block_d * L1block; /* L1 block */ + vsip_block_d * L2block; /* L2 block */ + vsip_block_d * Ublock; /* U block */ +#endif +} HPL_T_panel; + +/* + * --------------------------------------------------------------------- + * panel function prototypes + * --------------------------------------------------------------------- + */ +#include "hpl_pgesv.h" + +void HPL_pdpanel_new +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * * +) ); +void HPL_pdpanel_init +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * +) ); +int HPL_pdpanel_disp +STDC_ARGS( ( + HPL_T_panel * * +) ); +int HPL_pdpanel_free +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_panel.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pauxil.h 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pauxil.h new file mode 100644 index 000000000..1fd0ee457 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pauxil.h @@ -0,0 +1,505 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PAUXIL_H +#define HPL_PAUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +/* + * Mindxg2p returns the process coodinate owning the entry globally in- + * dexed by ig_. + */ +#define Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) && \ + ( (nprocs_) > 1 ) ) \ + { \ + proc_ = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_); \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + else \ + { \ + proc_ = (src_); \ + } \ + } + +#define Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) { il_ = (ig_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*( j__ - i__ ) + \ + ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? 
\ + (ig_) - (inb_) : (ig_) ); \ + } \ + } + +#define Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) \ + { il_ = (ig_); proc_ = (src_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*(j__-i__) + \ + ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ + (ig_) - (inb_) : (ig_) ); \ + proc_ = (src_) + 1 + i__; \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + } +/* + * Mindxl2g computes the global index ig_ corresponding to the local + * index il_ in process proc_. + */ +#define Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + if( (proc_) == (src_) ) \ + { \ + if( (il_) < (inb_) ) ig_ = (il_); \ + else ig_ = (il_) + \ + (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \ + } \ + else if( (proc_) < (src_) ) \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1+(nprocs_) ); \ + } \ + else \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1 ); \ + } \ + } \ + else \ + { \ + ig_ = (il_); \ + } \ + } +/* + * MnumrocI computes the # of local indexes np_ residing in the process + * of coordinate proc_ corresponding to the interval of global indexes + * i_:i_+n_-1 assuming that the global index 0 resides in the process + * src_, and that the indexes are distributed from src_ using the para- + * meters inb_, nb_ and nprocs_. 
+ */ +#define MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + int inb__, mydist__, n__, nblk__, quot__, src__; \ + if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ + { \ + nblk__ = (-inb__) / (nb_) + 1; \ + src__ = (src_) + nblk__; \ + src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ + inb__ += nblk__*(nb_); \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == src__ ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - src__ ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != src__ ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != src__ ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + else \ + { \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == (src_) ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - (src_) ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != (src_) ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != (src_) ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + } \ + else \ + { \ + np_ = (n_); \ + } \ + } + +#define Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ ) \ + MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) +/* + * --------------------------------------------------------------------- + * Function prototypes + * 
--------------------------------------------------------------------- + */ +void HPL_indxg2lp +STDC_ARGS( ( + int *, + int *, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2l +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2p +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxl2g +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +void HPL_infog2l +STDC_ARGS( ( + int, + int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + int *, + int *, + int *, + int * +) ); +int HPL_numroc +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_numrocI +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int, + const int +) ); + +void HPL_dlaswp00N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp10N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp01N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp01T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp02N +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp03N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp03T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp04N +STDC_ARGS( ( + const int, + const int, + const int, + double *, + 
const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp04T +STDC_ARGS( ( + const int, + const int, + const int, + double *, + const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp06N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp06T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); + +void HPL_pabort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_pwarn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_pdlaprnt +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_pdlamch +STDC_ARGS( ( + MPI_Comm, + const HPL_T_MACH +) ); +double HPL_pdlange +STDC_ARGS( ( + const HPL_T_grid *, + const HPL_T_NORM, + const int, + const int, + const int, + const double *, + const int +) ); + +#endif +/* + * End of hpl_pauxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pfact.h new file mode 100644 index 000000000..09eee79ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pfact.h @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PFACT_H +#define HPL_PFACT_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_PFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_RFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_UPD_FUN) +( HPL_T_panel *, int *, HPL_T_panel *, const int ); +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dlocmax +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_dlocswpN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_dlocswpT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_pdmxswp +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdpancrN +STDC_ARGS( ( + HPL_T_panel *, 
+ const int, + const int, + const int, + double * +) ); +void HPL_pdpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdrpancrN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdfact +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_pfact.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pgesv.h new file mode 100644 index 000000000..3ca576c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pgesv.h @@ -0,0 +1,346 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PGESV_H +#define HPL_PGESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +#include "hpl_comm.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_SWAP00 = 451, /* Use HPL_pdlaswp00 */ + HPL_SWAP01 = 452, /* Use HPL_pdlaswp01 */ + HPL_SW_MIX = 453, /* Use HPL_pdlaswp00_ for small number of */ + /* columns, and HPL_pdlaswp01_ otherwise. 
*/ + HPL_NO_SWP = 499 +} HPL_T_SWAP; + +typedef struct HPL_S_palg +{ + HPL_T_TOP btopo; /* row broadcast topology */ + int depth; /* look-ahead depth */ + int nbdiv; /* recursive division factor */ + int nbmin; /* recursion stopping criterium */ + HPL_T_FACT pfact; /* panel fact variant */ + HPL_T_FACT rfact; /* recursive fact variant */ + HPL_T_PFA_FUN pffun; /* panel fact function ptr */ + HPL_T_RFA_FUN rffun; /* recursive fact function ptr */ + HPL_T_UPD_FUN upfun; /* update function */ + HPL_T_SWAP fswap; /* Swapping algorithm */ + int fsthr; /* Swapping threshold */ + int equil; /* Equilibration */ + int align; /* data alignment constant */ +} HPL_T_palg; + +typedef struct HPL_S_pmat +{ +#ifdef HPL_CALL_VSIPL + vsip_block_d * block; +#endif + double * A; /* pointer to local piece of A */ + double * X; /* pointer to solution vector */ + int n; /* global problem size */ + int nb; /* blocking factor */ + int ld; /* local leading dimension */ + int mp; /* local number of rows */ + int nq; /* local number of columns */ + int info; /* computational flag */ +} HPL_T_pmat; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define MSGID_BEGIN_PFACT 1001 /* message id ranges */ +#define MSGID_END_PFACT 2000 +#define MSGID_BEGIN_FACT 2001 +#define MSGID_END_FACT 3000 +#define MSGID_BEGIN_PTRSV 3001 +#define MSGID_END_PTRSV 4000 + +#define MSGID_BEGIN_COLL 9001 +#define MSGID_END_COLL 10000 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define MNxtMgid( id_, beg_, end_ ) \ + (( (id_)+1 > (end_) ? 
(beg_) : (id_)+1 )) +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pipid +STDC_ARGS( ( + HPL_T_panel *, + int *, + int * +) ); +void HPL_plindx0 +STDC_ARGS( ( + HPL_T_panel *, + const int, + int *, + int *, + int *, + int * +) ); +void HPL_pdlaswp00N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdlaswp00T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_perm +STDC_ARGS( ( + const int, + int *, + int *, + int * +) ); +void HPL_logsort +STDC_ARGS( ( + const int, + const int, + int *, + int *, + int * +) ); +void HPL_plindx10 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int * +) ); +void HPL_plindx1 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_spreadN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_spreadT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_equil +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_TRANS, + const int, + double *, + const int, + int *, + const int *, + const int *, + int * +) ); +void HPL_rollN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_rollT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_pdlaswp01N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); 
+void HPL_pdlaswp01T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdupdateNN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateNT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdgesv0 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK1 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK2 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); + +void HPL_pdtrsv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_pmat * +) ); + +#endif +/* + * End of hpl_pgesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmatgen.h new file mode 100644 index 000000000..1091b0f60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmatgen.h @@ -0,0 +1,77 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PMATGEN_H +#define HPL_PMATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_matgen.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdmatgen +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int +) ); + +#endif +/* + * End of hpl_pmatgen.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmisc.h new file mode 100644 index 000000000..23550d47b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_pmisc.h @@ -0,0 +1,59 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PMISC_H +#define HPL_PMISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "mpi.h" + +#endif +/* + * End of hpl_pmisc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptest.h new file mode 100644 index 000000000..5777bd536 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptest.h @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PTEST_H +#define HPL_PTEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pgesv.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_test +{ + double epsil; /* epsilon machine */ + double thrsh; /* threshold */ + FILE * outfp; /* output stream (only in proc 0) */ + int kfail; /* # of tests failed */ + int kpass; /* # of tests passed */ + int kskip; /* # of tests skipped */ + int ktest; /* total number of tests */ +} HPL_T_test; + +/* + * --------------------------------------------------------------------- + * #define macro constants for testing only + * --------------------------------------------------------------------- + */ +#define 
HPL_LINE_MAX 256 +#define HPL_MAX_PARAM 20 +#define HPL_ISEED 100 +/* + * --------------------------------------------------------------------- + * global timers for timing analysis only + * --------------------------------------------------------------------- + */ +#ifdef HPL_DETAILED_TIMING +#define HPL_TIMING_BEG 11 /* timer 0 reserved, used by main */ +#define HPL_TIMING_N 6 /* number of timers defined below */ +#define HPL_TIMING_RPFACT 11 /* starting from here, contiguous */ +#define HPL_TIMING_PFACT 12 +#define HPL_TIMING_MXSWP 13 +#define HPL_TIMING_UPDATE 14 +#define HPL_TIMING_LASWP 15 +#define HPL_TIMING_PTRSV 16 +#endif +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdinfo +STDC_ARGS( ( + HPL_T_test *, + int *, + int *, + int *, + int *, + HPL_T_ORDER *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + HPL_T_TOP *, + int *, + int *, + HPL_T_SWAP *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_pdtest +STDC_ARGS( ( + HPL_T_test *, + HPL_T_grid *, + HPL_T_palg *, + const int, + const int +) ); + +#endif +/* + * End of hpl_ptest.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptimer.h new file mode 100644 index 000000000..43c8fe33a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_ptimer.h @@ -0,0 +1,96 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PTIMER_H +#define HPL_PTIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NPTIMER 64 +#define HPL_PTIMER_STARTFLAG 5.0 +#define HPL_PTIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_PTIME = 101, HPL_CPU_PTIME = 102 } HPL_T_PTIME; + +typedef enum +{ HPL_AMAX_PTIME = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME = 203 } +HPL_T_PTIME_OP; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_ptimer_cputime STDC_ARGS( ( void ) ); +double HPL_ptimer_walltime STDC_ARGS( ( void ) ); + +void HPL_ptimer STDC_ARGS( ( const int ) ); +void HPL_ptimer_boot STDC_ARGS( ( void ) ); +void HPL_ptimer_combine +STDC_ARGS( +( MPI_Comm comm, const HPL_T_PTIME_OP, const HPL_T_PTIME, + const int, const int, double * ) ); +void HPL_ptimer_disable STDC_ARGS( ( void ) ); +void HPL_ptimer_enable 
STDC_ARGS( ( void ) ); +double HPL_ptimer_inquire +STDC_ARGS( +( const HPL_T_PTIME, const int ) ); + +#endif +/* + * End of hpl_ptimer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_test.h new file mode 100644 index 000000000..1eedc97e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_test.h @@ -0,0 +1,80 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TEST_H +#define HPL_TEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_matgen.h" +#include "hpl_timer.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dinfo +STDC_ARGS( +( FILE * *, int *, int *, int *, + HPL_T_FACT *, int *, int *, int *, + int *, int *, HPL_T_FACT *, int *, + double *, double * ) ); +void HPL_dtest +STDC_ARGS( +( FILE *, const int, const int, const int, + HPL_T_FACT, HPL_T_FACT, const int, const double, + const double, int *, int *, int * ) ); + +#endif +/* + * End of hpl_test.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_timer.h new file mode 100644 index 000000000..4c91700ef --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_timer.h @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TIMER_H +#define HPL_TIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NTIMER 64 +#define HPL_TIMER_STARTFLAG 5.0 +#define HPL_TIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_TIME = 101, HPL_CPU_TIME = 102 } HPL_T_TIME; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_timer_cputime STDC_ARGS( ( void ) ); +double HPL_timer_walltime STDC_ARGS( ( void ) ); + +void HPL_timer STDC_ARGS( ( const int ) ); +void HPL_timer_boot STDC_ARGS( ( void ) ); +void HPL_timer_enable STDC_ARGS( ( void ) ); +void HPL_timer_disable STDC_ARGS( ( void ) ); +double HPL_timer_inquire +STDC_ARGS( +( const HPL_T_TIME, const int ) ); + +#endif +/* + * End of hpl_timer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_units.h 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_units.h new file mode 100644 index 000000000..a96956497 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hpl_units.h @@ -0,0 +1,135 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_UNITS_H +#define HPL_UNITS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MAXROUT 50 +#define HPL_MAXRNAME 15 + +#define HPL_TRUE 'T' +#define HPL_FALSE 'F' + +#define HPL_INDXG2P_ROUT "HPL_indxg2p" +#define HPL_INDXG2L_ROUT "HPL_indxg2l" +#define HPL_INDXL2G_ROUT "HPL_indxl2g" +#define HPL_NUMROC_ROUT "HPL_numroc" +#define HPL_NUMROCI_ROUT "HPL_numrocI" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_unit_info +STDC_ARGS( +( FILE * *, int *, int *, int *, + int *, int *, int *, int *, + int *, int *, int *, char [][HPL_MAXRNAME], + int [] ) ); + +void HPL_unit_indxg2l +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxg2l +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_indxl2g +STDC_ARGS( +( FILE *, const 
int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxl2g +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_indxg2p +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxg2p +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_numroc +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +void HPL_unit_numrocI +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, const int, long *, long * ) ); +int HPL_chek_numrocI +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, const int, long *, long * ) ); + +#endif +/* + * End of hpl_units.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hplconfig.h.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hplconfig.h.in new file mode 100644 index 000000000..b4b3b9a35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/include/hplconfig.h.in @@ -0,0 +1,67 @@ +/* include/hplconfig.h.in. Generated from configure.ac by autoheader. */ + +/* Define if you have a BLAS library. */ +#undef HAVE_BLAS + +/* Define to 1 if you have the `dgemm_' function. */ +#undef HAVE_DGEMM_ + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define if you have the MPI library. */ +#undef HAVE_MPI + +/* Define to 1 if you have the <mpi.h> header file.
*/ +#undef HAVE_MPI_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/install-sh b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/install-sh new file mode 100755 index 000000000..8175c640f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/install-sh @@ -0,0 +1,518 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2018-03-11.20; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license.
+# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. 
+ +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. 
+ case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. 
+ *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. 
+ *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + # Note that $RANDOM variable is not portable (e.g. dash); Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p' feature. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. 
+ rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name.
+ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a new file mode 100644 index 000000000..f9f3f32c2 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.auxil b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.auxil new file mode 100644 index 000000000..e92d18b80 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.auxil @@ -0,0 +1,100 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h +# +## Object files ######################################################## +# +HPL_au0obj = \ + HPL_dlacpy.o HPL_dlatcpy.o HPL_fprintf.o \ + HPL_warn.o HPL_abort.o HPL_dlaprnt.o \ + HPL_dlange.o +HPL_au1obj = \ + HPL_dlamch.o +HPL_auxobj = \ + $(HPL_au0obj) $(HPL_au1obj) +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_auxobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlacpy.o : ../HPL_dlacpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlacpy.c +HPL_dlatcpy.o : ../HPL_dlatcpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlatcpy.c +HPL_fprintf.o : ../HPL_fprintf.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_fprintf.c +HPL_warn.o : ../HPL_warn.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_warn.c +HPL_abort.o : ../HPL_abort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_abort.c +HPL_dlaprnt.o : ../HPL_dlaprnt.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaprnt.c +HPL_dlange.o : ../HPL_dlange.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlange.c +HPL_dlamch.o : ../HPL_dlamch.c $(INCdep) + $(CC) -o $@ -c $(CCNOOPT) ../HPL_dlamch.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.blas b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.blas new file mode 100644 index 000000000..ed9f3d0e2 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.blas @@ -0,0 +1,98 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h +# +## Object files ######################################################## +# +HPL_blaobj = \ + HPL_dcopy.o HPL_daxpy.o HPL_dscal.o \ + HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o \ + HPL_dger.o HPL_dgemm.o HPL_dtrsm.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_blaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dcopy.o : ../HPL_dcopy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dcopy.c +HPL_daxpy.o : ../HPL_daxpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_daxpy.c +HPL_dscal.o : ../HPL_dscal.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dscal.c +HPL_idamax.o : ../HPL_idamax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_idamax.c +HPL_dgemv.o : ../HPL_dgemv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemv.c +HPL_dtrsv.o : ../HPL_dtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsv.c +HPL_dger.o : ../HPL_dger.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dger.c +HPL_dgemm.o : ../HPL_dgemm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemm.c +HPL_dtrsm.o : ../HPL_dtrsm.c $(INCdep) + $(CC) -o $@ 
-c $(CCFLAGS) ../HPL_dtrsm.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.comm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.comm new file mode 100644 index 000000000..529fe9aea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.comm @@ -0,0 +1,111 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_comobj = \ + HPL_1ring.o HPL_1rinM.o HPL_2ring.o \ + HPL_2rinM.o HPL_blong.o HPL_blonM.o \ + HPL_packL.o HPL_copyL.o HPL_binit.o \ + HPL_bcast.o HPL_bwait.o HPL_send.o \ + HPL_recv.o HPL_sdrv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_comobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_1ring.o : ../HPL_1ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1ring.c +HPL_1rinM.o : ../HPL_1rinM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1rinM.c +HPL_2ring.o : ../HPL_2ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_2ring.c +HPL_2rinM.o : ../HPL_2rinM.c $(INCdep) + $(CC) -o $@ 
-c $(CCFLAGS) ../HPL_2rinM.c +HPL_blong.o : ../HPL_blong.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blong.c +HPL_blonM.o : ../HPL_blonM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blonM.c +HPL_packL.o : ../HPL_packL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_packL.c +HPL_copyL.o : ../HPL_copyL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_copyL.c +HPL_binit.o : ../HPL_binit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_binit.c +HPL_bcast.o : ../HPL_bcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bcast.c +HPL_bwait.o : ../HPL_bwait.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bwait.c +HPL_send.o : ../HPL_send.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_send.c +HPL_recv.o : ../HPL_recv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_recv.c +HPL_sdrv.o : ../HPL_sdrv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sdrv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.gesv b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.gesv new file mode 100644 index 000000000..2a8722559 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.gesv @@ -0,0 +1,83 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h +# +## Object files ######################################################## +# +HPL_gesobj = \ + HPL_dgesv.o HPL_ipid.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_gesobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_gesobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dgesv.o : ../HPL_dgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgesv.c +HPL_ipid.o : ../HPL_ipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ipid.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.grid b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.grid new file mode 100644 index 000000000..51549d817 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.grid @@ -0,0 +1,103 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h +# +## Object files ######################################################## +# +HPL_griobj = \ + HPL_grid_init.o HPL_pnum.o HPL_grid_info.o \ + HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o \ + HPL_all_reduce.o HPL_barrier.o HPL_min.o \ + HPL_max.o HPL_sum.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_griobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_grid_init.o : ../HPL_grid_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_init.c +HPL_pnum.o : ../HPL_pnum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pnum.c +HPL_grid_info.o : ../HPL_grid_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_info.c +HPL_grid_exit.o : ../HPL_grid_exit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_exit.c +HPL_broadcast.o : ../HPL_broadcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_broadcast.c +HPL_reduce.o : ../HPL_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_reduce.c +HPL_all_reduce.o : ../HPL_all_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_all_reduce.c +HPL_barrier.o : ../HPL_barrier.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_barrier.c +HPL_min.o : ../HPL_min.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_min.c +HPL_max.o : ../HPL_max.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_max.c +HPL_sum.o : ../HPL_sum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sum.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.matgen b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.matgen new file mode 100644 index 000000000..f027fbc06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.matgen @@ -0,0 +1,95 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_matgen.h +# +## Object files ######################################################## +# +HPL_matobj = \ + HPL_dmatgen.o HPL_ladd.o HPL_lmul.o \ + HPL_xjumpm.o HPL_jumpit.o HPL_rand.o \ + HPL_setran.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_matobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c +HPL_ladd.o : ../HPL_ladd.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c +HPL_lmul.o : ../HPL_lmul.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c +HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c +HPL_jumpit.o : ../HPL_jumpit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c +HPL_rand.o : ../HPL_rand.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c +HPL_setran.o : ../HPL_setran.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c +# +# ###################################################################### +# +clean : + $(RM) *.o 
*.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.panel b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.panel new file mode 100644 index 000000000..804749cc2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.panel @@ -0,0 +1,90 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_panobj = \ + HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o \ + HPL_pdpanel_free.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_panobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_new.c +HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_init.c +HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_disp.c +HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_free.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# 
###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pauxil b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pauxil new file mode 100644 index 000000000..ea93cd150 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pauxil @@ -0,0 +1,137 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_pauxil.h +# +## Object files ######################################################## +# +HPL_pauobj = \ + HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o \ + HPL_indxl2g.o HPL_infog2l.o HPL_numroc.o \ + HPL_numrocI.o HPL_dlaswp00N.o HPL_dlaswp10N.o \ + HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o \ + HPL_dlaswp03N.o HPL_dlaswp03T.o HPL_dlaswp04N.o \ + HPL_dlaswp04T.o HPL_dlaswp05N.o HPL_dlaswp05T.o \ + HPL_dlaswp06N.o HPL_dlaswp06T.o HPL_pwarn.o \ + HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o \ + HPL_pdlange.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pauobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2l.c +HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2lp.c +HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2p.c +HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep) + $(CC) -o $@ -c 
$(CCFLAGS) ../HPL_indxl2g.c +HPL_infog2l.o : ../HPL_infog2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_infog2l.c +HPL_numroc.o : ../HPL_numroc.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numroc.c +HPL_numrocI.o : ../HPL_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numrocI.c +HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp00N.c +HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp10N.c +HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01N.c +HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01T.c +HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp02N.c +HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03N.c +HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03T.c +HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04N.c +HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04T.c +HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05N.c +HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05T.c +HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06N.c +HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06T.c +HPL_pwarn.o : ../HPL_pwarn.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pwarn.c +HPL_pabort.o : ../HPL_pabort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pabort.c +HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaprnt.c +HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlamch.c +HPL_pdlange.o : ../HPL_pdlange.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlange.c +# +# 
###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pfact b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pfact new file mode 100644 index 000000000..bf4634d31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pfact @@ -0,0 +1,118 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h +# +## Object files ######################################################## +# +HPL_pfaobj = \ + HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o \ + HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o \ + HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o \ + HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o \ + HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o \ + HPL_pdrpanrlT.o HPL_pdfact.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pfaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocmax.c +HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpN.c 
+HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpT.c +HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmxswp.c +HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrN.c +HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrT.c +HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllN.c +HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllT.c +HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlN.c +HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlT.c +HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllN.c +HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllT.c +HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrN.c +HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrT.c +HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlN.c +HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlT.c +HPL_pdfact.o : ../HPL_pdfact.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdfact.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pgesv b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pgesv new file mode 100644 index 000000000..7898665f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pgesv @@ -0,0 +1,136 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# 
Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# Make.inc is expected to define the variables used below (INCdir, HPLlib, CC, CCFLAGS, ...). +include Make.inc +# +# ###################################################################### +# Headers listed as prerequisites of every object rule below. +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_pgeobj = \ + HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o \ + HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o \ + HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o \ + HPL_spreadT.o HPL_rollN.o HPL_rollT.o \ + HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o \ + HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o \ + HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o \ + HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# lib.grd is a stamp file: archive the objects into $(HPLlib), run $(RANLIB), then touch it. +lib.grd : $(HPL_pgeobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# One explicit compile rule per object; each source is read from the parent directory. +HPL_pipid.o : ../HPL_pipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pipid.c +HPL_plindx0.o : ../HPL_plindx0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx0.c +HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00N.c +HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00T.c +HPL_perm.o : ../HPL_perm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_perm.c +HPL_logsort.o : ../HPL_logsort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_logsort.c +HPL_plindx10.o : ../HPL_plindx10.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx10.c +HPL_plindx1.o : ../HPL_plindx1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx1.c +HPL_spreadN.o : ../HPL_spreadN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS)
../HPL_spreadN.c +HPL_spreadT.o : ../HPL_spreadT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadT.c +HPL_rollN.o : ../HPL_rollN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollN.c +HPL_rollT.o : ../HPL_rollT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollT.c +HPL_equil.o : ../HPL_equil.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_equil.c +HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01N.c +HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01T.c +HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNN.c +HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNT.c +HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTN.c +HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTT.c +HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtrsv.c +HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv0.c +HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK1.c +HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK2.c +HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv.c +# +# ###################################################################### +# Remove the object files and the *.grd stamp files. +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pmatgen b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pmatgen new file mode 100644 index 000000000..bf33fcd7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.pmatgen @@ -0,0 +1,81 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018
+# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# Make.inc is expected to define the variables used below (INCdir, HPLlib, CC, CCFLAGS, ...). +include Make.inc +# +# ###################################################################### +# Headers listed as prerequisites of the object rule below. +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h +# +## Object files ######################################################## +# +HPL_pmaobj = \ + HPL_pdmatgen.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# lib.grd is a stamp file: archive the objects into $(HPLlib), run $(RANLIB), then touch it. +lib.grd : $(HPL_pmaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# Explicit compile rule; the source is read from the parent directory. +HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmatgen.c +# +# ###################################################################### +# Remove the object files and the *.grd stamp files. +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptest b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptest new file mode 100644 index 000000000..cfc96e667 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptest @@ -0,0 +1,94 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2.
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# Make.inc is expected to define the variables used below (INCdir, BINdir, HPLlib, HPL_LIBS, CC, ...). +include Make.inc +# +# ###################################################################### +# Headers listed as prerequisites of every object rule below. +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \ + $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h +# +## Executable names #################################################### +# +xhpl = $(BINdir)/xhpl +# +## Object files ######################################################## +# +HPL_pteobj = \ + HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# Copy the input data file into $(BINdir), where the executable is written. +$(BINdir)/HPL.dat : ../HPL.dat + ( $(CP) ../HPL.dat $(BINdir) ) +# dexe.grd is a stamp file: link xhpl, refresh HPL.dat via a sub-make, then touch it. +dexe.grd: $(HPL_pteobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/HPL.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# One explicit compile rule per object; each source is read from the parent directory. +HPL_pddriver.o : ../HPL_pddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pddriver.c +HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdinfo.c +HPL_pdtest.o : ../HPL_pdtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtest.c +# +# ###################################################################### +# Remove the object files and the *.grd stamp files. +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptimer b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptimer new file mode 100644 index 000000000..971500764 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.ptimer @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# Make.inc is expected to define the variables used below (INCdir, HPLlib, CC, CCFLAGS, ...). +include Make.inc +# +# ###################################################################### +# Headers listed as prerequisites of every object rule below. +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_ptimer.h +# +## Object files ######################################################## +# +HPL_ptiobj = \ + HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# lib.grd is a stamp file: archive the objects into $(HPLlib), run $(RANLIB), then touch it. +lib.grd : $(HPL_ptiobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# One explicit compile rule per object; each source is read from the parent directory. +HPL_ptimer.o : ../HPL_ptimer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer.c +HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_cputime.c +HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_walltime.c +# +# ###################################################################### +# Remove the object files and the *.grd stamp files. +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.test b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.test new file mode 100644 index 000000000..514d445b8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.test @@ -0,0 +1,93 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1.
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# Make.inc is expected to define the variables used below (INCdir, BINdir, HPLlib, HPL_LIBS, CC, ...). +include Make.inc +# +# ###################################################################### +# Headers listed as prerequisites of every object rule below. +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_timer.h \ + $(INCdir)/hpl_test.h +# +## Executable names #################################################### +# +xlinpack = $(BINdir)/xlinpack +# +## Object files ######################################################## +# +HPL_tesobj = \ + HPL_ddriver.o HPL_dinfo.o HPL_dtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# Copy the input data file into $(BINdir), where the executable is written. +$(BINdir)/LINPACK.dat : ../LINPACK.dat + ( $(CP) ../LINPACK.dat $(BINdir) ) +# dexe.grd is a stamp file: link xlinpack against $(HPL_LIBS), refresh LINPACK.dat via a sub-make, then touch it. +dexe.grd: $(HPL_tesobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xlinpack) $(HPL_tesobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/LINPACK.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# One explicit compile rule per object; each source is read from the parent directory. +HPL_ddriver.o : ../HPL_ddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ddriver.c +HPL_dinfo.o : ../HPL_dinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dinfo.c +HPL_dtest.o : ../HPL_dtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtest.c +# +# ###################################################################### +# Remove the object files and the *.grd stamp files. +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.timer b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.timer new file mode 100644 index 000000000..b8009e88a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.timer @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# Make.inc is expected to define the variables used below (INCdir, HPLlib, CC, CCFLAGS, ...). +include Make.inc +# +# ###################################################################### +# Headers listed as prerequisites of every object rule below. +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h +# +## Object files ######################################################## +# +HPL_timobj = \ + HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# lib.grd is a stamp file: archive the objects into $(HPLlib), run $(RANLIB), then touch it. +lib.grd : $(HPL_timobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# One explicit compile rule per object; each source is read from the parent directory. +HPL_timer.o : ../HPL_timer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer.c +HPL_timer_cputime.o : ../HPL_timer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_cputime.c +HPL_timer_walltime.o : ../HPL_timer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_walltime.c +# +# ###################################################################### +# Remove the object files and the *.grd stamp files. +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.units b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.units new file mode 100644 index 000000000..1c447f204 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/makes/Make.units @@ -0,0 +1,112 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1.
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# Make.inc is expected to define the variables used below (INCdir, BINdir, HPLlib, HPL_LIBS, CC, ...). +include Make.inc +# +# ###################################################################### +# Headers listed as prerequisites of every object rule below. +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \ + $(INCdir)/hpl_units.h +# +## Executable names #################################################### +# +xunits = $(BINdir)/xunits +# +## Object files ######################################################## +# +HPL_uniobj = \ + HPL_unit_driver.o HPL_unit_info.o HPL_unit_indxg2l.o \ + HPL_chek_indxg2l.o HPL_unit_indxg2p.o HPL_chek_indxg2p.o \ + HPL_unit_indxl2g.o HPL_chek_indxl2g.o HPL_unit_numroc.o \ + HPL_unit_numrocI.o HPL_chek_numrocI.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# Copy the input data file into $(BINdir), where the executable is written. +$(BINdir)/UNITS.dat : ../UNITS.dat + ( $(CP) ../UNITS.dat $(BINdir) ) +# dexe.grd is a stamp file: link xunits against $(HPL_LIBS), refresh UNITS.dat via a sub-make, then touch it. +dexe.grd : $(HPL_uniobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xunits) $(HPL_uniobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/UNITS.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# One explicit compile rule per object; each source is read from the parent directory. +HPL_unit_driver.o : ../HPL_unit_driver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_driver.c +HPL_unit_info.o : ../HPL_unit_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_info.c +HPL_unit_indxg2l.o : ../HPL_unit_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2l.c +HPL_chek_indxg2l.o : ../HPL_chek_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2l.c +HPL_unit_indxg2p.o : ../HPL_unit_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2p.c +HPL_chek_indxg2p.o : ../HPL_chek_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2p.c +HPL_unit_indxl2g.o : ../HPL_unit_indxl2g.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxl2g.c +HPL_chek_indxl2g.o : ../HPL_chek_indxl2g.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxl2g.c +HPL_unit_numroc.o
: ../HPL_unit_numroc.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_numroc.c +HPL_unit_numrocI.o : ../HPL_unit_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_numrocI.c +HPL_chek_numrocI.o : ../HPL_chek_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_numrocI.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_abort.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_abort.3 new file mode 100644 index 000000000..c6a2c7a70 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_abort.3 @@ -0,0 +1,52 @@ +.TH HPL_abort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_abort \- halts execution. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_abort(\fR +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_abort\fR +displays an error message on stderr and halts execution. +.SH ARGUMENTS +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string.
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_abort( __LINE__, __FILE__, "Halt.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_fprintf \ (3), +.BR HPL_warn \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_all_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_all_reduce.3 new file mode 100644 index 000000000..70ec6c4ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_all_reduce.3 @@ -0,0 +1,49 @@ +.TH HPL_all_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_all_reduce \- All reduce operation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_all_reduce(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const HPL_T_OP \fR +\fI\&OP\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_all_reduce\fR +performs a global reduce operation across all +processes of a group leaving the results on all processes. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/global output) void * +On entry, BUFFER points to the buffer to be combined. On +exit, this array contains the combined data and is identical +on all processes in the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffer operands. +.TP 8 +OP (global input) const HPL_T_OP +On entry, OP is a pointer to the local combine function. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_barrier.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_barrier.3 new file mode 100644 index 000000000..ffee7f291 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_barrier.3 @@ -0,0 +1,27 @@ +.TH HPL_barrier 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_barrier \- Barrier operation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_barrier(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_barrier\fR +blocks the caller until all process members have called it. +The call returns at any process only after all group members have +entered the call. +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bcast.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bcast.3 new file mode 100644 index 000000000..54eb54b25 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bcast.3 @@ -0,0 +1,31 @@ +.TH HPL_bcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_bcast \- Perform the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_bcast(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&int *\fR +\fI\&IFLAG\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_bcast\fR +broadcasts the current panel. Successful completion is +indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to +HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was +not completed, in which case this function should be called again.
+.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.TP 8 +IFLAG (output) int * +On exit, IFLAG indicates whether or not the broadcast has +occurred. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_binit.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_binit.3 new file mode 100644 index 000000000..083776ab6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_binit.3 @@ -0,0 +1,23 @@ +.TH HPL_binit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_binit \- Initialize the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_binit(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_binit\fR +initializes a row broadcast. Successful completion is +indicated by the returned error code HPL_SUCCESS. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_broadcast.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_broadcast.3 new file mode 100644 index 000000000..317d374cf --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_broadcast.3 @@ -0,0 +1,49 @@ +.TH HPL_broadcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_broadcast \- Broadcast operation.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_broadcast(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const int\fR +\fI\&ROOT\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_broadcast\fR +broadcasts a message from the process with rank ROOT to +all processes in the group. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/output) void * +On entry, BUFFER points to the buffer to be broadcast. On +exit, this array contains the broadcast data and is identical +on all processes in the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.TP 8 +ROOT (global input) const int +On entry, ROOT is the coordinate of the source process. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bwait.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bwait.3 new file mode 100644 index 000000000..0dac6fe58 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_bwait.3 @@ -0,0 +1,24 @@ +.TH HPL_bwait 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_bwait \- Finalize the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_bwait(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_bwait\fR +waits for the row broadcast of the current panel to +terminate. Successful completion is indicated by the returned error +code HPL_SUCCESS.
+.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_copyL.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_copyL.3 new file mode 100644 index 000000000..d60619a06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_copyL.3 @@ -0,0 +1,28 @@ +.TH HPL_copyL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_copyL \- Copy the current panel into a contiguous workspace. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_copyL(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_copyL\fR +copies the panel of columns, the L1 replicated submatrix, +the pivot array and the info scalar into a contiguous workspace for +later broadcast. + +The copy of this panel into a contiguous buffer can be enforced by +specifying -DHPL_COPY_L in the architecture specific Makefile. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_daxpy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_daxpy.3 new file mode 100644 index 000000000..50bd0b0a8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_daxpy.3 @@ -0,0 +1,76 @@ +.TH HPL_daxpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_daxpy \- y := y + alpha * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_daxpy(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_daxpy\fR +scales the vector x by alpha and adds it to y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero, then the entries of the incremented array X +need not be set on input. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the scaled entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_daxpy( 3, 2.0, x, 1, y, 1 ); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dcopy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dcopy.3 new file mode 100644 index 000000000..f2759ced9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dcopy.3 @@ -0,0 +1,69 @@ +.TH HPL_dcopy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dcopy \- y := x. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dcopy(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dcopy\fR +copies the vector x into the vector y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_dcopy( 3, x, 1, y, 1 ); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemm.3 new file mode 100644 index 000000000..57c69f78c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemm.3 @@ -0,0 +1,160 @@ +.TH HPL_dgemm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dgemm \- C := alpha * op(A) * op(B) + beta * C. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dgemm(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANSA\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANSB\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR, +\fB\&const double\fR +\fI\&BETA\fR, +\fB\&double *\fR +\fI\&C\fR, +\fB\&const int\fR +\fI\&LDC\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dgemm\fR +performs one of the matrix-matrix operations + + C := alpha * op( A ) * op( B ) + beta * C + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +Alpha and beta are scalars, and A, B and C are matrices, with op(A) +an m by k matrix, op(B) a k by n matrix and C an m by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +TRANSA (local input) const enum HPL_TRANS +On entry, TRANSA specifies the form of op(A) to be used in +the matrix-matrix operation follows: + TRANSA==HplNoTrans : op( A ) = A, + TRANSA==HplTrans : op( A ) = A^T, + TRANSA==HplConjTrans : op( A ) = A^T. 
+.TP 8 +TRANSB (local input) const enum HPL_TRANS +On entry, TRANSB specifies the form of op(B) to be used in +the matrix-matrix operation follows: + TRANSB==HplNoTrans : op( B ) = B, + TRANSB==HplTrans : op( B ) = B^T, + TRANSB==HplConjTrans : op( B ) = B^T. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix +op(A) and of the matrix C. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix +op(B) and the number of columns of the matrix C. N must be +at least zero. +.TP 8 +K (local input) const int +On entry, K specifies the number of columns of the matrix +op(A) and the number of rows of the matrix op(B). K must be +at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then the elements of the matrices A and B +need not be set on input. +.TP 8 +A (local input) const double * +On entry, A is an array of dimension (LDA,ka), where ka is +k when TRANSA==HplNoTrans, and is m otherwise. Before +entry with TRANSA==HplNoTrans, the leading m by k part of +the array A must contain the matrix A, otherwise the leading +k by m part of the array A must contain the matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the first dimension of A as declared +in the calling (sub) program. When TRANSA==HplNoTrans then +LDA must be at least max(1,m), otherwise LDA must be at least +max(1,k). +.TP 8 +B (local input) const double * +On entry, B is an array of dimension (LDB,kb), where kb is +n when TRANSB==HplNoTrans, and is k otherwise. Before +entry with TRANSB==HplNoTrans, the leading k by n part of +the array B must contain the matrix B, otherwise the leading +n by k part of the array B must contain the matrix B. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the first dimension of B as declared +in the calling (sub) program.
When TRANSB==HplNoTrans then +LDB must be at least max(1,k), otherwise LDB must be at least +max(1,n). +.TP 8 +BETA (local input) const double +On entry, BETA specifies the scalar beta. When BETA is +supplied as zero then the elements of the matrix C need +not be set on input. +.TP 8 +C (local input/output) double * +On entry, C is an array of dimension (LDC,n). Before entry, +the leading m by n part of the array C must contain the +matrix C, except when beta is zero, in which case C need not +be set on entry. On exit, the array C is overwritten by the +m by n matrix ( alpha*op( A )*op( B ) + beta*C ). +.TP 8 +LDC (local input) const int +On entry, LDC specifies the first dimension of C as declared +in the calling (sub) program. LDC must be at least +max(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2], c[2*2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; +.br + c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0; +.br + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, +.br + 2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 ); +.br + printf(" [%f,%f]\en", c[0], c[2]); +.br + printf("c=[%f,%f]\en", c[1], c[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dtrsm \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemv.3 new file mode 100644 index 000000000..f85db57fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dgemv.3 @@ -0,0 +1,128 @@ +.TH HPL_dgemv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dgemv \- y := beta * y + alpha * op(A) * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dgemv(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&const double\fR +\fI\&BETA\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dgemv\fR +performs one of the matrix-vector operations + + y := alpha * op( A ) * x + beta * y, + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +where alpha and beta are scalars, x and y are vectors and A is an m +by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the operation to be performed as +follows: + TRANS = HplNoTrans y := alpha*A *x + beta*y, + TRANS = HplTrans y := alpha*A^T*x + beta*y. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then A and X need not be set on input. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * n. Before entry, the leading m by n part of the +array A must contain the matrix coefficients. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m). 
+.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +BETA (local input) const double +On entry, BETA specifies the scalar beta. When BETA is +supplied as zero then Y need not be set on input. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +Before entry with BETA non-zero, the incremented array Y must +contain the vector y. On exit, Y is overwritten by the +updated vector y. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2], y[2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; +.br + HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0, +.br + a, 2, x, 1, -1.0, y, 1 ); +.br + printf("y=[%f,%f]\en", y[0], y[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dger \ (3), +.BR HPL_dtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dger.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dger.3 new file mode 100644 index 000000000..da9ddf495 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dger.3 @@ -0,0 +1,108 @@ +.TH HPL_dger 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dger \- A := alpha * x * y^T + A.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dger(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dger\fR +performs the rank 1 operation + + A := alpha * x * y^T + A, + +where alpha is a scalar, x is an m-element vector, y is an n-element +vector and A is an m by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then X and Y need not be set on input. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of size equal to or greater +than LDA * n. Before entry, the leading m by n part of the +array A must contain the matrix coefficients. 
On exit, A is +overwritten by the updated matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2], y[2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; +.br + HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1, +.br + a, 2 ); +.br + printf("y=[%f,%f]\en", y[0], y[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dgemv \ (3), +.BR HPL_dtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlacpy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlacpy.3 new file mode 100644 index 000000000..8da8b1316 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlacpy.3 @@ -0,0 +1,72 @@ +.TH HPL_dlacpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlacpy \- B := A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlacpy(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlacpy\fR +copies an array A into an array B. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the arrays A and +B. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the arrays A +and B. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N). +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M).
+.TP 8 +B (local output) double * +On entry, B points to an array of dimension (LDB,N). On exit, +B is overwritten with A. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of the array B. +LDB must be at least MAX(1,M). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlacpy( 2, 2, a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); +.br + return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlatcpy \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlamch.3 new file mode 100644 index 000000000..9bf41b68a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlamch.3 @@ -0,0 +1,76 @@ +.TH HPL_dlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlamch \- determines machine-specific arithmetic constants. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_dlamch(\fR +\fB\&const HPL_T_MACH\fR +\fI\&CMACH\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlamch\fR +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such +that 1 / sfmin does not overflow, the base of the machine (base), the +precision (prec), the number of (base) digits in the mantissa (t), +whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the +minimum exponent before (gradual) underflow (emin), the underflow +threshold (rmin) base**(emin-1), the largest exponent before overflow +(emax), the overflow threshold (rmax) (base**emax)*(1-eps). 
+.SH ARGUMENTS +.TP 8 +CMACH (local input) const HPL_T_MACH +Specifies the value to be returned by HPL_dlamch + = HPL_MACH_EPS, HPL_dlamch := eps (default) + = HPL_MACH_SFMIN, HPL_dlamch := sfmin + = HPL_MACH_BASE, HPL_dlamch := base + = HPL_MACH_PREC, HPL_dlamch := eps*base + = HPL_MACH_MLEN, HPL_dlamch := t + = HPL_MACH_RND, HPL_dlamch := rnd + = HPL_MACH_EMIN, HPL_dlamch := emin + = HPL_MACH_RMIN, HPL_dlamch := rmin + = HPL_MACH_EMAX, HPL_dlamch := emax + = HPL_MACH_RMAX, HPL_dlamch := rmax + +where + + eps = relative machine precision, + sfmin = safe minimum, + base = base of the machine, + prec = eps*base, + t = number of digits in the mantissa, + rnd = 1.0 if rounding occurs in addition, + emin = minimum exponent before underflow, + rmin = underflow threshold, + emax = largest exponent before overflow, + rmax = overflow threshold. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double eps; +.br + eps = HPL_dlamch( HPL_MACH_EPS ); +.br + printf("eps=%18.8e\en", eps); +.br + exit(0); return(0); +.br +} +.SH REFERENCES +This function has been manually translated from the Fortran 77 LAPACK +auxiliary function dlamch.f (version 2.0 -- 1992), that was itself +based on the function ENVRON by Malcolm and incorporated suggestions +by Gentleman and Marovich. See + +Malcolm M. A., Algorithms to reveal properties of floating-point +arithmetic., Comms. of the ACM, 15, 949-951 (1972). + +Gentleman W. M. and Marovich S. B., More on algorithms that reveal +properties of floating point arithmetic units., Comms. of the ACM, +17, 276-277 (1974). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlange.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlange.3 new file mode 100644 index 000000000..ffbab554f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlange.3 @@ -0,0 +1,73 @@ +.TH HPL_dlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlange \- Compute ||A||. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_dlange(\fR +\fB\&const HPL_T_NORM\fR +\fI\&NORM\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlange\fR +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a matrix A: + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. +.SH ARGUMENTS +.TP 8 +NORM (local input) const HPL_T_NORM +On entry, NORM specifies the value to be returned by this +function as described above. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N), that +contains the matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 ); +.br + printf("norm=%f\en", norm); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlaprnt \ (3), +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaprnt.3 new file mode 100644 index 000000000..8fdd89b8c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaprnt.3 @@ -0,0 +1,70 @@ +.TH HPL_dlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaprnt \- Print the matrix A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaprnt(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const char *\fR +\fI\&CMATNM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaprnt\fR +prints to standard error an M-by-N matrix A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A. M must be at +least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of A. N must be +at least zero. +.TP 8 +A (local input) double * +On entry, A points to an array of dimension (LDA,N). +.TP 8 +IA (local input) const int +On entry, IA specifies the starting row index to be printed. +.TP 8 +JA (local input) const int +On entry, JA specifies the starting column index to be +printed. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). +.TP 8 +CMATNM (local input) const char * +On entry, CMATNM is the name of the matrix to be printed. 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp00N.3 new file mode 100644 index 000000000..efe3580b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp00N.3 @@ -0,0 +1,60 @@ +.TH HPL_dlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp00N \- performs a series of row interchanges. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp00N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int *\fR +\fI\&IPIV\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp00N\fR +performs a series of local row interchanges on a matrix +A. One row interchange is initiated for rows 0 through M-1 of A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array A to be +interchanged. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the array A. +N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N) to which +the row interchanges will be applied. On exit, the permuted +matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +IPIV (local input) const int * +On entry, IPIV is an array of size M that contains the +pivoting information. For k in [0..M), IPIV[k]=IROFF + l +implies that local rows k and l are to be interchanged. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01N.3 new file mode 100644 index 000000000..662913e54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01N.3 @@ -0,0 +1,88 @@ +.TH HPL_dlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp01N \- copies rows of A into itself and into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp01N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp01N\fR +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +moved within A or copied into U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +moved within A or copied into U. N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). 
The rows +of this array specified by LINDXA should be moved within A or +copied into U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). The rows +of A specified by LINDXA are copied within this array U at +the positions indicated by positive values of LINDXAU. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be moved within A +or copied into U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U where the rows of A should be +copied at. This array also contains the local row offsets in +A where some of the rows of A should be moved to. A positive +value of LINDXAU[i] indicates that the row LINDXA[i] of A +should be copied into U at the position LINDXAU[i]; otherwise +the row LINDXA[i] of A should be moved at the position +-LINDXAU[i] within A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01T.3 new file mode 100644 index 000000000..738507755 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp01T.3 @@ -0,0 +1,89 @@ +.TH HPL_dlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp01T \- copies rows of A into itself and into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp01T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp01T\fR +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. Rows of A are stored as columns in U. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +moved within A or copied into U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +moved within A or copied into U. N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). The rows +of this array specified by LINDXA should be moved within A or +copied into U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,M). 
The rows +of A specified by LINDXA are copied within this array U at +the positions indicated by positive values of LINDXAU. The +rows of A are stored as columns in U. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be moved within A +or copied into U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U where the rows of A should be +copied at. This array also contains the local row offsets in +A where some of the rows of A should be moved to. A positive +value of LINDXAU[i] indicates that the row LINDXA[i] of A +should be copied into U at the position LINDXAU[i]; otherwise +the row LINDXA[i] of A should be moved at the position +-LINDXAU[i] within A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp02N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp02N.3 new file mode 100644 index 000000000..600449c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp02N.3 @@ -0,0 +1,85 @@ +.TH HPL_dlaswp02N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp02N \- pack rows of A into columns of W.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp02N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&W0\fR, +\fB\&double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp02N\fR +packs scattered rows of an array A into workspace W. +The row offsets in A are specified by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +copied into W. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +copied into W. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N). The rows +of this array specified by LINDXA should be copied into W. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +W0 (local input/output) double * +On exit, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local output) double * +On entry, W is an array of size (LDW,M). On exit, W contains +the rows LINDXA[i] for i in [0..M) of A stored contiguously +in W(:,i). +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied into W. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U that should be copied into A and +replaced by the rows of W. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03N.3 new file mode 100644 index 000000000..1ba0b3208 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03N.3 @@ -0,0 +1,75 @@ +.TH HPL_dlaswp03N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp03N \- copy rows of W into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp03N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp03N\fR +copies columns of W into rows of an array U. The +destination in U of these columns contained in W is stored within W0. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of W stored +contiguously that should be copied into U. M must be at least +zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of columns of W stored +contiguously that should be copied into U. N must be at least +zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). Columns +of W are copied as rows within this array U at the positions +specified in W0. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). 
+.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M), that contains data +to be copied into U. For i in [0..M), entries W(:,i) should +be copied into the row or column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03T.3 new file mode 100644 index 000000000..d8bd11ec1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp03T.3 @@ -0,0 +1,75 @@ +.TH HPL_dlaswp03T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp03T \- copy columns of W into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp03T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp03T\fR +copies columns of W into an array U. The destination +in U of these columns contained in W is stored within W0. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of W stored +contiguously that should be copied into U. M must be at least +zero. 
+.TP 8 +N (local input) const int +On entry, N specifies the length of columns of W stored +contiguously that should be copied into U. N must be at least +zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,M). Columns +of W are copied within the array U at the positions specified +in W0. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M), that contains data +to be copied into U. For i in [0..M), entries W(:,i) should +be copied into the row or column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04N.3 new file mode 100644 index 000000000..9f12d79ab --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04N.3 @@ -0,0 +1,106 @@ +.TH HPL_dlaswp04N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp04N \- copy rows of U in A and replace them with columns of W. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp04N(\fR +\fB\&const int\fR +\fI\&M0\fR, +\fB\&const int\fR +\fI\&M1\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp04N\fR +copies M0 rows of U into A and replaces those rows of U +with columns of W. In addition M1 - M0 columns of W are copied into +rows of U. +.SH ARGUMENTS +.TP 8 +M0 (local input) const int +On entry, M0 specifies the number of rows of U that should be +copied into A and replaced by columns of W. M0 must be at +least zero. +.TP 8 +M1 (local input) const int +On entry, M1 specifies the number of columns of W that should +be copied into rows of U. M1 must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of U that should +be copied into A. N must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows that are to be copied into A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M1). +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M0). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. 
+.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M0+M1), that contains +data to be copied into U. For i in [M0..M0+M1), the entries +W(:,i) are copied into the row W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M0 containing the +local row indexes A into which rows of U are copied. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M0 that contains +the local row indexes of U that should be copied into A and +replaced by the columns of W. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04T.3 new file mode 100644 index 000000000..448334148 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp04T.3 @@ -0,0 +1,107 @@ +.TH HPL_dlaswp04T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp04T \- copy columns of U in rows of A and replace them with columns of W. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp04T(\fR +\fB\&const int\fR +\fI\&M0\fR, +\fB\&const int\fR +\fI\&M1\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp04T\fR +copies M0 columns of U into rows of A and replaces those +columns of U with columns of W. In addition M1 - M0 columns of W are +copied into U. +.SH ARGUMENTS +.TP 8 +M0 (local input) const int +On entry, M0 specifies the number of columns of U that should +be copied into A and replaced by columns of W. M0 must be at +least zero. +.TP 8 +M1 (local input) const int +On entry, M1 specifies the number of columns of W that will +be copied into U. M1 must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the columns of U that +will be copied into rows of A. N must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns that are to be copied into rows of +A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M0). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied.
+.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M0+M1), that contains +data to be copied into U. For i in [M0..M0+M1), the entries +W(:,i) are copied into the column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M0 containing the +local row indexes A into which columns of U are copied. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M0 that contains +the local column indexes of U that should be copied into A +and replaced by the columns of W. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05N.3 new file mode 100644 index 000000000..371dd0b92 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05N.3 @@ -0,0 +1,77 @@ +.TH HPL_dlaswp05N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp05N \- copy rows of U into A. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp05N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp05N\fR +copies rows of U of global offset LINDXAU into rows of +A at positions indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of U that should be +copied into A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of U that should +be copied into A. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) const double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows that are to be copied into A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied from U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U that should be copied in A. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05T.3 new file mode 100644 index 000000000..5d70a7a16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp05T.3 @@ -0,0 +1,77 @@ +.TH HPL_dlaswp05T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp05T \- copy rows of U into A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp05T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp05T\fR +copies columns of U of global offset LINDXAU into rows +of A at positions indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the columns of U that will +be copied into rows of A. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M).
+.TP 8 +U (local input/output) const double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns that are to be copied into rows of +A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied from U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local column indexes of U that should be copied in A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06N.3 new file mode 100644 index 000000000..7fa19d41a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06N.3 @@ -0,0 +1,72 @@ +.TH HPL_dlaswp06N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp06N \- swap rows of U with rows of A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp06N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp06N\fR +swaps rows of U with rows of A at positions +indicated by LINDXA. 
+.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +swapped with rows of U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of A that should +be swapped with rows of U. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows or columns of U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows of U that are to be swapped with rows +of A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be swapped with U. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06T.3 new file mode 100644 index 000000000..41fa3d6ee --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp06T.3 @@ -0,0 +1,72 @@ +.TH HPL_dlaswp06T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp06T \- swap rows or columns of U with rows of A. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp06T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp06T\fR +swaps columns of U with rows of A at positions +indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +swapped with columns of U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of A that should +be swapped with columns of U. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns of U that are to be swapped with +rows of A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be swapped with U. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp10N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp10N.3 new file mode 100644 index 000000000..23465895c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlaswp10N.3 @@ -0,0 +1,59 @@ +.TH HPL_dlaswp10N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp10N \- performs a series of column interchanges. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp10N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int *\fR +\fI\&IPIV\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp10N\fR +performs a sequence of local column interchanges on a +matrix A. One column interchange is initiated for columns 0 through +N-1 of A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array A. M +must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the array A. N +must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). This +array contains the columns onto which the interchanges should +be applied. On exit, A contains the permuted matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +IPIV (local input) const int * +On entry, IPIV is an array of size N that contains the pivoting information. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlatcpy.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlatcpy.3 new file mode 100644 index 000000000..dc940e321 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlatcpy.3 @@ -0,0 +1,70 @@ +.TH HPL_dlatcpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlatcpy \- B := A^T +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlatcpy(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlatcpy\fR +copies the transpose of an array A into an array B. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array B and +the number of columns of A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of rows of the array A and +the number of columns of B. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,M). +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,N). +.TP 8 +B (local output) double * +On entry, B points to an array of dimension (LDB,N). On exit, +B is overwritten with the transpose of A. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of the array B. +LDB must be at least MAX(1,M). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlatcpy( 2, 2, a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlacpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocmax.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocmax.3 new file mode 100644 index 000000000..f68f887c9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocmax.3 @@ -0,0 +1,69 @@ +.TH HPL_dlocmax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocmax \- finds the maximum entry in matrix column. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocmax(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocmax\fR +finds the maximum entry in the current column and packs +the useful information in WORK[0:3]. On exit, WORK[0] contains the +local maximum absolute value scalar, WORK[1] is the corresponding +local row index, WORK[2] is the corresponding global row index, and +WORK[3] is the coordinate of the process owning this max. When N is +less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set +to the total number of process rows. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +N (local input) const int +On entry, N specifies the local number of rows of the column +of A on which we operate. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 4. 
On exit, +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. +.SH SEE ALSO +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpN.3 new file mode 100644 index 000000000..367e37e36 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpN.3 @@ -0,0 +1,62 @@ +.TH HPL_dlocswpN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocswpN \- locally swaps rows within panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocswpN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocswpN\fR +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. 
+.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. The N0 length max +row is stored in WORK[4:4+N0-1]; Note that this is also the +JJth row (or column) of L1. The remaining part of this array +is used as workspace. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpT.3 new file mode 100644 index 000000000..f864de535 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dlocswpT.3 @@ -0,0 +1,62 @@ +.TH HPL_dlocswpT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocswpT \- locally swaps rows within panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocswpT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocswpT\fR +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. The N0 length max +row is stored in WORK[4:4+N0-1]; Note that this is also the +JJth row (or column) of L1. The remaining part of this array +is used as workspace. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dmatgen.3 new file mode 100644 index 000000000..c287fb0fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dmatgen.3 @@ -0,0 +1,55 @@ +.TH HPL_dmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dmatgen \- random matrix generator. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dmatgen(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&ISEED\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dmatgen\fR +generates (or regenerates) a random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. +.SH ARGUMENTS +.TP 8 +M (input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +A (output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +this array contains the coefficients of the randomly +generated matrix. +.TP 8 +LDA (input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). +.TP 8 +ISEED (input) const int +On entry, ISEED specifies the seed number to generate the +matrix A. ISEED must be at least zero. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dscal.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dscal.3 new file mode 100644 index 000000000..8f42a10f5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dscal.3 @@ -0,0 +1,62 @@ +.TH HPL_dscal 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dscal \- x = alpha * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dscal(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dscal\fR +scales the vector x by alpha. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vector x. N must be +at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero, then the entries of the incremented array X +need not be set on input. +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +On exit, the entries of the incremented array X are scaled +by the scalar alpha. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + HPL_dscal( 3, 2.0, x, 1 ); +.br + printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dswap \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dswap.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dswap.3 new file mode 100644 index 000000000..a398f795a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dswap.3 @@ -0,0 +1,73 @@ +.TH HPL_dswap 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dswap \- y <-> x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dswap(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dswap\fR +swaps the vectors x and y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +On exit, the entries of the incremented array X are updated +with the entries of the incremented array Y. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_dswap( 3, x, 1, y, 1 ); +.br + printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsm.3 new file mode 100644 index 000000000..ad099eb83 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsm.3 @@ -0,0 +1,152 @@ +.TH HPL_dtrsm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dtrsm \- B := A^{-1} * B or B := B * A^{-1}. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dtrsm(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const enum HPL_UPLO\fR +\fI\&UPLO\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const enum HPL_DIAG\fR +\fI\&DIAG\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dtrsm\fR +solves one of the matrix equations + + op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + +where alpha is a scalar, X and B are m by n matrices, A is a unit, or +non-unit, upper or lower triangular matrix and op(A) is one of + + op( A ) = A or op( A ) = A^T. + +The matrix X is overwritten on B. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +SIDE (local input) const enum HPL_SIDE +On entry, SIDE specifies whether op(A) appears on the left +or right of X as follows: + SIDE==HplLeft op( A ) * X = alpha * B, + SIDE==HplRight X * op( A ) = alpha * B. 
+.TP 8 +UPLO (local input) const enum HPL_UPLO +On entry, UPLO specifies whether the upper or lower +triangular part of the array A is to be referenced. When +UPLO==HplUpper, only the upper triangular part of A is to be +referenced, otherwise only the lower triangular part of A is +to be referenced. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the form of op(A) to be used in +the matrix-matrix operation as follows: + TRANS==HplNoTrans : op( A ) = A, + TRANS==HplTrans : op( A ) = A^T, + TRANS==HplConjTrans : op( A ) = A^T. +.TP 8 +DIAG (local input) const enum HPL_DIAG +On entry, DIAG specifies whether A is unit triangular or +not. When DIAG==HplUnit, A is assumed to be unit triangular, +and otherwise, A is not assumed to be unit triangular. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix B. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix B. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then the elements of the matrix B need not +be set on input. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * k, where k is m when SIDE==HplLeft and is n +otherwise. Before entry with UPLO==HplUpper, the leading +k by k upper triangular part of the array A must contain the +upper triangular matrix and the strictly lower triangular +part of A is not referenced. When UPLO==HplLower on entry, +the leading k by k lower triangular part of the array A must +contain the lower triangular matrix and the strictly upper +triangular part of A is not referenced. + +Note that when DIAG==HplUnit, the diagonal elements of A are +not referenced either, but are assumed to be unity.
+.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. +.TP 8 +B (local input/output) double * +On entry, B points to an array of size equal to or greater +than LDB * n. Before entry, the leading m by n part of the +array B must contain the matrix B, except when alpha is zero, +in which case B need not be set on entry. On exit, the array +B is overwritten by the m by n solution matrix. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of B as +declared in the calling (sub) program. LDB must be at +least MAX(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; +.br + b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; +.br + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, +.br + HplNoTrans, HplNonUnit, 2, 2, 2.0, +.br + a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dgemm \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsv.3 new file mode 100644 index 000000000..5df37c78b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_dtrsv.3 @@ -0,0 +1,121 @@ +.TH HPL_dtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dtrsv \- x := A^{-1} x.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dtrsv(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_UPLO\fR +\fI\&UPLO\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const enum HPL_DIAG\fR +\fI\&DIAG\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dtrsv\fR +solves one of the systems of equations + + A * x = b, or A^T * x = b, + +where b and x are n-element vectors and A is an n by n non-unit, or +unit, upper or lower triangular matrix. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +UPLO (local input) const enum HPL_UPLO +On entry, UPLO specifies whether the upper or lower +triangular part of the array A is to be referenced. When +UPLO==HplUpper, only the upper triangular part of A is to be +referenced, otherwise only the lower triangular part of A is +to be referenced. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the equations to be solved as +follows: + TRANS==HplNoTrans A * x = b, + TRANS==HplTrans A^T * x = b. +.TP 8 +DIAG (local input) const enum HPL_DIAG +On entry, DIAG specifies whether A is unit triangular or +not. When DIAG==HplUnit, A is assumed to be unit triangular, +and otherwise, A is not assumed to be unit triangular. +.TP 8 +N (local input) const int +On entry, N specifies the order of the matrix A. N must be at +least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * n. 
Before entry with UPLO==HplUpper, the leading +n by n upper triangular part of the array A must contain the +upper triangular matrix and the strictly lower triangular +part of A is not referenced. When UPLO==HplLower on entry, +the leading n by n lower triangular part of the array A must +contain the lower triangular matrix and the strictly upper +triangular part of A is not referenced. + +Note that when DIAG==HplUnit, the diagonal elements of A are +not referenced either, but are assumed to be unity. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,n). +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +Before entry, the incremented array X must contain the n +element right-hand side vector b. On exit, X is overwritten +with the solution vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2]; +.br + a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; +.br + x[0] = 2.0; x[1] = 1.0; +.br + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, +.br + HplNonUnit, 2, a, 2, x, 1 ); +.br + printf("x=[%f,%f]\en", x[0], x[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dger \ (3), +.BR HPL_dgemv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_equil.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_equil.3 new file mode 100644 index 000000000..817780e44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_equil.3 @@ -0,0 +1,91 @@ +.TH HPL_equil 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_equil \- Equilibrate U and forward the column panel L.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_equil(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_equil\fR +equilibrates the local pieces of U, so that on exit to +this function, pieces of U contained in every process row are of the +same size. This phase makes the rolling phase optimal. In addition, +this function probes for the column panel L and forwards it when +possible. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be equilibrated) information. +.TP 8 +TRANS (global input) const enum HPL_TRANS +On entry, TRANS specifies whether U is stored in transposed +or non-transposed form. +.TP 8 +N (local input) const int +On entry, N specifies the number of rows or columns of U. N +must be at least 0. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[nprow]) when U is stored in +non-transposed form, and MAX(1,N) otherwise. +.TP 8 +IPLEN (global input) int * +On entry, IPLEN is an array of dimension NPROW+1. 
This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension NPROW+1. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_fprintf.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_fprintf.3 new file mode 100644 index 000000000..8a81c0bfb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_fprintf.3 @@ -0,0 +1,44 @@ +.TH HPL_fprintf 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_fprintf \- fprintf + fflush wrapper. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_fprintf(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_fprintf\fR +is a wrapper around fprintf flushing the output stream. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_fprintf( stdout, "Hello World.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_abort \ (3), +.BR HPL_warn \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_exit.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_exit.3 new file mode 100644 index 000000000..dab8067e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_exit.3 @@ -0,0 +1,25 @@ +.TH HPL_grid_exit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_exit \- Exit process grid. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_exit(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_exit\fR +marks the process grid object for deallocation. The +returned error code MPI_SUCCESS indicates successful completion. +Other error codes are (MPI) implementation dependent. +.SH ARGUMENTS +.TP 8 +GRID (local input/output) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid to be released. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_init \ (3), +.BR HPL_grid_info \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_info.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_info.3 new file mode 100644 index 000000000..53c6a214b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_info.3 @@ -0,0 +1,52 @@ +.TH HPL_grid_info 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_info \- Retrieve grid information. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_info(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&int *\fR +\fI\&NPROW\fR, +\fB\&int *\fR +\fI\&NPCOL\fR, +\fB\&int *\fR +\fI\&MYROW\fR, +\fB\&int *\fR +\fI\&MYCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_info\fR +returns the grid shape and the coordinates in the grid +of the calling process. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +NPROW (global output) int * +On exit, NPROW specifies the number of process rows in the +grid. NPROW is at least one. +.TP 8 +NPCOL (global output) int * +On exit, NPCOL specifies the number of process columns in +the grid. NPCOL is at least one. +.TP 8 +MYROW (global output) int * +On exit, MYROW specifies my row process coordinate in the +grid. MYROW is greater than or equal to zero and less than +NPROW. +.TP 8 +MYCOL (global output) int * +On exit, MYCOL specifies my column process coordinate in the +grid. MYCOL is greater than or equal to zero and less than +NPCOL. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_init \ (3), +.BR HPL_grid_exit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_init.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_init.3 new file mode 100644 index 000000000..7792a522d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_grid_init.3 @@ -0,0 +1,55 @@ +.TH HPL_grid_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_init \- Create a process grid. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_init(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR, +\fB\&const HPL_T_ORDER\fR +\fI\&ORDER\fR, +\fB\&const int\fR +\fI\&NPROW\fR, +\fB\&const int\fR +\fI\&NPCOL\fR, +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_init\fR +creates a NPROW x NPCOL process grid using column- or +row-major ordering from an initial collection of processes identified +by an MPI communicator. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. The coordinates of processes that are not part of the +grid are set to values outside of [0..NPROW) x [0..NPCOL). +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +On entry, COMM is the MPI communicator identifying the +initial collection of processes out of which the grid is +formed. +.TP 8 +ORDER (global input) const HPL_T_ORDER +On entry, ORDER specifies how the processes should be ordered +in the grid as follows: + ORDER = HPL_ROW_MAJOR row-major ordering; + ORDER = HPL_COLUMN_MAJOR column-major ordering; +.TP 8 +NPROW (global input) const int +On entry, NPROW specifies the number of process rows in the +grid to be created. NPROW must be at least one. +.TP 8 +NPCOL (global input) const int +On entry, NPCOL specifies the number of process columns in +the grid to be created. NPCOL must be at least one. +.TP 8 +GRID (local input/output) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information to be initialized. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_info \ (3), +.BR HPL_grid_exit \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_idamax.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_idamax.3 new file mode 100644 index 000000000..c00292a02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_idamax.3 @@ -0,0 +1,59 @@ +.TH HPL_idamax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|). +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_idamax(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_idamax\fR +returns the index in an n-vector x of the first element +having maximum absolute value. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vector x. N must be +at least zero. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3]; +.br + int imax; +.br + x[0] = 1.0; x[1] = 3.0; x[2] = 2.0; +.br + imax = HPL_idamax( 3, x, 1 ); +.br + printf("imax=%d\en", imax); +.br + exit(0); +.br + return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2l.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2l.3 new file mode 100644 index 000000000..32c4d9e07 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2l.3 @@ -0,0 +1,53 @@ +.TH HPL_indxg2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2l \- Map a global index into a local one. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxg2l(\fR +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2l\fR +computes the local index of a matrix entry pointed to by +the global index IG. This local returned index is the same in all +processes. +.SH ARGUMENTS +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, if SRCPROC = -1, the data is not distributed but +replicated, in which case this routine returns IG in all +processes. Otherwise, the value of SRCPROC is ignored. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2lp.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2lp.3 new file mode 100644 index 000000000..ca2004031 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2lp.3 @@ -0,0 +1,66 @@ +.TH HPL_indxg2lp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2lp \- Map a global index into a local index and a process coordinate. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_indxg2lp(\fR +\fB\&int *\fR +\fI\&IL\fR, +\fB\&int *\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2lp\fR +computes the local index of a matrix entry pointed to by +the global index IG as well as the process coordinate which possesses +this entry. The local returned index is the same in all processes. +.SH ARGUMENTS +.TP 8 +IL (output) int * +On exit, IL specifies the local index corresponding to IG. IL +is at least zero. +.TP 8 +PROC (output) int * +On exit, PROC is the coordinate of the process owning the +entry specified by the global index IG. PROC is at least zero +and less than NPROCS. +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, if SRCPROC = -1, the data is not distributed but +replicated, in which case this routine returns IG in all +processes. Otherwise, the value of SRCPROC is ignored. 
+.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2p.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2p.3 new file mode 100644 index 000000000..5e0273feb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxg2p.3 @@ -0,0 +1,52 @@ +.TH HPL_indxg2p 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2p \- Map a global index into a process coordinate. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxg2p(\fR +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2p\fR +computes the process coordinate which possesses the entry +of a matrix specified by a global index IG. +.SH ARGUMENTS +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. 
+.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxl2g.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxl2g.3 new file mode 100644 index 000000000..ba6da53a7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_indxl2g.3 @@ -0,0 +1,59 @@ +.TH HPL_indxl2g 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxl2g \- Map an index-process pair into a global index. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxl2g(\fR +\fB\&const int\fR +\fI\&IL\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxl2g\fR +computes the global index of a matrix entry pointed to +by the local index IL of the process indicated by PROC. +.SH ARGUMENTS +.TP 8 +IL (input) const int +On entry, IL specifies the local index of the matrix entry. +IL must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local array row or column is to be determined. PROC must be +at least zero and strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS. 
+.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_infog2l.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_infog2l.3 new file mode 100644 index 000000000..c07f276d5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_infog2l.3 @@ -0,0 +1,126 @@ +.TH HPL_infog2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_infog2l \- global to local index translation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_infog2l(\fR +\fB\&int\fR +\fI\&I\fR, +\fB\&int\fR +\fI\&J\fR, +\fB\&const int\fR +\fI\&IMB\fR, +\fB\&const int\fR +\fI\&MB\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&RSRC\fR, +\fB\&const int\fR +\fI\&CSRC\fR, +\fB\&const int\fR +\fI\&MYROW\fR, +\fB\&const int\fR +\fI\&MYCOL\fR, +\fB\&const int\fR +\fI\&NPROW\fR, +\fB\&const int\fR +\fI\&NPCOL\fR, +\fB\&int *\fR +\fI\&II\fR, +\fB\&int *\fR +\fI\&JJ\fR, +\fB\&int *\fR +\fI\&PROW\fR, +\fB\&int *\fR +\fI\&PCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_infog2l\fR +computes the starting local index II, JJ corresponding to +the submatrix starting globally at the entry pointed by I, J. This +routine returns the coordinates in the grid of the process owning the +matrix entry of global indexes I, J, namely PROW and PCOL. +.SH ARGUMENTS +.TP 8 +I (global input) int +On entry, I specifies the global row index of the matrix +entry. I must be at least zero. +.TP 8 +J (global input) int +On entry, J specifies the global column index of the matrix +entry. J must be at least zero. 
+.TP 8 +IMB (global input) const int +On entry, IMB specifies the size of the first row block of +the global matrix. IMB must be at least one. +.TP 8 +MB (global input) const int +On entry, MB specifies the blocking factor used to partition +and distribute the rows of the matrix A. MB must be larger +than one. +.TP 8 +INB (global input) const int +On entry, INB specifies the size of the first column block of +the global matrix. INB must be at least one. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the columns of the matrix A. NB must be larger +than one. +.TP 8 +RSRC (global input) const int +On entry, RSRC specifies the row coordinate of the process +that possesses the row I. RSRC must be at least zero and +strictly less than NPROW. +.TP 8 +CSRC (global input) const int +On entry, CSRC specifies the column coordinate of the process +that possesses the column J. CSRC must be at least zero and +strictly less than NPCOL. +.TP 8 +MYROW (local input) const int +On entry, MYROW specifies my row process coordinate in the +grid. MYROW is greater than or equal to zero and less than +NPROW. +.TP 8 +MYCOL (local input) const int +On entry, MYCOL specifies my column process coordinate in the +grid. MYCOL is greater than or equal to zero and less than +NPCOL. +.TP 8 +NPROW (global input) const int +On entry, NPROW specifies the number of process rows in the +grid. NPROW is at least one. +.TP 8 +NPCOL (global input) const int +On entry, NPCOL specifies the number of process columns in +the grid. NPCOL is at least one. +.TP 8 +II (local output) int * +On exit, II specifies the local starting row index of the +submatrix. On exit, II is at least 0. +.TP 8 +JJ (local output) int * +On exit, JJ specifies the local starting column index of the +submatrix. On exit, JJ is at least 0. 
+.TP 8 +PROW (global output) int * +On exit, PROW is the row coordinate of the process owning the +entry specified by the global index I. PROW is at least zero +and less than NPROW. +.TP 8 +PCOL (global output) int * +On exit, PCOL is the column coordinate of the process owning +the entry specified by the global index J. PCOL is at least +zero and less than NPCOL. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_jumpit.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_jumpit.3 new file mode 100644 index 000000000..66e77ac32 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_jumpit.3 @@ -0,0 +1,48 @@ +.TH HPL_jumpit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_jumpit \- jump into the random sequence. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_jumpit(\fR +\fB\&int *\fR +\fI\&MULT\fR, +\fB\&int *\fR +\fI\&IADD\fR, +\fB\&int *\fR +\fI\&IRANN\fR, +\fB\&int *\fR +\fI\&IRANM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_jumpit\fR +jumps in the random sequence from the number X(n) encoded +in IRANN to the number X(m) encoded in IRANM using the constants A +and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A +and C obviously depend on m and n, see the function HPL_xjumpm in +order to initialize them. +.SH ARGUMENTS +.TP 8 +MULT (local input) int * +On entry, MULT is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant A. +.TP 8 +IADD (local input) int * +On entry, IADD is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant C. +.TP 8 +IRANN (local input) int * +On entry, IRANN is an array of dimension 2, that contains +the 16-lower and 15-higher bits of the encoding of X(n). 
+.TP 8 +IRANM (local output) int * +On entry, IRANM is an array of dimension 2. On exit, this +array contains respectively the 16-lower and 15-higher bits +of the encoding of X(m). +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ladd.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ladd.3 new file mode 100644 index 000000000..9fd6805d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ladd.3 @@ -0,0 +1,41 @@ +.TH HPL_ladd 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ladd \- Adds two long positive integers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_ladd(\fR +\fB\&int *\fR +\fI\&J\fR, +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_ladd\fR +adds without carry two long positive integers K and J and +puts the result into I. The long integers I, J, K are encoded on 64 +bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second +entry. +.SH ARGUMENTS +.TP 8 +J (local input) int * +On entry, J is an integer array of dimension 2 containing the +encoded long integer J. +.TP 8 +K (local input) int * +On entry, K is an integer array of dimension 2 containing the +encoded long integer K. +.TP 8 +I (local output) int * +On entry, I is an integer array of dimension 2. On exit, this +array contains the encoded long integer result. +.SH SEE ALSO +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_lmul.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_lmul.3 new file mode 100644 index 000000000..8be7380e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_lmul.3 @@ -0,0 +1,42 @@ +.TH HPL_lmul 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_lmul \- multiplies 2 long positive integers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_lmul(\fR +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&J\fR, +\fB\&int *\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_lmul\fR +multiplies without carry two long positive integers K and J +and puts the result into I. The long integers I, J, K are encoded on +64 bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second entry +of each array. For efficiency purposes, the intrinsic modulo function +is inlined. +.SH ARGUMENTS +.TP 8 +K (local input) int * +On entry, K is an integer array of dimension 2 containing the +encoded long integer K. +.TP 8 +J (local input) int * +On entry, J is an integer array of dimension 2 containing the +encoded long integer J. +.TP 8 +I (local output) int * +On entry, I is an integer array of dimension 2. On exit, this +array contains the encoded long integer result. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_logsort.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_logsort.3 new file mode 100644 index 000000000..e7e80062a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_logsort.3 @@ -0,0 +1,65 @@ +.TH HPL_logsort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_logsort \- Sort the processes in logarithmic order. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_logsort(\fR +\fB\&const int\fR +\fI\&NPROCS\fR, +\fB\&const int\fR +\fI\&ICURROC\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_logsort\fR +computes an array IPMAP and its inverse IPMAPM1 that +contain the logarithmic sorted processes id with respect to the local +number of rows of U that they own. This is necessary to ensure that +the logarithmic spreading of U is optimal in terms of number of steps +and communication volume as well. In other words, the largest pieces +of U will be sent a minimal number of times. +.SH ARGUMENTS +.TP 8 +NPROCS (global input) const int +On entry, NPROCS specifies the number of process rows in the +process grid. NPROCS is at least one. +.TP 8 +ICURROC (global input) const int +On entry, ICURROC is the source process row. +.TP 8 +IPLEN (global input/output) int * +On entry, IPLEN is an array of dimension NPROCS+1, such that +IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, +that process i-1 has. On exit, IPLEN[i] is the number of +rows of U in the processes before process IPMAP[i] after the +sort, with the convention that IPLEN[NPROCS] is the total +number of rows of the panel. In other words, IPLEN[i+1] - +IPLEN[i] is the number of rows of A that should be moved to +the process IPMAP[i]. 
IPLEN is such that the number of rows +of the source process row is IPLEN[1] - IPLEN[0], and the +remaining entries of this array are sorted so that the +quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROCS. On exit, +this array contains the logarithmic mapping of the processes. In +other words, IPMAP[myroc] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROCS. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROCS) +.SH SEE ALSO +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_max.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_max.3 new file mode 100644 index 000000000..16d8aecc6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_max.3 @@ -0,0 +1,43 @@ +.TH HPL_max 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_max \- Combine (max) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_max(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_max\fR +combines (max) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contain the +combined results. 
+.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_min.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_min.3 new file mode 100644 index 000000000..a816d61b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_min.3 @@ -0,0 +1,43 @@ +.TH HPL_min 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_min \- Combine (min) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_min(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_min\fR +combines (min) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contain the +combined results. +.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numroc.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numroc.3 new file mode 100644 index 000000000..34c8acfa9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numroc.3 @@ -0,0 +1,60 @@ +.TH HPL_numroc 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_numroc \- Compute the local number of row/columns. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_numroc(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_numroc\fR +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index 0. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the number of rows/columns being dealt +out. N must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local portion is determined. PROC must be at least zero and +strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. 
+.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numrocI.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numrocI.3 new file mode 100644 index 000000000..1891f1ac9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_numrocI.3 @@ -0,0 +1,66 @@ +.TH HPL_numrocI 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_numrocI \- Compute the local number of row/columns. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_numrocI(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&I\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_numrocI\fR +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index I. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the number of rows/columns being dealt +out. N must be at least zero. +.TP 8 +I (input) const int +On entry, I specifies the global index of the matrix entry. +I must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local portion is determined. PROC must be at least zero and +strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. 
SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pabort.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pabort.3 new file mode 100644 index 000000000..044e87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pabort.3 @@ -0,0 +1,40 @@ +.TH HPL_pabort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pabort \- halts execution. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pabort(\fR +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pabort\fR +displays an error message on stderr and halts execution. +.SH ARGUMENTS +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH SEE ALSO +.BR HPL_fprintf \ (3), +.BR HPL_pwarn \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_packL.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_packL.3 new file mode 100644 index 000000000..c79019c37 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_packL.3 @@ -0,0 +1,42 @@ +.TH HPL_packL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_packL \- Form the MPI structure for the row ring broadcasts. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_packL(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&INDEX\fR, +\fB\&const int\fR +\fI\&LEN\fR, +\fB\&const int\fR +\fI\&IBUF\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_packL\fR +forms the MPI data type for the panel to be broadcast. +Successful completion is indicated by the returned error code +MPI_SUCCESS. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.TP 8 +INDEX (input) const int +On entry, INDEX points to the first entry of the packed +buffer being broadcast. +.TP 8 +LEN (input) const int +On entry, LEN is the length of the packed buffer. +.TP 8 +IBUF (input) const int +On entry, IBUF specifies the panel buffer/count/type entries +that should be initialized. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pddriver.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pddriver.3 new file mode 100644 index 000000000..30e55b62e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pddriver.3 @@ -0,0 +1,15 @@ +.TH main 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +main \- HPL main timing program. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&main();\fR +.SH DESCRIPTION +\fB\&main\fR +is the main driver program for testing the HPL routines. +This program is driven by a short data file named "HPL.dat". +.SH SEE ALSO +.BR HPL_pdinfo \ (3), +.BR HPL_pdtest \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdfact.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdfact.3 new file mode 100644 index 000000000..e3db5fb8b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdfact.3 @@ -0,0 +1,64 @@ +.TH HPL_pdfact 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdfact \- recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdfact(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdfact\fR +recursively factorizes a 1-dimensional panel of columns. +The RPFACT function pointer specifies the recursive algorithm to be +used, either Crout, Left- or Right looking. NBMIN allows to vary the +recursive stopping criterion in terms of the number of columns in the +panel, and NDIV allows to specify the number of subpanels each panel +should be divided into. Usually a value of 2 will be chosen. Finally +PFACT is a function pointer specifying the non-recursive algorithm +to be used on at most NBMIN columns. One can also choose here between +Crout, Left- or Right looking. Empirical tests seem to indicate that +values of 4 or 8 for NBMIN give the best results. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. 
On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv.3 new file mode 100644 index 000000000..ab4b62c4e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv.3 @@ -0,0 +1,40 @@ +.TH HPL_pdgesv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesv \- Solve A x = b. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesv(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesv\fR +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with or without look-ahead. The lower triangular factor is left +unpivoted and the pivots are not returned. The right hand side is the +N+1 column of the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv0.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv0.3 new file mode 100644 index 000000000..180f191f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesv0.3 @@ -0,0 +1,47 @@ +.TH HPL_pdgesv0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesv0 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesv0(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesv0\fR +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +without look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned.
The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK1.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK1.3 new file mode 100644 index 000000000..64cee67ed --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK1.3 @@ -0,0 +1,46 @@ +.TH HPL_pdgesvK1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesvK1 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesvK1(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesvK1\fR +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information.
+.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK2.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK2.3 new file mode 100644 index 000000000..9f389b9dd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdgesvK2.3 @@ -0,0 +1,47 @@ +.TH HPL_pdgesvK2 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesvK2 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesvK2(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesvK2\fR +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information.
+.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdinfo.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdinfo.3 new file mode 100644 index 000000000..eed541159 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdinfo.3 @@ -0,0 +1,212 @@ +.TH HPL_pdinfo 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdinfo \- Read input parameter file. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdinfo(\fR +\fB\&HPL_T_test *\fR +\fI\&TEST\fR, +\fB\&int *\fR +\fI\&NS\fR, +\fB\&int *\fR +\fI\&N\fR, +\fB\&int *\fR +\fI\&NBS\fR, +\fB\&int *\fR +\fI\&NB\fR, +\fB\&HPL_T_ORDER *\fR +\fI\&PMAPPIN\fR, +\fB\&int *\fR +\fI\&NPQS\fR, +\fB\&int *\fR +\fI\&P\fR, +\fB\&int *\fR +\fI\&Q\fR, +\fB\&int *\fR +\fI\&NPFS\fR, +\fB\&HPL_T_FACT *\fR +\fI\&PF\fR, +\fB\&int *\fR +\fI\&NBMS\fR, +\fB\&int *\fR +\fI\&NBM\fR, +\fB\&int *\fR +\fI\&NDVS\fR, +\fB\&int *\fR +\fI\&NDV\fR, +\fB\&int *\fR +\fI\&NRFS\fR, +\fB\&HPL_T_FACT *\fR +\fI\&RF\fR, +\fB\&int *\fR +\fI\&NTPS\fR, +\fB\&HPL_T_TOP *\fR +\fI\&TP\fR, +\fB\&int *\fR +\fI\&NDHS\fR, +\fB\&int *\fR +\fI\&DH\fR, +\fB\&HPL_T_SWAP *\fR +\fI\&FSWAP\fR, +\fB\&int *\fR +\fI\&TSWAP\fR, +\fB\&int *\fR +\fI\&L1NOTRAN\fR, +\fB\&int *\fR +\fI\&UNOTRAN\fR, +\fB\&int *\fR +\fI\&EQUIL\fR, +\fB\&int *\fR +\fI\&ALIGN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdinfo\fR +reads the startup information for the various tests and +transmits it to all processes. +.SH ARGUMENTS +.TP 8 +TEST (global output) HPL_T_test * +On entry, TEST points to a testing data structure. 
On exit, +the fields of this data structure are initialized as follows: +TEST->outfp specifies the output file where the results will +be printed. It is only defined and used by the process 0 of +the grid. TEST->thrsh specifies the threshold value for the +test ratio. TEST->epsil is the relative machine precision of +the distributed computer. Finally the test counters, kfail, +kpass, kskip, ktest are initialized to zero. +.TP 8 +NS (global output) int * +On exit, NS specifies the number of different problem sizes +to be tested. NS is less than or equal to HPL_MAX_PARAM. +.TP 8 +N (global output) int * +On entry, N is an array of dimension HPL_MAX_PARAM. On exit, +the first NS entries of this array contain the problem sizes +to run the code with. +.TP 8 +NBS (global output) int * +On exit, NBS specifies the number of different distribution +blocking factors to be tested. NBS must be less than or equal +to HPL_MAX_PARAM. +.TP 8 +NB (global output) int * +On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, +the first NBS entries of this array contain the values of the +various distribution blocking factors, to run the code with. +.TP 8 +PMAPPIN (global output) HPL_T_ORDER * +On exit, PMAPPIN specifies the process mapping onto the +nodes of the MPI machine configuration. PMAPPIN defaults to +row-major ordering. +.TP 8 +NPQS (global output) int * +On exit, NPQS specifies the number of different values that +can be used for P and Q, i.e., the number of process grids to +run the code with. NPQS must be less than or equal to +HPL_MAX_PARAM. +.TP 8 +P (global output) int * +On entry, P is an array of dimension HPL_MAX_PARAM. On exit, +the first NPQS entries of this array contain the values of P, +the number of process rows of the NPQS grids to run the code +with. +.TP 8 +Q (global output) int * +On entry, Q is an array of dimension HPL_MAX_PARAM.
On exit, +the first NPQS entries of this array contain the values of Q, +the number of process columns of the NPQS grids to run the +code with. +.TP 8 +NPFS (global output) int * +On exit, NPFS specifies the number of different values that +can be used for PF : the panel factorization algorithm to run +the code with. NPFS is less than or equal to HPL_MAX_PARAM. +.TP 8 +PF (global output) HPL_T_FACT * +On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, +the first NPFS entries of this array contain the various +panel factorization algorithms to run the code with. +.TP 8 +NBMS (global output) int * +On exit, NBMS specifies the number of various recursive +stopping criteria to be tested. NBMS must be less than or +equal to HPL_MAX_PARAM. +.TP 8 +NBM (global output) int * +On entry, NBM is an array of dimension HPL_MAX_PARAM. On +exit, the first NBMS entries of this array contain the values +of the various recursive stopping criteria to be tested. +.TP 8 +NDVS (global output) int * +On exit, NDVS specifies the number of various numbers of +panels in recursion to be tested. NDVS is less than or equal +to HPL_MAX_PARAM. +.TP 8 +NDV (global output) int * +On entry, NDV is an array of dimension HPL_MAX_PARAM. On +exit, the first NDVS entries of this array contain the values +of the various numbers of panels in recursion to be tested. +.TP 8 +NRFS (global output) int * +On exit, NRFS specifies the number of different values that +can be used for RF : the recursive factorization algorithm to +be tested. NRFS is less than or equal to HPL_MAX_PARAM. +.TP 8 +RF (global output) HPL_T_FACT * +On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, +the first NRFS entries of this array contain the various +recursive factorization algorithms to run the code with. +.TP 8 +NTPS (global output) int * +On exit, NTPS specifies the number of different values that +can be used for the broadcast topologies to be tested. NTPS +is less than or equal to HPL_MAX_PARAM. 
+.TP 8 +TP (global output) HPL_T_TOP * +On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, +the first NTPS entries of this array contain the various +broadcast (along rows) topologies to run the code with. +.TP 8 +NDHS (global output) int * +On exit, NDHS specifies the number of different values that +can be used for the lookahead depths to be tested. NDHS is +less than or equal to HPL_MAX_PARAM. +.TP 8 +DH (global output) int * +On entry, DH is an array of dimension HPL_MAX_PARAM. On +exit, the first NDHS entries of this array contain the values +of lookahead depths to run the code with. Such a value is +either 0 (no-lookahead) or greater than zero. +.TP 8 +FSWAP (global output) HPL_T_SWAP * +On exit, FSWAP specifies the swapping algorithm to be used in +all tests. +.TP 8 +TSWAP (global output) int * +On exit, TSWAP specifies the swapping threshold as a number +of columns when the mixed swapping algorithm was chosen. +.TP 8 +L1NOTRAN (global output) int * +On exit, L1NOTRAN specifies whether the upper triangle of the +panels of columns should be stored in no-transposed form +(L1NOTRAN=1) or in transposed form (L1NOTRAN=0). +.TP 8 +UNOTRAN (global output) int * +On exit, UNOTRAN specifies whether the panels of rows should +be stored in no-transposed form (UNOTRAN=1) or transposed +form (UNOTRAN=0) during their broadcast. +.TP 8 +EQUIL (global output) int * +On exit, EQUIL specifies whether equilibration during the +swap-broadcast of the panel of rows should be performed +(EQUIL=1) or not (EQUIL=0). +.TP 8 +ALIGN (global output) int * +On exit, ALIGN specifies the alignment of the dynamically +allocated buffers in double precision words. ALIGN is greater +than zero. +.SH SEE ALSO +.BR HPL_pddriver \ (3), +.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlamch.3 new file mode 100644 index 000000000..7ce46c23e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlamch.3 @@ -0,0 +1,53 @@ +.TH HPL_pdlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlamch \- determines machine-specific arithmetic constants. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_pdlamch(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR, +\fB\&const HPL_T_MACH\fR +\fI\&CMACH\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlamch\fR +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such that +1/sfmin does not overflow, the base of the machine (base), the precision +(prec), the number of (base) digits in the mantissa (t), whether +rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum +exponent before (gradual) underflow (emin), the underflow threshold +(rmin) - base**(emin-1), the largest exponent before overflow (emax), the +overflow threshold (rmax) - (base**emax)*(1-eps). +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection.
+.TP 8 +CMACH (global input) const HPL_T_MACH +Specifies the value to be returned by HPL_pdlamch + = HPL_MACH_EPS, HPL_pdlamch := eps (default) + = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + = HPL_MACH_BASE, HPL_pdlamch := base + = HPL_MACH_PREC, HPL_pdlamch := eps*base + = HPL_MACH_MLEN, HPL_pdlamch := t + = HPL_MACH_RND, HPL_pdlamch := rnd + = HPL_MACH_EMIN, HPL_pdlamch := emin + = HPL_MACH_RMIN, HPL_pdlamch := rmin + = HPL_MACH_EMAX, HPL_pdlamch := emax + = HPL_MACH_RMAX, HPL_pdlamch := rmax + +where + + eps = relative machine precision, + sfmin = safe minimum, + base = base of the machine, + prec = eps*base, + t = number of digits in the mantissa, + rnd = 1.0 if rounding occurs in addition, + emin = minimum exponent before underflow, + rmin = underflow threshold, + emax = largest exponent before overflow, + rmax = overflow threshold. diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlange.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlange.3 new file mode 100644 index 000000000..30593401b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlange.3 @@ -0,0 +1,68 @@ +.TH HPL_pdlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlange \- Compute ||A||. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_pdlange(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const HPL_T_NORM\fR +\fI\&NORM\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlange\fR +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a distributed matrix A: + + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +NORM (global input) const HPL_T_NORM +On entry, NORM specifies the value to be returned by this +function as described above. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,LocQ(N)), +that contains the local pieces of the distributed matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). +.SH SEE ALSO +.BR HPL_pdlaprnt \ (3), +.BR HPL_fprintf \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaprnt.3 new file mode 100644 index 000000000..feb010a67 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaprnt.3 @@ -0,0 +1,72 @@ +.TH HPL_pdlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaprnt \- Print a distributed matrix A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaprnt(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&IAROW\fR, +\fB\&const int\fR +\fI\&IACOL\fR, +\fB\&const char *\fR +\fI\&CMATNM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaprnt\fR +prints to standard error a distributed matrix A. The +local pieces of A are sent to the process of coordinates (0,0) in +the grid and then printed. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the coefficient +matrix A. M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the +coefficient matrix A. N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +A (local input) double * +On entry, A points to an array of dimension (LDA,LocQ(N)). +This array contains the coefficient matrix to be printed. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). 
+.TP 8 +IAROW (global input) const int +On entry, IAROW specifies the row process coordinate owning +the first row of A. IAROW must be larger than or equal to +zero and less than NPROW. +.TP 8 +IACOL (global input) const int +On entry, IACOL specifies the column process coordinate +owning the first column of A. IACOL must be larger than or +equal to zero and less than NPCOL. +.TP 8 +CMATNM (global input) const char * +On entry, CMATNM is the name of the matrix to be printed. +.SH SEE ALSO +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00N.3 new file mode 100644 index 000000000..3875400e3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00N.3 @@ -0,0 +1,65 @@ +.TH HPL_pdlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp00N \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp00N(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp00N\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. 
Mono +directional links will double this communication cost. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be broadcast and swapped) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx0 \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp05N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00T.3 new file mode 100644 index 000000000..39901ba4b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp00T.3 @@ -0,0 +1,65 @@ +.TH HPL_pdlaswp00T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp00T \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp00T(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp00T\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. 
+ +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be broadcast and swapped) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTT \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx0 \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01N.3 new file mode 100644 index 000000000..1ee14c0a8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01N.3 @@ -0,0 +1,69 @@ +.TH HPL_pdlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp01N \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp01N(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp01N\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. 
If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_spreadN \ (3), +.BR HPL_equil \ (3), +.BR HPL_rollN \ (3), +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp06N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01T.3 new file mode 100644 index 000000000..e5c5de024 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdlaswp01T.3 @@ -0,0 +1,69 @@ +.TH HPL_pdlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp01T \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp01T(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp01T\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. 
With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTT \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_spreadT \ (3), +.BR HPL_equil \ (3), +.BR HPL_rollT \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp06T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmatgen.3 new file mode 100644 index 000000000..5b4675c6e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmatgen.3 @@ -0,0 +1,67 @@ +.TH HPL_pdmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdmatgen \- Parallel random matrix generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdmatgen(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&ISEED\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdmatgen\fR +generates (or regenerates) a parallel random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,LocQ(N)). +On exit, this array contains the coefficients of the randomly +generated matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). 
+.TP 8 +ISEED (global input) const int +On entry, ISEED specifies the seed number to generate the +matrix A. ISEED must be at least zero. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_drand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmxswp.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmxswp.3 new file mode 100644 index 000000000..41c604373 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdmxswp.3 @@ -0,0 +1,78 @@ +.TH HPL_pdmxswp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdmxswp \- swaps and broadcasts the pivot row. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdmxswp(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdmxswp\fR +swaps and broadcasts the absolute value max row using +bi-directional exchange. The buffer is partially set by HPL_dlocmax. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by + + log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + +where lat and bdwth are the latency and bandwidth of the network for +double precision real elements. Communication only occurs in one +process column. Mono-directional links will cause the communication +cost to double. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information.
+.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of the matrix +column on which this function operates. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +It is assumed that HPL_dlocmax was called prior to this +routine to initialize the first four entries of this array. +On exit, the N0 length max row is stored in WORK[4:4+N0-1]; +Note that this is also the JJth row (or column) of L1. The +remaining part is used as a temporary array. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrN.3 new file mode 100644 index 000000000..2e94a36a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpancrN \- Crout panel factorization. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpancrN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpancrN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in no-transpose form (i.e. just like the input +matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step.
The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrT.3 new file mode 100644 index 000000000..035e60d60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpancrT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpancrT \- Crout panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpancrT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpancrT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. 
The lower triangular N0-by-N0 upper block +of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A).
+.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_disp.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_disp.3 new file mode 100644 index 000000000..94a212ced --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_disp.3 @@ -0,0 +1,24 @@ +.TH HPL_pdpanel_disp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_disp \- Deallocate a panel data structure. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pdpanel_disp(\fR +\fB\&HPL_T_panel * *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_disp\fR +deallocates the panel structure and resources and +stores the error code returned by the panel factorization. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * * +On entry, PANEL points to the address of the panel data +structure to be deallocated. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_free \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_free.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_free.3 new file mode 100644 index 000000000..cfad40c3d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_free.3 @@ -0,0 +1,24 @@ +.TH HPL_pdpanel_free 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_free \- Deallocate the panel resources.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pdpanel_free(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_free\fR +deallocates the panel resources and stores the error +code returned by the panel factorization. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the panel data structure from +which the resources should be deallocated. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_disp \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_init.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_init.3 new file mode 100644 index 000000000..cbb0e7e3a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_init.3 @@ -0,0 +1,76 @@ +.TH HPL_pdpanel_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_init \- Initialize the panel resources. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanel_init(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&JB\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&TAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_init\fR +initializes a panel data structure. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +M (local input) const int +On entry, M specifies the global number of rows of the panel. +M must be at least zero. 
+.TP 8 +N (local input) const int +On entry, N specifies the global number of columns of the +panel and trailing submatrix. N must be at least zero. +.TP 8 +JB (global input) const int +On entry, JB specifies the number of columns of the panel. +JB must be at least zero. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.TP 8 +IA (global input) const int +On entry, IA is the global row index identifying the panel +and trailing submatrix. IA must be at least zero. +.TP 8 +JA (global input) const int +On entry, JA is the global column index identifying the panel +and trailing submatrix. JA must be at least zero. +.TP 8 +TAG (global input) const int +On entry, TAG is the row broadcast message id. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_disp \ (3), +.BR HPL_pdpanel_free \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_new.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_new.3 new file mode 100644 index 000000000..ed9fe1053 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanel_new.3 @@ -0,0 +1,76 @@ +.TH HPL_pdpanel_new 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_new \- Create a panel data structure.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanel_new(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&JB\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&TAG\fR, +\fB\&HPL_T_panel * *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_new\fR +creates and initializes a panel data structure. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +M (local input) const int +On entry, M specifies the global number of rows of the panel. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the global number of columns of the +panel and trailing submatrix. N must be at least zero. +.TP 8 +JB (global input) const int +On entry, JB specifies the number of columns of the panel. +JB must be at least zero. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.TP 8 +IA (global input) const int +On entry, IA is the global row index identifying the panel +and trailing submatrix. IA must be at least zero. +.TP 8 +JA (global input) const int +On entry, JA is the global column index identifying the panel +and trailing submatrix. JA must be at least zero. +.TP 8 +TAG (global input) const int +On entry, TAG is the row broadcast message id. +.TP 8 +PANEL (local input/output) HPL_T_panel * * +On entry, PANEL points to the address of the panel data +structure to create and initialize. +.SH SEE ALSO +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_disp \ (3), +.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllN.3 new file mode 100644 index 000000000..eca1f4a34 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanllN \- Left-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanllN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanllN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. 
Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllT.3 new file mode 100644 index 000000000..a18d52c61 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanllT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanllT \- Left-looking panel factorization.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanllT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanllT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel.
This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlN.3 new file mode 100644 index 000000000..cae2b5b5b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanrlN \- Right-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanrlN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanrlN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). 
+ +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A.
+.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlT.3 new file mode 100644 index 000000000..434444bf7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdpanrlT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanrlT \- Right-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanrlT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanrlT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. 
The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrN.3 new file mode 100644 index 000000000..fc6dd25f8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpancrN \- Crout recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpancrN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpancrN\fR +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. 
Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrT.3 new file mode 100644 index 000000000..ea0a57bc9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpancrT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpancrT \- Crout recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpancrT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpancrT\fR +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. 
+The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllN.3 new file mode 100644 index 000000000..29b6db40a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanllN \- Left-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanllN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanllN\fR +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. 
On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllT.3 new file mode 100644 index 000000000..18db5c1fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanllT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanllT \- Left-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanllT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanllT\fR +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlN.3 new file mode 100644 index 000000000..441560c14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanrlN \- Right-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanrlN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanrlN\fR +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. 
just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlT.3 new file mode 100644 index 000000000..e5bd9d110 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdrpanrlT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanrlT \- Right-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanrlT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanrlT\fR +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtest.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtest.3 new file mode 100644 index 000000000..eaaff2bff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtest.3 @@ -0,0 +1,63 @@ +.TH HPL_pdtest 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdtest \- Perform one test. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdtest(\fR +\fB\&HPL_T_test *\fR +\fI\&TEST\fR, +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdtest\fR +performs one test given a set of parameters such as the +process grid, the problem size, the distribution blocking factor ... 
+This function generates the data, calls and times the linear system +solver, checks the accuracy of the obtained vector solution and +writes this information to the file pointed to by TEST->outfp. +.SH ARGUMENTS +.TP 8 +TEST (global input) HPL_T_test * +On entry, TEST points to a testing data structure: outfp +specifies the output file where the results will be printed. +It is only defined and used by the process 0 of the grid. +thrsh specifies the threshold value for the test ratio. +Concretely, a test is declared "PASSED" if and only if the +following inequality is satisfied: +||Ax-b||_oo / ( epsil * + ( || x ||_oo * || A ||_oo + || b ||_oo ) * + N ) < thrsh. +epsil is the relative machine precision of the distributed +computer. Finally the test counters, kfail, kpass, kskip and +ktest are updated as follows: if the test passes, kpass is +incremented by one; if the test fails, kfail is incremented +by one; if the test is skipped, kskip is incremented by one. +ktest is left unchanged. +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters to be used for this test. +.TP 8 +N (global input) const int +On entry, N specifies the order of the coefficient matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.SH SEE ALSO +.BR HPL_pddriver \ (3), +.BR HPL_pdinfo \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtrsv.3 new file mode 100644 index 000000000..5d2d14dcd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdtrsv.3 @@ -0,0 +1,49 @@ +.TH HPL_pdtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdtrsv \- Solve triu( A ) x = b. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdtrsv(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_pmat *\fR +\fI\&AMAT\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdtrsv\fR +solves an upper triangular system of linear equations. + +The rhs is the last column of the N by N+1 matrix A. The solve starts +in the process column owning the Nth column of A, so the rhs b may +need to be moved one process column to the left at the beginning. The +routine therefore needs a column vector in every process column but +the one owning b. The result is replicated in all process rows, and +returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + +The algorithm uses decreasing one-ring broadcast in process rows and +columns implemented in terms of synchronous communication point to +point primitives. The lookahead of depth 1 is used to minimize the +critical path. This entire operation is essentially ``latency'' bound +and an estimate of its running time is given by: + + (move rhs) lat + N / ( P bdwth ) + + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + gam2 N^2 / ( P Q ), + +where gam2 is an estimate of the Level 2 BLAS rate of execution. +There are N / NB diagonal blocks. One must exchange 2 messages of +length NB to compute the next NB entries of the vector solution, as +well as performing a total of N^2 floating point operations. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. 
+.TP 8 +AMAT (local input/output) HPL_T_pmat * +On entry, AMAT points to the data structure containing the +local array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNN.3 new file mode 100644 index 000000000..e20929a27 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNN.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateNN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateNN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateNN\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp01N \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNT.3 new file mode 100644 index 000000000..276c2ceda --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateNT.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateNT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateNT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateNT\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTN.3 new file mode 100644 index 000000000..091859d01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTN.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateTN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateTN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateTN\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp01N \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTT.3 new file mode 100644 index 000000000..34502c6ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pdupdateTT.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateTT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateTT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateTT\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_perm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_perm.3 new file mode 100644 index 000000000..9476b5eff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_perm.3 @@ -0,0 +1,50 @@ +.TH HPL_perm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_perm \- Combine 2 index arrays - Generate the permutation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_perm(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_perm\fR +combines two index arrays and generates the corresponding +permutation. First, this function computes the inverse of LINDXA, and +then combines it with LINDXAU. Second, in order to be able to perform +the permutation in place, LINDXAU is overwritten by the sequence of +permutations producing the same result. What we ultimately want to +achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the +call to this function, this in place permutation can be performed by +for i in [0..N) swap U[i] with U[LINDXAU[i]]. +.SH ARGUMENTS +.TP 8 +N (global input) const int +On entry, N specifies the length of the arrays LINDXA and +LINDXAU. N should be at least zero. +.TP 8 +LINDXA (global input/output) int * +On entry, LINDXA is an array of dimension N containing the +source indexes. On exit, LINDXA contains the combined index +array. +.TP 8 +LINDXAU (global input/output) int * +On entry, LINDXAU is an array of dimension N containing the +target indexes. On exit, LINDXAU contains the sequence of +permutations that should be applied in increasing order to +permute the underlying array U in place. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension N. 
+.SH SEE ALSO +.BR HPL_plindx1 \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pipid.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pipid.3 new file mode 100644 index 000000000..6a8f5f277 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pipid.3 @@ -0,0 +1,79 @@ +.TH HPL_pipid 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pipid \- Simplify the pivot vector. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pipid(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&IPID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pipid\fR +computes an array IPID that contains the source and final +destination of matrix rows resulting from the application of N +interchanges as computed by the LU factorization with row partial +pivoting. The array IPID is such that the row of global index IPID(i) +should be mapped onto the row of global index IPID(i+1). Note that we +cannot really know the length of IPID a priori. However, we know that +this array is at least 2*N long, since there are N rows to swap and +broadcast. The length of this array must be smaller than or equal to +4*N, since every row is swapped with at most a single distinct remote +row. The algorithm constructing IPID goes as follows: Let IA be the +global index of the first row to be swapped. + +For every row src IA + i with i in [0..N) to be swapped with row dst +such that dst is given by DPIV[i]: + +Is row src the destination of a previous row of the current block, +that is, is there k odd such that IPID(k) is equal to src ? + Yes: update this destination with dst. For example, if the +pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, +we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it +was thought so far ... 
+ No : add the pair (src,dst) at the end of IPID; row src has not +been moved yet. + +Is row dst different from src the destination of a previous row of +the current block, i.e., is there k odd such that IPID(k) is equal to +dst ? + Yes: update IPID(k) with src. For example, if the pivot array +is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in +fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought +so far ... + No : add the pair (dst,src) at the end of IPID; row dst has not +been moved yet. + +Note that when src is equal to dst, the pair (dst,src) should not be +added to IPID in order to avoid duplicated entries in this array. +During the construction of the array IPID, we make sure that the +first N entries are such that IPID(k) with k odd is equal to IA+k/2. +For k in [0..K/2), the row of global index IPID(2*k) should be +mapped onto the row of global index IPID(2*k+1). +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global output) int * +On exit, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global output) int * +On entry, IPID is an array of length 4*N. On exit, the first +K entries of that array contain the src and final destination +resulting from the application of the N interchanges as +specified by DPIV. The pairs (src,dst) are contiguously +stored and sorted so that IPID(2*i+1) is equal to IA+i with i +in [0..N) +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx0.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx0.3 new file mode 100644 index 000000000..2b889947a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx0.3 @@ -0,0 +1,168 @@ +.TH HPL_plindx0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx0 \- Compute local swapping index arrays. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx0(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&LLEN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx0\fR +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. + +On entry, the array IPID of length K is such that the row of global +index IPID(i) should be mapped onto row of global index IPID(i+1). +Let IA be the global index of the first row to be swapped. For k in +[0..K/2), the row of global index IPID(2*k) should be mapped onto the +row of global index IPID(2*k+1). The question then, is to determine +which rows should ultimately be part of U. + +First, some rows of the process ICURROW may be swapped locally. One +of this row belongs to U, the other one belongs to my local piece of +A. The other rows of the current block are swapped with remote rows +and are thus not part of U. These rows however should be sent along, +and grabbed by the other processes as we progress in the exchange +phase. + +So, assume that I am ICURROW and consider a row of index IPID(2*i) +that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less +than N, this row is locally swapped and should be copied into U at +the position IPID(2*i+1) - IA. No row will be exchanged for this one. 
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be +locally copied into my local piece of A at the position corresponding +to the row of global index IPID(2*i+1). + +If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) +is to be swapped away and strictly speaking does not belong to U, but +to A remotely. Since this process will however send this array U, +this row is copied into U, exactly where the row IPID(2*i+1) should +go. For this, we search IPID for k1, such that IPID(2*k1) is equal to +IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position +IPID(2*k1+1)-IA. + +It is thus important to put the rows that go into U, i.e., such that +IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By +doing so, U is formed, and the local copy is performed in just one +sweep. + +Two lists LINDXA and LINDXAU are built. LINDXA contains the local +index of the rows I have that should be copied. LINDXAU contains the +local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A +is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). In the process +ICURROW, the initial packing algorithm proceeds as follows.
+ + for all entries in IPID, + if IPID(2*i) is in ICURROW, + if IPID(2*i+1) is in ICURROW, + if( IPID(2*i+1) - IA < N ) + save corresponding local position + of this row (LINDXA); + save local position (LINDXAU) in U + where this row goes; + [copy row IPID(2*i) in U at position + IPID(2*i+1)-IA; ]; + else + save corresponding local position of + this row (LINDXA); + save local position (-LINDXAU) in A + where this row goes; + [copy row IPID(2*i) in my piece of A + at IPID(2*i+1);] + end if + else + find k1 such that IPID(2*k1) = IPID(2*i+1); + copy row IPID(2*i) in U at position + IPID(2*k1+1)-IA; + save corresponding local position of this + row (LINDXA); + save local position (LINDXAU) in U where + this row goes; + end if + end if + end for + +Second, if I am not the current row process ICURROW, all source rows +in IPID that I own are part of U. Indeed, they are swapped with one +row of the current block of rows, and the main factorization +algorithm proceeds one row after each other. The processes different +from ICURROW, should exchange and accumulate those rows until they +receive some data previously owned by the process ICURROW. + +In processes different from ICURROW, the initial packing algorithm +proceeds as follows. Consider a row of global index IPID(2*i) that I +own. When I will be receiving data previously owned by ICURROW, i.e., +U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, +and this particular row of U should be first copied into my piece of +A, at A(il,:), where il is the local row index corresponding to +IPID(2*i). Now,initially, this row will be packed into workspace, say +as the kth row of that work array. The following algorithm sets +LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row +should be copied. LINDXA(k) stores the local index in A where this +row of U should be copied, i.e il. 
+ + for all entries in IPID, + if IPID(2*i) is not in ICURROW, + copy row IPID(2*i) in work array; + save corresponding local position + of this row (LINDXA); + save position (LINDXAU) in U where + this row should be copied; + end if + end for + +Since we are at it, we also globally figure out how many rows every +process has. That is necessary, because it would rather be cumbersome +to figure it on the fly during the bi-directional exchange phase. +This information is kept in the array LLEN of size NPROW. Also note +that the arrays LINDXA and LINDXAU are of max length equal to 2*N. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +LINDXA (local output) int * +On entry, LINDXA is an array of dimension 2*N. On exit, this +array contains the local indexes of the rows of A I have that +should be copied into U. +.TP 8 +LINDXAU (local output) int * +On exit, LINDXAU is an array of dimension 2*N. On exit, this +array contains the local destination information encoded as +follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be +copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). +.TP 8 +LLEN (global output) int * +On entry, LLEN is an array of length NPROW. On exit, it +contains how many rows every process has. +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx1.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx1.3 new file mode 100644 index 000000000..7d4f8feba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx1.3 @@ -0,0 +1,106 @@ +.TH HPL_plindx1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx1 \- Compute local swapping index arrays. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx1(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&IPA\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR, +\fB\&int *\fR +\fI\&PERMU\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx1\fR +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. In addition, this function computes +three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic +mapping information for the spreading phase. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) const int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +IPA (global output) int * +On exit, IPA specifies the number of rows that the current +process row has that either belong to U or should be swapped +with remote rows of A. 
+.TP 8 +LINDXA (global output) int * +On entry, LINDXA is an array of dimension 2*N. On exit, this +array contains the local indexes of the rows of A I have that +should be copied into U. +.TP 8 +LINDXAU (global output) int * +On entry, LINDXAU is an array of dimension 2*N. On exit, this +array contains the local destination information encoded as +follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be +copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). +.TP 8 +IPLEN (global output) int * +On entry, IPLEN is an array of dimension NPROW + 1. On exit, +this array is such that IPLEN[i] is the number of rows of A +in the processes before process IPMAP[i] after the sort +with the convention that IPLEN[nprow] is the total number of +rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the +local number of rows of A that should be moved to the process +IPMAP[i]. IPLEN is such that the number of rows of the source +process row can be computed as IPLEN[1] - IPLEN[0], and the +remaining entries of this array are sorted so that the +quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROW. On exit, this +array contains the logarithmic mapping of the processes. In +other words, IPMAP[myrow] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROW. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROW) +.TP 8 +PERMU (global output) int * +On entry, PERMU is an array of dimension JB. On exit, PERMU +contains a sequence of permutations, that should be applied +in increasing order to permute in place the row panel U. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension 2*JB.
+.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx10.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx10.3 new file mode 100644 index 000000000..d22d64f36 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_plindx10.3 @@ -0,0 +1,68 @@ +.TH HPL_plindx10 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx10 \- Compute the logarithmic maps for the spreading. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx10(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx10\fR +computes three arrays IPLEN, IPMAP and IPMAPM1 that +contain the logarithmic mapping information for the spreading phase. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) const int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +IPLEN (global output) int * +On entry, IPLEN is an array of dimension NPROW + 1. On exit, +this array is such that IPLEN[i] is the number of rows of A +in the processes before process IPMAP[i] after the sort, with +the convention that IPLEN[nprow] is the total number of rows. +In other words, IPLEN[i+1] - IPLEN[i] is the local number of +rows of A that should be moved for each process.
IPLEN is +such that the number of rows of the source process row can be +computed as IPLEN[1] - IPLEN[0], and the remaining entries of +this array are sorted so that the quantities IPLEN[i+1] - +IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROW. On exit, this +array contains the logarithmic mapping of the processes. In +other words, IPMAP[myrow] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROW. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROW) +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pnum.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pnum.3 new file mode 100644 index 000000000..38956c5a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pnum.3 @@ -0,0 +1,38 @@ +.TH HPL_pnum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pnum \- Rank determination. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pnum(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&MYROW\fR, +\fB\&const int\fR +\fI\&MYCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pnum\fR +determines the rank of a process as a function of its +coordinates in the grid. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +MYROW (local input) const int +On entry, MYROW specifies the row coordinate of the process +whose rank is to be determined. MYROW must be greater than or +equal to zero and less than NPROW. 
+.TP 8 +MYCOL (local input) const int +On entry, MYCOL specifies the column coordinate of the +process whose rank is to be determined. MYCOL must be greater +than or equal to zero and less than NPCOL. +.SH SEE ALSO +.BR HPL_grid_init \ (3), +.BR HPL_grid_info \ (3), +.BR HPL_grid_exit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer.3 new file mode 100644 index 000000000..550703aee --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer.3 @@ -0,0 +1,35 @@ +.TH HPL_ptimer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer \- Timer facility. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_ptimer(\fR +\fB\&const int\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_ptimer\fR +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_ptimer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_ptimer_boot() prior to any of +the functions mentioned above. +.SH ARGUMENTS +.TP 8 +I (global input) const int +On entry, I specifies the timer to stop/start. +.SH SEE ALSO +.BR HPL_ptimer_cputime \ (3), +.BR HPL_ptimer_walltime \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_cputime.3 new file mode 100644 index 000000000..a93a1c208 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_cputime.3 @@ -0,0 +1,23 @@ +.TH HPL_ptimer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer_cputime \- Return the CPU time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_ptimer_cputime();\fR +.SH DESCRIPTION +\fB\&HPL_ptimer_cputime\fR +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. +.SH SEE ALSO +.BR HPL_ptimer_walltime \ (3), +.BR HPL_ptimer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_walltime.3 new file mode 100644 index 000000000..37e5e8c54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_ptimer_walltime.3 @@ -0,0 +1,14 @@ +.TH HPL_ptimer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer_walltime \- Return the elapsed (wall-clock) time. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_ptimer_walltime();\fR +.SH DESCRIPTION +\fB\&HPL_ptimer_walltime\fR +returns the elapsed (wall-clock) time. +.SH SEE ALSO +.BR HPL_ptimer_cputime \ (3), +.BR HPL_ptimer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pwarn.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pwarn.3 new file mode 100644 index 000000000..14e4a65d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_pwarn.3 @@ -0,0 +1,45 @@ +.TH HPL_pwarn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pwarn \- displays an error message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pwarn(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pwarn\fR +displays an error message. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH SEE ALSO +.BR HPL_pabort \ (3), +.BR HPL_fprintf \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rand.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rand.3 new file mode 100644 index 000000000..8b1918fea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rand.3 @@ -0,0 +1,28 @@ +.TH HPL_rand 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rand \- random number generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_rand();\fR +.SH DESCRIPTION +\fB\&HPL_rand\fR +generates the next number in the random sequence. This +function ensures that this number lies in the interval (-0.5, 0.5]. + +The static array irand contains the information (2 integers) required +to generate the next number in the sequence X(n). This number is +computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the +constant d is the largest 64 bit positive integer. The array irand is +then updated for the generation of the next number X(n+1) in the +random sequence as follows X(n+1) = a * X(n) + c. The constants a and +c should have been preliminarily stored in the arrays ias and ics as +2 pairs of integers. The initialization of ias, ics and irand is +performed by the function HPL_setran. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_recv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_recv.3 new file mode 100644 index 000000000..d9136c14b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_recv.3 @@ -0,0 +1,49 @@ +.TH HPL_recv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_recv \- Receive a message. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_recv(\fR +\fB\&double *\fR +\fI\&RBUF\fR, +\fB\&int\fR +\fI\&RCOUNT\fR, +\fB\&int\fR +\fI\&SRC\fR, +\fB\&int\fR +\fI\&RTAG\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_recv\fR +is a simple wrapper around MPI_Recv. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. +.SH ARGUMENTS +.TP 8 +RBUF (local output) double * +On entry, RBUF specifies the starting address of buffer to be +received. +.TP 8 +RCOUNT (local input) int +On entry, RCOUNT specifies the number of double precision +entries in RBUF. RCOUNT must be at least zero. +.TP 8 +SRC (local input) int +On entry, SRC specifies the rank of the sending process in +the communication space defined by COMM. +.TP 8 +RTAG (local input) int +On entry, RTAG specifies the message tag to be used for this +communication operation. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_send \ (3), +.BR HPL_sendrecv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_reduce.3 new file mode 100644 index 000000000..c48f04ded --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_reduce.3 @@ -0,0 +1,56 @@ +.TH HPL_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_reduce \- Reduce operation.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_reduce(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const HPL_T_OP \fR +\fI\&OP\fR, +\fB\&const int\fR +\fI\&ROOT\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_reduce\fR +performs a global reduce operation across all processes of +a group. Note that the input buffer is used as workarray and in all +processes but the accumulating process corrupting the original data. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/output) void * +On entry, BUFFER points to the buffer to be reduced. On +exit, and in process of rank ROOT this array contains the +reduced data. This buffer is also used as workspace during +the operation in the other processes of the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.TP 8 +OP (global input) const HPL_T_OP +On entry, OP is a pointer to the local combine function. +.TP 8 +ROOT (global input) const int +On entry, ROOT is the coordinate of the accumulating process. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollN.3 new file mode 100644 index 000000000..eac4deb66 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollN.3 @@ -0,0 +1,77 @@ +.TH HPL_rollN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rollN \- Roll U and forward the column panel. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_rollN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_rollN\fR +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be rolled) information. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of U. N must be +at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[NPROW]). +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process row. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process.
+.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollT.3 new file mode 100644 index 000000000..bab5bdffd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_rollT.3 @@ -0,0 +1,77 @@ +.TH HPL_rollT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rollT \- Roll U and forward the column panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_rollT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_rollT\fR +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be rolled) information.
+.TP 8 +N (local input) const int +On entry, N specifies the local number of rows of U. N must +be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,N). +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process row. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sdrv.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sdrv.3 new file mode 100644 index 000000000..a11252d6a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sdrv.3 @@ -0,0 +1,67 @@ +.TH HPL_sdrv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_sdrv \- Send and receive a message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_sdrv(\fR +\fB\&double *\fR +\fI\&SBUF\fR, +\fB\&int\fR +\fI\&SCOUNT\fR, +\fB\&int\fR +\fI\&STAG\fR, +\fB\&double *\fR +\fI\&RBUF\fR, +\fB\&int\fR +\fI\&RCOUNT\fR, +\fB\&int\fR +\fI\&RTAG\fR, +\fB\&int\fR +\fI\&PARTNER\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_sdrv\fR +is a simple wrapper around MPI_Sendrecv.
Its main purpose is +to allow for some experimentation and tuning of this simple function. +Messages of length less than or equal to zero are not sent nor +received. Successful completion is indicated by the returned error +code HPL_SUCCESS. +.SH ARGUMENTS +.TP 8 +SBUF (local input) double * +On entry, SBUF specifies the starting address of buffer to be +sent. +.TP 8 +SCOUNT (local input) int +On entry, SCOUNT specifies the number of double precision +entries in SBUF. SCOUNT must be at least zero. +.TP 8 +STAG (local input) int +On entry, STAG specifies the message tag to be used for the +sending communication operation. +.TP 8 +RBUF (local output) double * +On entry, RBUF specifies the starting address of buffer to be +received. +.TP 8 +RCOUNT (local input) int +On entry, RCOUNT specifies the number of double precision +entries in RBUF. RCOUNT must be at least zero. +.TP 8 +RTAG (local input) int +On entry, RTAG specifies the message tag to be used for the +receiving communication operation. +.TP 8 +PARTNER (local input) int +On entry, PARTNER specifies the rank of the collaborative +process in the communication space defined by COMM. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_send \ (3), +.BR HPL_recv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_send.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_send.3 new file mode 100644 index 000000000..48ffc5d62 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_send.3 @@ -0,0 +1,49 @@ +.TH HPL_send 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_send \- Send a message. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_send(\fR +\fB\&double *\fR +\fI\&SBUF\fR, +\fB\&int\fR +\fI\&SCOUNT\fR, +\fB\&int\fR +\fI\&DEST\fR, +\fB\&int\fR +\fI\&STAG\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_send\fR +is a simple wrapper around MPI_Send. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +MPI_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. +.SH ARGUMENTS +.TP 8 +SBUF (local input) double * +On entry, SBUF specifies the starting address of buffer to be +sent. +.TP 8 +SCOUNT (local input) int +On entry, SCOUNT specifies the number of double precision +entries in SBUF. SCOUNT must be at least zero. +.TP 8 +DEST (local input) int +On entry, DEST specifies the rank of the receiving process in +the communication space defined by COMM. +.TP 8 +STAG (local input) int +On entry, STAG specifies the message tag to be used for this +communication operation. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_recv \ (3), +.BR HPL_sendrecv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_setran.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_setran.3 new file mode 100644 index 000000000..e9a9433ae --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_setran.3 @@ -0,0 +1,37 @@ +.TH HPL_setran 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_setran \- Manage the random number generator. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_setran(\fR +\fB\&const int\fR +\fI\&OPTION\fR, +\fB\&int *\fR +\fI\&IRAN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_setran\fR +initializes the random generator with the encoding of the +first number X(0) in the sequence, and the constants a and c used to +compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), +a and c are stored in the static variables irand, ias and ics. When +OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the +values of the input array IRAN. When OPTION is 3, IRAN is set to the +current value of irand, and irand is then incremented. +.SH ARGUMENTS +.TP 8 +OPTION (local input) const int +On entry, OPTION is an integer that specifies the operations +to be performed on the random generator as specified above. +.TP 8 +IRAN (local input/output) int * +On entry, IRAN is an array of dimension 2, that contains the +16-lower and 15-higher bits of a random number. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadN.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadN.3 new file mode 100644 index 000000000..452b8da34 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadN.3 @@ -0,0 +1,96 @@ +.TH HPL_spreadN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_spreadN \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_spreadN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int\fR +\fI\&SRCDIST\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_spreadN\fR +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of rows of U, that +should be spread on any given process row. This function also probes +for the presence of the column panel PBCST. In case of success, this +panel will be forwarded. If PBCST is NULL on input, this probing +mechanism will be disabled. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be spread) information. +.TP 8 +SIDE (global input) const enum HPL_SIDE +On entry, SIDE specifies whether the local piece of U located +in process IPMAP[SRCDIST] should be spread to the right or to +the left. This feature is used by the equilibration process. +.TP 8 +N (global input) const int +On entry, N specifies the local number of columns of U. N +must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U. 
+.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[nprow]). +.TP 8 +SRCDIST (local input) const int +On entry, SRCDIST specifies the source process that spreads +its piece of U. +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process before process IPMAP[i], with the convention +that IPLEN[nprow] is the total number of rows. In other words +IPLEN[i+1] - IPLEN[i] is the local number of rows of U that +should be moved to process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadT.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadT.3 new file mode 100644 index 000000000..54f7dda31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_spreadT.3 @@ -0,0 +1,96 @@ +.TH HPL_spreadT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_spreadT \- Spread row panel U and forward current column panel. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_spreadT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int\fR +\fI\&SRCDIST\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_spreadT\fR +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of columns of U, +that should be spread on any given process row. This function also +probes for the presence of the column panel PBCST. If available, +this panel will be forwarded. If PBCST is NULL on input, this +probing mechanism will be disabled. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be spread) information. +.TP 8 +SIDE (global input) const enum HPL_SIDE +On entry, SIDE specifies whether the local piece of U located +in process IPMAP[SRCDIST] should be spread to the right or to +the left. This feature is used by the equilibration process. +.TP 8 +N (global input) const int +On entry, N specifies the local number of rows of U. N must +be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U. 
+.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,N). +.TP 8 +SRCDIST (local input) const int +On entry, SRCDIST specifies the source process that spreads +its piece of U. +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process before process IPMAP[i], with the convention +that IPLEN[nprow] is the total number of rows. In other words +IPLEN[i+1] - IPLEN[i] is the local number of rows of U that +should be moved to process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sum.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sum.3 new file mode 100644 index 000000000..a3c4e2190 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_sum.3 @@ -0,0 +1,44 @@ +.TH HPL_sum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_sum \- Combine (sum) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_sum(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_sum\fR +combines (sum) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. 
N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contain the +combined results. +.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffer operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer.3 new file mode 100644 index 000000000..61f3f7cb1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer.3 @@ -0,0 +1,35 @@ +.TH HPL_timer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer \- Timer facility. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_timer(\fR +\fB\&const int\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_timer\fR +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_timer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_timer calls in them. HPL_timer_enable() will re-enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_timer_boot() prior to any of +the functions mentioned above.
+.SH ARGUMENTS +.TP 8 +I (global input) const int +On entry, I specifies the timer to stop/start. +.SH SEE ALSO +.BR HPL_timer_cputime \ (3), +.BR HPL_timer_walltime \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_cputime.3 new file mode 100644 index 000000000..1f8987ca2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_cputime.3 @@ -0,0 +1,23 @@ +.TH HPL_timer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer_cputime \- Return the CPU time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_timer_cputime();\fR +.SH DESCRIPTION +\fB\&HPL_timer_cputime\fR +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. +.SH SEE ALSO +.BR HPL_timer_walltime \ (3), +.BR HPL_timer \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_walltime.3 new file mode 100644 index 000000000..9a6e898e7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_timer_walltime.3 @@ -0,0 +1,14 @@ +.TH HPL_timer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer_walltime \- Return the elapsed (wall-clock) time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_timer_walltime();\fR +.SH DESCRIPTION +\fB\&HPL_timer_walltime\fR +returns the elapsed (wall-clock) time. +.SH SEE ALSO +.BR HPL_timer_cputime \ (3), +.BR HPL_timer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_warn.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_warn.3 new file mode 100644 index 000000000..6b051acb3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_warn.3 @@ -0,0 +1,59 @@ +.TH HPL_warn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_warn \- displays an error message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_warn(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_warn\fR +displays an error message. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler.
+.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_warn( stderr, __LINE__, __FILE__, +.br + "Demo.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_abort \ (3), +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_xjumpm.3 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_xjumpm.3 new file mode 100644 index 000000000..df3e0a954 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/man/man3/HPL_xjumpm.3 @@ -0,0 +1,77 @@ +.TH HPL_xjumpm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_xjumpm \- Compute constants to jump in the random sequence. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_xjumpm(\fR +\fB\&const int\fR +\fI\&JUMPM\fR, +\fB\&int *\fR +\fI\&MULT\fR, +\fB\&int *\fR +\fI\&IADD\fR, +\fB\&int *\fR +\fI\&IRANN\fR, +\fB\&int *\fR +\fI\&IRANM\fR, +\fB\&int *\fR +\fI\&IAM\fR, +\fB\&int *\fR +\fI\&ICM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_xjumpm\fR +computes the constants A and C to jump JUMPM numbers in +the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in +MULT and IADD specify how to jump from one entry in the sequence to +the next. +.SH ARGUMENTS +.TP 8 +JUMPM (local input) const int +On entry, JUMPM specifies the number of entries in the +sequence to jump over. When JUMPM is less than or equal to zero, +A and C are not computed, IRANM is set to IRANN corresponding +to a jump of size zero. +.TP 8 +MULT (local input) int * +On entry, MULT is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant a to jump from +X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8 +IADD (local input) int * +On entry, IADD is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant c to jump from +X(n) to X(n+1) = a*X(n) + c in the random sequence. +.TP 8 +IRANN (local input) int * +On entry, IRANN is an array of dimension 2, that contains the +16-lower and 15-higher bits of the encoding of X(n). +.TP 8 +IRANM (local output) int * +On entry, IRANM is an array of dimension 2. On exit, this +array contains respectively the 16-lower and 15-higher bits +of the encoding of X(n+JUMPM). +.TP 8 +IAM (local output) int * +On entry, IAM is an array of dimension 2. On exit, when JUMPM +is greater than zero, this array contains the encoded +constant A to jump from X(n) to X(n+JUMPM) in the random +sequence. IAM(0:1) contains respectively the 16-lower and +15-higher bits of this constant A. When JUMPM is less than or +equal to zero, this array is not referenced. +.TP 8 +ICM (local output) int * +On entry, ICM is an array of dimension 2. On exit, when JUMPM +is greater than zero, this array contains the encoded +constant C to jump from X(n) to X(n+JUMPM) in the random +sequence. ICM(0:1) contains respectively the 16-lower and +15-higher bits of this constant C. When JUMPM is less than or +equal to zero, this array is not referenced. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/missing b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/missing new file mode 100755 index 000000000..625aeb118 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard , 1996.
+ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? 
+ ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if we it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. + exit $st +fi + +perl_URL=https://www.perl.org/ +flex_URL=https://github.com/westes/flex +gnu_software_URL=https://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." 
+ program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." + echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." 
+ ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). +exit $st + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/.Makefile.dpct.patched.swp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/.Makefile.dpct.patched.swp new file mode 100644 index 000000000..b7e1c370d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/.Makefile.dpct.patched.swp differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/MainSourceFiles.yaml new file mode 100644 index 000000000..19e73e079 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/MainSourceFiles.yaml @@ -0,0 +1,1000 @@ +--- +MainSourceFile: MainSrcFiles_placehold +Replacements: + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6545 + Length: 0 + ReplacementText: "#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6822 + Length: 0 + ReplacementText: "\n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6825 + Length: 18 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + 
NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6843 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6869 + Length: 20 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6956 + Length: 9 + ReplacementText: 'dpct::err0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7044 + Length: 197 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7334 + Length: 0 + ReplacementText: ' try ' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7364 + Length: 0 + ReplacementText: " /*\n DPCT1010:1: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. 
You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7372 + Length: 9 + ReplacementText: 'dpct::err0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7388 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7416 + Length: 199 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7739 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7772 + Length: 208 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 8006 + Length: 0 + ReplacementText: "\ncatch (sycl::exception const &exc) {\n std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n std::exit(1);\n}" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 8954 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9513 + Length: 54 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device(K * LDA, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9587 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9637 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9662 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9687 + Length: 55 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device(N * LDB, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9762 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + 
ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9813 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9838 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9863 + Length: 54 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrC = sycl::malloc_device(N * LDC, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9937 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9987 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10012 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10025 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10054 + Length: 85 + ReplacementText: 'oneapi::mkl::blas::column_major::gemm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M, N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC).wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10145 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10187 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10237 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10262 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10269 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' 
+ Offset: 10299 + Length: 17 + ReplacementText: 'sycl::free(devPtrA, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10322 + Length: 17 + ReplacementText: 'sycl::free(devPtrB, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10345 + Length: 17 + ReplacementText: 'sycl::free(devPtrC, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10637 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11149 + Length: 55 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device(M * LDA, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11224 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11271 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11296 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11334 + Length: 55 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device(N * LDB, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11409 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11456 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11481 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11488 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11522 + Length: 62 + ReplacementText: 'oneapi::mkl::blas::column_major::trsm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left, oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, 
oneapi::mkl::diag::unit, M, N, ALPHA, devPtrA, LDA, devPtrB, LDB).wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11595 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11642 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11689 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11714 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11726 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11755 + Length: 17 + ReplacementText: 'sycl::free(devPtrA, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11778 + Length: 17 + 
ReplacementText: 'sycl::free(devPtrB, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false +MainSourceFilesDigest: + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Digest: c9ea63d69505b8c70080ff9792b77dd8 +DpctVersion: 18.0.0 +MainHelperFileName: '' +USMLevel: '' +FeatureMap: {} +CompileTargets: + /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl: + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a: + - MigratedFileName: './src/auxil/HPL_dlacpy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlatcpy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_fprintf.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_warn.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_abort.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlaprnt.c' 
+ CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlange.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlamch.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dcopy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_daxpy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dscal.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_idamax.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dgemv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dtrsv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dger.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dgemm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dtrsm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_1ring.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_1rinM.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_2ring.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_2rinM.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_blong.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_blonM.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_packL.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_copyL.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_binit.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - 
MigratedFileName: './src/comm/HPL_bcast.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_bwait.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_send.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_recv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_sdrv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_grid_init.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_pnum.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_grid_info.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_grid_exit.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_broadcast.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_reduce.c' + CompileOptions: 
'-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_all_reduce.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_barrier.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_min.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_max.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_sum.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_new.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_init.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_disp.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_free.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxg2l.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxg2lp.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxg2p.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxl2g.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_infog2l.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_numroc.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_numrocI.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp00N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp10N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp01N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp01T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include 
-I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp02N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp03N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp03T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp04N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp04T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp05N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp05T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp06N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp06T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pwarn.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 
-I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pabort.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pdlaprnt.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pdlamch.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pdlange.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_dlocmax.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_dlocswpN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_dlocswpT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdmxswp.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpancrN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpancrT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - 
MigratedFileName: './src/pfact/HPL_pdpanllN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpanllT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpanrlN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpanrlT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanllN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanllT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpancrN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpancrT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanrlN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanrlT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: 
'./src/pfact/HPL_pdfact.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pipid.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_plindx0.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp00N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp00T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_perm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_logsort.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_plindx10.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_plindx1.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_spreadN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_spreadT.c' + CompileOptions: 
'-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_rollN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_rollT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_equil.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp01N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp01T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateNN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateNT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateTN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateTT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdtrsv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesv0.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesvK1.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesvK2.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_dmatgen.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_ladd.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_lmul.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_xjumpm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_jumpit.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_rand.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_setran.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/timer/HPL_timer.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/timer/HPL_timer_cputime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/timer/HPL_timer_walltime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/pmatgen/HPL_pdmatgen.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptimer/HPL_ptimer.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptimer/HPL_ptimer_cputime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptimer/HPL_ptimer_walltime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + libdgemm.so.1.0.1: + - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp' + CompileOptions: '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ' + Compiler: cc + - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp' + CompileOptions: '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I 
$(INCLUDE_CL) ' + Compiler: cc +OptionMap: + AnalysisScopePath: + Value: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3' + Specified: false + AsyncHandler: + Value: 'false' + Specified: false + CommentsEnabled: + Value: 'false' + Specified: false + CompilationsDir: + Value: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3' + Specified: true + CtadEnabled: + Value: 'false' + Specified: false + EnablepProfiling: + Value: 'false' + Specified: false + ExperimentalFlag: + Value: '0' + Specified: false + ExplicitClNamespace: + Value: 'false' + Specified: false + ExplicitNamespace: + Value: '20' + Specified: false + ExtensionDDFlag: + Value: '0' + Specified: false + ExtensionDEFlag: + Value: '4294967295' + Specified: false + HelperFuncPreferenceFlag: + Value: '0' + Specified: false + NDRangeDim: + Value: '3' + Specified: false + NoDRYPattern: + Value: 'false' + Specified: false + NoUseGenericSpace: + Value: '' + Specified: true + OptimizeMigration: + Value: 'false' + Specified: false + ProcessAll: + Value: 'false' + Specified: false + RuleFile: + Value: '' + Specified: false + SyclNamedLambda: + Value: 'false' + Specified: false + UsmLevel: + Value: '1' + Specified: false +... diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct new file mode 100644 index 000000000..dfae1e89e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct @@ -0,0 +1,1019 @@ +CC := icpx + +LD := $(CC) + +#DPCT2001:4: You can link with more library by add them here. 
+LIB := + +FLAGS := + +ifeq ($(shell which $(CC)),) + $(error ERROR - $(CC) compiler not found) +endif + +ROOT_DIR := $(shell dirname $(shell which $(CC))) +INCLUDE_SYCL := $(ROOT_DIR)/../include +INCLUDE_CL := $(ROOT_DIR)/../include/sycl + +TARGET_0_SRC_0 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_0 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_1 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_1 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_2 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_2 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_3 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_3 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_4 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_4 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_5 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_5 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_6 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_6 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_7 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_7 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_7 = 
-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_8 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_8 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_9 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_9 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_10 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_10 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_11 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_11 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_0 = ./src/auxil/HPL_dlacpy.c +TARGET_1_OBJ_0 = ./src/auxil/HPL_dlacpy.o +TARGET_1_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_1 = ./src/auxil/HPL_dlatcpy.c +TARGET_1_OBJ_1 = ./src/auxil/HPL_dlatcpy.o +TARGET_1_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_2 = ./src/auxil/HPL_fprintf.c +TARGET_1_OBJ_2 = ./src/auxil/HPL_fprintf.o +TARGET_1_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_3 = ./src/auxil/HPL_warn.c +TARGET_1_OBJ_3 = ./src/auxil/HPL_warn.o +TARGET_1_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_4 = 
./src/auxil/HPL_abort.c +TARGET_1_OBJ_4 = ./src/auxil/HPL_abort.o +TARGET_1_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_5 = ./src/auxil/HPL_dlaprnt.c +TARGET_1_OBJ_5 = ./src/auxil/HPL_dlaprnt.o +TARGET_1_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_6 = ./src/auxil/HPL_dlange.c +TARGET_1_OBJ_6 = ./src/auxil/HPL_dlange.o +TARGET_1_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_7 = ./src/auxil/HPL_dlamch.c +TARGET_1_OBJ_7 = ./src/auxil/HPL_dlamch.o +TARGET_1_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ${FLAGS} + +TARGET_1_SRC_8 = ./src/blas/HPL_dcopy.c +TARGET_1_OBJ_8 = ./src/blas/HPL_dcopy.o +TARGET_1_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_9 = ./src/blas/HPL_daxpy.c +TARGET_1_OBJ_9 = ./src/blas/HPL_daxpy.o +TARGET_1_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_10 = ./src/blas/HPL_dscal.c +TARGET_1_OBJ_10 = ./src/blas/HPL_dscal.o +TARGET_1_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_11 = ./src/blas/HPL_idamax.c +TARGET_1_OBJ_11 = ./src/blas/HPL_idamax.o +TARGET_1_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_12 = ./src/blas/HPL_dgemv.c +TARGET_1_OBJ_12 = ./src/blas/HPL_dgemv.o +TARGET_1_FLAG_12 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + 
+TARGET_1_SRC_13 = ./src/blas/HPL_dtrsv.c +TARGET_1_OBJ_13 = ./src/blas/HPL_dtrsv.o +TARGET_1_FLAG_13 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_14 = ./src/blas/HPL_dger.c +TARGET_1_OBJ_14 = ./src/blas/HPL_dger.o +TARGET_1_FLAG_14 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_15 = ./src/blas/HPL_dgemm.c +TARGET_1_OBJ_15 = ./src/blas/HPL_dgemm.o +TARGET_1_FLAG_15 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_16 = ./src/blas/HPL_dtrsm.c +TARGET_1_OBJ_16 = ./src/blas/HPL_dtrsm.o +TARGET_1_FLAG_16 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_17 = ./src/comm/HPL_1ring.c +TARGET_1_OBJ_17 = ./src/comm/HPL_1ring.o +TARGET_1_FLAG_17 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_18 = ./src/comm/HPL_1rinM.c +TARGET_1_OBJ_18 = ./src/comm/HPL_1rinM.o +TARGET_1_FLAG_18 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_19 = ./src/comm/HPL_2ring.c +TARGET_1_OBJ_19 = ./src/comm/HPL_2ring.o +TARGET_1_FLAG_19 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_20 = ./src/comm/HPL_2rinM.c +TARGET_1_OBJ_20 = ./src/comm/HPL_2rinM.o +TARGET_1_FLAG_20 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_21 = ./src/comm/HPL_blong.c +TARGET_1_OBJ_21 = ./src/comm/HPL_blong.o +TARGET_1_FLAG_21 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I 
./include ${FLAGS} + +TARGET_1_SRC_22 = ./src/comm/HPL_blonM.c +TARGET_1_OBJ_22 = ./src/comm/HPL_blonM.o +TARGET_1_FLAG_22 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_23 = ./src/comm/HPL_packL.c +TARGET_1_OBJ_23 = ./src/comm/HPL_packL.o +TARGET_1_FLAG_23 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_24 = ./src/comm/HPL_copyL.c +TARGET_1_OBJ_24 = ./src/comm/HPL_copyL.o +TARGET_1_FLAG_24 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_25 = ./src/comm/HPL_binit.c +TARGET_1_OBJ_25 = ./src/comm/HPL_binit.o +TARGET_1_FLAG_25 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_26 = ./src/comm/HPL_bcast.c +TARGET_1_OBJ_26 = ./src/comm/HPL_bcast.o +TARGET_1_FLAG_26 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_27 = ./src/comm/HPL_bwait.c +TARGET_1_OBJ_27 = ./src/comm/HPL_bwait.o +TARGET_1_FLAG_27 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_28 = ./src/comm/HPL_send.c +TARGET_1_OBJ_28 = ./src/comm/HPL_send.o +TARGET_1_FLAG_28 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_29 = ./src/comm/HPL_recv.c +TARGET_1_OBJ_29 = ./src/comm/HPL_recv.o +TARGET_1_FLAG_29 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_30 = ./src/comm/HPL_sdrv.c +TARGET_1_OBJ_30 = ./src/comm/HPL_sdrv.o +TARGET_1_FLAG_30 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include 
-I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_31 = ./src/grid/HPL_grid_init.c +TARGET_1_OBJ_31 = ./src/grid/HPL_grid_init.o +TARGET_1_FLAG_31 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_32 = ./src/grid/HPL_pnum.c +TARGET_1_OBJ_32 = ./src/grid/HPL_pnum.o +TARGET_1_FLAG_32 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_33 = ./src/grid/HPL_grid_info.c +TARGET_1_OBJ_33 = ./src/grid/HPL_grid_info.o +TARGET_1_FLAG_33 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_34 = ./src/grid/HPL_grid_exit.c +TARGET_1_OBJ_34 = ./src/grid/HPL_grid_exit.o +TARGET_1_FLAG_34 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_35 = ./src/grid/HPL_broadcast.c +TARGET_1_OBJ_35 = ./src/grid/HPL_broadcast.o +TARGET_1_FLAG_35 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_36 = ./src/grid/HPL_reduce.c +TARGET_1_OBJ_36 = ./src/grid/HPL_reduce.o +TARGET_1_FLAG_36 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_37 = ./src/grid/HPL_all_reduce.c +TARGET_1_OBJ_37 = ./src/grid/HPL_all_reduce.o +TARGET_1_FLAG_37 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_38 = ./src/grid/HPL_barrier.c +TARGET_1_OBJ_38 = ./src/grid/HPL_barrier.o +TARGET_1_FLAG_38 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_39 = ./src/grid/HPL_min.c +TARGET_1_OBJ_39 = ./src/grid/HPL_min.o +TARGET_1_FLAG_39 = -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_40 = ./src/grid/HPL_max.c +TARGET_1_OBJ_40 = ./src/grid/HPL_max.o +TARGET_1_FLAG_40 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_41 = ./src/grid/HPL_sum.c +TARGET_1_OBJ_41 = ./src/grid/HPL_sum.o +TARGET_1_FLAG_41 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_42 = ./src/panel/HPL_pdpanel_new.c +TARGET_1_OBJ_42 = ./src/panel/HPL_pdpanel_new.o +TARGET_1_FLAG_42 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_43 = ./src/panel/HPL_pdpanel_init.c +TARGET_1_OBJ_43 = ./src/panel/HPL_pdpanel_init.o +TARGET_1_FLAG_43 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_44 = ./src/panel/HPL_pdpanel_disp.c +TARGET_1_OBJ_44 = ./src/panel/HPL_pdpanel_disp.o +TARGET_1_FLAG_44 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_45 = ./src/panel/HPL_pdpanel_free.c +TARGET_1_OBJ_45 = ./src/panel/HPL_pdpanel_free.o +TARGET_1_FLAG_45 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_46 = ./src/pauxil/HPL_indxg2l.c +TARGET_1_OBJ_46 = ./src/pauxil/HPL_indxg2l.o +TARGET_1_FLAG_46 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_47 = ./src/pauxil/HPL_indxg2lp.c +TARGET_1_OBJ_47 = ./src/pauxil/HPL_indxg2lp.o +TARGET_1_FLAG_47 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_48 = 
./src/pauxil/HPL_indxg2p.c +TARGET_1_OBJ_48 = ./src/pauxil/HPL_indxg2p.o +TARGET_1_FLAG_48 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_49 = ./src/pauxil/HPL_indxl2g.c +TARGET_1_OBJ_49 = ./src/pauxil/HPL_indxl2g.o +TARGET_1_FLAG_49 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_50 = ./src/pauxil/HPL_infog2l.c +TARGET_1_OBJ_50 = ./src/pauxil/HPL_infog2l.o +TARGET_1_FLAG_50 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_51 = ./src/pauxil/HPL_numroc.c +TARGET_1_OBJ_51 = ./src/pauxil/HPL_numroc.o +TARGET_1_FLAG_51 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_52 = ./src/pauxil/HPL_numrocI.c +TARGET_1_OBJ_52 = ./src/pauxil/HPL_numrocI.o +TARGET_1_FLAG_52 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_53 = ./src/pauxil/HPL_dlaswp00N.c +TARGET_1_OBJ_53 = ./src/pauxil/HPL_dlaswp00N.o +TARGET_1_FLAG_53 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_54 = ./src/pauxil/HPL_dlaswp10N.c +TARGET_1_OBJ_54 = ./src/pauxil/HPL_dlaswp10N.o +TARGET_1_FLAG_54 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_55 = ./src/pauxil/HPL_dlaswp01N.c +TARGET_1_OBJ_55 = ./src/pauxil/HPL_dlaswp01N.o +TARGET_1_FLAG_55 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_56 = ./src/pauxil/HPL_dlaswp01T.c +TARGET_1_OBJ_56 = ./src/pauxil/HPL_dlaswp01T.o +TARGET_1_FLAG_56 = -DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_57 = ./src/pauxil/HPL_dlaswp02N.c +TARGET_1_OBJ_57 = ./src/pauxil/HPL_dlaswp02N.o +TARGET_1_FLAG_57 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_58 = ./src/pauxil/HPL_dlaswp03N.c +TARGET_1_OBJ_58 = ./src/pauxil/HPL_dlaswp03N.o +TARGET_1_FLAG_58 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_59 = ./src/pauxil/HPL_dlaswp03T.c +TARGET_1_OBJ_59 = ./src/pauxil/HPL_dlaswp03T.o +TARGET_1_FLAG_59 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_60 = ./src/pauxil/HPL_dlaswp04N.c +TARGET_1_OBJ_60 = ./src/pauxil/HPL_dlaswp04N.o +TARGET_1_FLAG_60 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_61 = ./src/pauxil/HPL_dlaswp04T.c +TARGET_1_OBJ_61 = ./src/pauxil/HPL_dlaswp04T.o +TARGET_1_FLAG_61 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_62 = ./src/pauxil/HPL_dlaswp05N.c +TARGET_1_OBJ_62 = ./src/pauxil/HPL_dlaswp05N.o +TARGET_1_FLAG_62 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_63 = ./src/pauxil/HPL_dlaswp05T.c +TARGET_1_OBJ_63 = ./src/pauxil/HPL_dlaswp05T.o +TARGET_1_FLAG_63 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_64 = ./src/pauxil/HPL_dlaswp06N.c +TARGET_1_OBJ_64 = ./src/pauxil/HPL_dlaswp06N.o +TARGET_1_FLAG_64 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_65 
= ./src/pauxil/HPL_dlaswp06T.c +TARGET_1_OBJ_65 = ./src/pauxil/HPL_dlaswp06T.o +TARGET_1_FLAG_65 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_66 = ./src/pauxil/HPL_pwarn.c +TARGET_1_OBJ_66 = ./src/pauxil/HPL_pwarn.o +TARGET_1_FLAG_66 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_67 = ./src/pauxil/HPL_pabort.c +TARGET_1_OBJ_67 = ./src/pauxil/HPL_pabort.o +TARGET_1_FLAG_67 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_68 = ./src/pauxil/HPL_pdlaprnt.c +TARGET_1_OBJ_68 = ./src/pauxil/HPL_pdlaprnt.o +TARGET_1_FLAG_68 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_69 = ./src/pauxil/HPL_pdlamch.c +TARGET_1_OBJ_69 = ./src/pauxil/HPL_pdlamch.o +TARGET_1_FLAG_69 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_70 = ./src/pauxil/HPL_pdlange.c +TARGET_1_OBJ_70 = ./src/pauxil/HPL_pdlange.o +TARGET_1_FLAG_70 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_71 = ./src/pfact/HPL_dlocmax.c +TARGET_1_OBJ_71 = ./src/pfact/HPL_dlocmax.o +TARGET_1_FLAG_71 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_72 = ./src/pfact/HPL_dlocswpN.c +TARGET_1_OBJ_72 = ./src/pfact/HPL_dlocswpN.o +TARGET_1_FLAG_72 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_73 = ./src/pfact/HPL_dlocswpT.c +TARGET_1_OBJ_73 = ./src/pfact/HPL_dlocswpT.o +TARGET_1_FLAG_73 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_74 = ./src/pfact/HPL_pdmxswp.c +TARGET_1_OBJ_74 = ./src/pfact/HPL_pdmxswp.o +TARGET_1_FLAG_74 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_75 = ./src/pfact/HPL_pdpancrN.c +TARGET_1_OBJ_75 = ./src/pfact/HPL_pdpancrN.o +TARGET_1_FLAG_75 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_76 = ./src/pfact/HPL_pdpancrT.c +TARGET_1_OBJ_76 = ./src/pfact/HPL_pdpancrT.o +TARGET_1_FLAG_76 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_77 = ./src/pfact/HPL_pdpanllN.c +TARGET_1_OBJ_77 = ./src/pfact/HPL_pdpanllN.o +TARGET_1_FLAG_77 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_78 = ./src/pfact/HPL_pdpanllT.c +TARGET_1_OBJ_78 = ./src/pfact/HPL_pdpanllT.o +TARGET_1_FLAG_78 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_79 = ./src/pfact/HPL_pdpanrlN.c +TARGET_1_OBJ_79 = ./src/pfact/HPL_pdpanrlN.o +TARGET_1_FLAG_79 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_80 = ./src/pfact/HPL_pdpanrlT.c +TARGET_1_OBJ_80 = ./src/pfact/HPL_pdpanrlT.o +TARGET_1_FLAG_80 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_81 = ./src/pfact/HPL_pdrpanllN.c +TARGET_1_OBJ_81 = ./src/pfact/HPL_pdrpanllN.o +TARGET_1_FLAG_81 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_82 = ./src/pfact/HPL_pdrpanllT.c +TARGET_1_OBJ_82 = 
./src/pfact/HPL_pdrpanllT.o +TARGET_1_FLAG_82 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_83 = ./src/pfact/HPL_pdrpancrN.c +TARGET_1_OBJ_83 = ./src/pfact/HPL_pdrpancrN.o +TARGET_1_FLAG_83 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_84 = ./src/pfact/HPL_pdrpancrT.c +TARGET_1_OBJ_84 = ./src/pfact/HPL_pdrpancrT.o +TARGET_1_FLAG_84 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_85 = ./src/pfact/HPL_pdrpanrlN.c +TARGET_1_OBJ_85 = ./src/pfact/HPL_pdrpanrlN.o +TARGET_1_FLAG_85 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_86 = ./src/pfact/HPL_pdrpanrlT.c +TARGET_1_OBJ_86 = ./src/pfact/HPL_pdrpanrlT.o +TARGET_1_FLAG_86 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_87 = ./src/pfact/HPL_pdfact.c +TARGET_1_OBJ_87 = ./src/pfact/HPL_pdfact.o +TARGET_1_FLAG_87 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_88 = ./src/pgesv/HPL_pipid.c +TARGET_1_OBJ_88 = ./src/pgesv/HPL_pipid.o +TARGET_1_FLAG_88 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_89 = ./src/pgesv/HPL_plindx0.c +TARGET_1_OBJ_89 = ./src/pgesv/HPL_plindx0.o +TARGET_1_FLAG_89 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_90 = ./src/pgesv/HPL_pdlaswp00N.c +TARGET_1_OBJ_90 = ./src/pgesv/HPL_pdlaswp00N.o +TARGET_1_FLAG_90 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 
-I ./include ${FLAGS} + +TARGET_1_SRC_91 = ./src/pgesv/HPL_pdlaswp00T.c +TARGET_1_OBJ_91 = ./src/pgesv/HPL_pdlaswp00T.o +TARGET_1_FLAG_91 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_92 = ./src/pgesv/HPL_perm.c +TARGET_1_OBJ_92 = ./src/pgesv/HPL_perm.o +TARGET_1_FLAG_92 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_93 = ./src/pgesv/HPL_logsort.c +TARGET_1_OBJ_93 = ./src/pgesv/HPL_logsort.o +TARGET_1_FLAG_93 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_94 = ./src/pgesv/HPL_plindx10.c +TARGET_1_OBJ_94 = ./src/pgesv/HPL_plindx10.o +TARGET_1_FLAG_94 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_95 = ./src/pgesv/HPL_plindx1.c +TARGET_1_OBJ_95 = ./src/pgesv/HPL_plindx1.o +TARGET_1_FLAG_95 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_96 = ./src/pgesv/HPL_spreadN.c +TARGET_1_OBJ_96 = ./src/pgesv/HPL_spreadN.o +TARGET_1_FLAG_96 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_97 = ./src/pgesv/HPL_spreadT.c +TARGET_1_OBJ_97 = ./src/pgesv/HPL_spreadT.o +TARGET_1_FLAG_97 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_98 = ./src/pgesv/HPL_rollN.c +TARGET_1_OBJ_98 = ./src/pgesv/HPL_rollN.o +TARGET_1_FLAG_98 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_99 = ./src/pgesv/HPL_rollT.c +TARGET_1_OBJ_99 = ./src/pgesv/HPL_rollT.o +TARGET_1_FLAG_99 = -DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_100 = ./src/pgesv/HPL_equil.c +TARGET_1_OBJ_100 = ./src/pgesv/HPL_equil.o +TARGET_1_FLAG_100 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_101 = ./src/pgesv/HPL_pdlaswp01N.c +TARGET_1_OBJ_101 = ./src/pgesv/HPL_pdlaswp01N.o +TARGET_1_FLAG_101 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_102 = ./src/pgesv/HPL_pdlaswp01T.c +TARGET_1_OBJ_102 = ./src/pgesv/HPL_pdlaswp01T.o +TARGET_1_FLAG_102 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_103 = ./src/pgesv/HPL_pdupdateNN.c +TARGET_1_OBJ_103 = ./src/pgesv/HPL_pdupdateNN.o +TARGET_1_FLAG_103 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_104 = ./src/pgesv/HPL_pdupdateNT.c +TARGET_1_OBJ_104 = ./src/pgesv/HPL_pdupdateNT.o +TARGET_1_FLAG_104 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_105 = ./src/pgesv/HPL_pdupdateTN.c +TARGET_1_OBJ_105 = ./src/pgesv/HPL_pdupdateTN.o +TARGET_1_FLAG_105 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_106 = ./src/pgesv/HPL_pdupdateTT.c +TARGET_1_OBJ_106 = ./src/pgesv/HPL_pdupdateTT.o +TARGET_1_FLAG_106 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_107 = ./src/pgesv/HPL_pdtrsv.c +TARGET_1_OBJ_107 = ./src/pgesv/HPL_pdtrsv.o +TARGET_1_FLAG_107 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + 
+TARGET_1_SRC_108 = ./src/pgesv/HPL_pdgesv0.c +TARGET_1_OBJ_108 = ./src/pgesv/HPL_pdgesv0.o +TARGET_1_FLAG_108 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_109 = ./src/pgesv/HPL_pdgesvK1.c +TARGET_1_OBJ_109 = ./src/pgesv/HPL_pdgesvK1.o +TARGET_1_FLAG_109 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_110 = ./src/pgesv/HPL_pdgesvK2.c +TARGET_1_OBJ_110 = ./src/pgesv/HPL_pdgesvK2.o +TARGET_1_FLAG_110 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_111 = ./src/pgesv/HPL_pdgesv.c +TARGET_1_OBJ_111 = ./src/pgesv/HPL_pdgesv.o +TARGET_1_FLAG_111 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_112 = ./testing/matgen/HPL_dmatgen.c +TARGET_1_OBJ_112 = ./testing/matgen/HPL_dmatgen.o +TARGET_1_FLAG_112 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_113 = ./testing/matgen/HPL_ladd.c +TARGET_1_OBJ_113 = ./testing/matgen/HPL_ladd.o +TARGET_1_FLAG_113 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_114 = ./testing/matgen/HPL_lmul.c +TARGET_1_OBJ_114 = ./testing/matgen/HPL_lmul.o +TARGET_1_FLAG_114 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_115 = ./testing/matgen/HPL_xjumpm.c +TARGET_1_OBJ_115 = ./testing/matgen/HPL_xjumpm.o +TARGET_1_FLAG_115 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_116 = ./testing/matgen/HPL_jumpit.c +TARGET_1_OBJ_116 = ./testing/matgen/HPL_jumpit.o 
+TARGET_1_FLAG_116 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_117 = ./testing/matgen/HPL_rand.c +TARGET_1_OBJ_117 = ./testing/matgen/HPL_rand.o +TARGET_1_FLAG_117 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_118 = ./testing/matgen/HPL_setran.c +TARGET_1_OBJ_118 = ./testing/matgen/HPL_setran.o +TARGET_1_FLAG_118 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_119 = ./testing/timer/HPL_timer.c +TARGET_1_OBJ_119 = ./testing/timer/HPL_timer.o +TARGET_1_FLAG_119 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_120 = ./testing/timer/HPL_timer_cputime.c +TARGET_1_OBJ_120 = ./testing/timer/HPL_timer_cputime.o +TARGET_1_FLAG_120 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_121 = ./testing/timer/HPL_timer_walltime.c +TARGET_1_OBJ_121 = ./testing/timer/HPL_timer_walltime.o +TARGET_1_FLAG_121 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_122 = ./testing/pmatgen/HPL_pdmatgen.c +TARGET_1_OBJ_122 = ./testing/pmatgen/HPL_pdmatgen.o +TARGET_1_FLAG_122 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_123 = ./testing/ptimer/HPL_ptimer.c +TARGET_1_OBJ_123 = ./testing/ptimer/HPL_ptimer.o +TARGET_1_FLAG_123 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_124 = ./testing/ptimer/HPL_ptimer_cputime.c +TARGET_1_OBJ_124 = ./testing/ptimer/HPL_ptimer_cputime.o +TARGET_1_FLAG_124 = -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_125 = ./testing/ptimer/HPL_ptimer_walltime.c +TARGET_1_OBJ_125 = ./testing/ptimer/HPL_ptimer_walltime.o +TARGET_1_FLAG_125 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_2_SRC_0 = ./src/cuda/cuda_dgemm.cpp.dp.cpp +TARGET_2_OBJ_0 = ./src/cuda/cuda_dgemm.cpp.dp.o +TARGET_2_FLAG_0 = -O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS} + +TARGET_2_SRC_1 = ./src/cuda/cuda_dgemm.cpp.dp.cpp +TARGET_2_OBJ_1 = ./src/cuda/cuda_dgemm.cpp.dp.o +TARGET_2_FLAG_1 = -O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS} + +TARGET_0 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl +TARGET_1 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a +TARGET_2 := libdgemm.so.1.0.1 + +TARGET := ${TARGET_0} ${TARGET_1} ${TARGET_2} + +.PHONY:all clean +OBJS_0 := ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} ${TARGET_0_OBJ_6} ${TARGET_0_OBJ_7} ${TARGET_0_OBJ_8} ${TARGET_0_OBJ_9} ${TARGET_0_OBJ_10} ${TARGET_0_OBJ_11} +OBJS_1 := ${TARGET_1_OBJ_0} ${TARGET_1_OBJ_1} ${TARGET_1_OBJ_2} ${TARGET_1_OBJ_3} ${TARGET_1_OBJ_4} ${TARGET_1_OBJ_5} ${TARGET_1_OBJ_6} ${TARGET_1_OBJ_7} ${TARGET_1_OBJ_8} ${TARGET_1_OBJ_9} ${TARGET_1_OBJ_10} ${TARGET_1_OBJ_11} ${TARGET_1_OBJ_12} ${TARGET_1_OBJ_13} ${TARGET_1_OBJ_14} ${TARGET_1_OBJ_15} ${TARGET_1_OBJ_16} ${TARGET_1_OBJ_17} ${TARGET_1_OBJ_18} ${TARGET_1_OBJ_19} ${TARGET_1_OBJ_20} ${TARGET_1_OBJ_21} ${TARGET_1_OBJ_22} ${TARGET_1_OBJ_23} ${TARGET_1_OBJ_24} ${TARGET_1_OBJ_25} ${TARGET_1_OBJ_26} ${TARGET_1_OBJ_27} ${TARGET_1_OBJ_28} ${TARGET_1_OBJ_29} ${TARGET_1_OBJ_30} ${TARGET_1_OBJ_31} ${TARGET_1_OBJ_32} ${TARGET_1_OBJ_33} ${TARGET_1_OBJ_34} ${TARGET_1_OBJ_35} ${TARGET_1_OBJ_36} ${TARGET_1_OBJ_37} 
${TARGET_1_OBJ_38} ${TARGET_1_OBJ_39} ${TARGET_1_OBJ_40} ${TARGET_1_OBJ_41} ${TARGET_1_OBJ_42} ${TARGET_1_OBJ_43} ${TARGET_1_OBJ_44} ${TARGET_1_OBJ_45} ${TARGET_1_OBJ_46} ${TARGET_1_OBJ_47} ${TARGET_1_OBJ_48} ${TARGET_1_OBJ_49} ${TARGET_1_OBJ_50} ${TARGET_1_OBJ_51} ${TARGET_1_OBJ_52} ${TARGET_1_OBJ_53} ${TARGET_1_OBJ_54} ${TARGET_1_OBJ_55} ${TARGET_1_OBJ_56} ${TARGET_1_OBJ_57} ${TARGET_1_OBJ_58} ${TARGET_1_OBJ_59} ${TARGET_1_OBJ_60} ${TARGET_1_OBJ_61} ${TARGET_1_OBJ_62} ${TARGET_1_OBJ_63} ${TARGET_1_OBJ_64} ${TARGET_1_OBJ_65} ${TARGET_1_OBJ_66} ${TARGET_1_OBJ_67} ${TARGET_1_OBJ_68} ${TARGET_1_OBJ_69} ${TARGET_1_OBJ_70} ${TARGET_1_OBJ_71} ${TARGET_1_OBJ_72} ${TARGET_1_OBJ_73} ${TARGET_1_OBJ_74} ${TARGET_1_OBJ_75} ${TARGET_1_OBJ_76} ${TARGET_1_OBJ_77} ${TARGET_1_OBJ_78} ${TARGET_1_OBJ_79} ${TARGET_1_OBJ_80} ${TARGET_1_OBJ_81} ${TARGET_1_OBJ_82} ${TARGET_1_OBJ_83} ${TARGET_1_OBJ_84} ${TARGET_1_OBJ_85} ${TARGET_1_OBJ_86} ${TARGET_1_OBJ_87} ${TARGET_1_OBJ_88} ${TARGET_1_OBJ_89} ${TARGET_1_OBJ_90} ${TARGET_1_OBJ_91} ${TARGET_1_OBJ_92} ${TARGET_1_OBJ_93} ${TARGET_1_OBJ_94} ${TARGET_1_OBJ_95} ${TARGET_1_OBJ_96} ${TARGET_1_OBJ_97} ${TARGET_1_OBJ_98} ${TARGET_1_OBJ_99} ${TARGET_1_OBJ_100} ${TARGET_1_OBJ_101} ${TARGET_1_OBJ_102} ${TARGET_1_OBJ_103} ${TARGET_1_OBJ_104} ${TARGET_1_OBJ_105} ${TARGET_1_OBJ_106} ${TARGET_1_OBJ_107} ${TARGET_1_OBJ_108} ${TARGET_1_OBJ_109} ${TARGET_1_OBJ_110} ${TARGET_1_OBJ_111} ${TARGET_1_OBJ_112} ${TARGET_1_OBJ_113} ${TARGET_1_OBJ_114} ${TARGET_1_OBJ_115} ${TARGET_1_OBJ_116} ${TARGET_1_OBJ_117} ${TARGET_1_OBJ_118} ${TARGET_1_OBJ_119} ${TARGET_1_OBJ_120} ${TARGET_1_OBJ_121} ${TARGET_1_OBJ_122} ${TARGET_1_OBJ_123} ${TARGET_1_OBJ_124} ${TARGET_1_OBJ_125} +OBJS_2 := ${TARGET_2_OBJ_0} ${TARGET_2_OBJ_1} +all: $(TARGET) +$(TARGET_0): $(OBJS_0) + $(CC) -fsycl -o $@ $^ $(LIB) -qmkl + +$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0) + cc -c ${TARGET_0_SRC_0} -o ${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0) + +$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1) + cc -c ${TARGET_0_SRC_1} -o 
${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1) + +$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2) + cc -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2) + +$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3) + cc -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3) + +$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4) + cc -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4) + +$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5) + cc -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5) + +$(TARGET_0_OBJ_6):$(TARGET_0_SRC_6) + cc -c ${TARGET_0_SRC_6} -o ${TARGET_0_OBJ_6} $(TARGET_0_FLAG_6) + +$(TARGET_0_OBJ_7):$(TARGET_0_SRC_7) + cc -c ${TARGET_0_SRC_7} -o ${TARGET_0_OBJ_7} $(TARGET_0_FLAG_7) + +$(TARGET_0_OBJ_8):$(TARGET_0_SRC_8) + cc -c ${TARGET_0_SRC_8} -o ${TARGET_0_OBJ_8} $(TARGET_0_FLAG_8) + +$(TARGET_0_OBJ_9):$(TARGET_0_SRC_9) + cc -c ${TARGET_0_SRC_9} -o ${TARGET_0_OBJ_9} $(TARGET_0_FLAG_9) + +$(TARGET_0_OBJ_10):$(TARGET_0_SRC_10) + cc -c ${TARGET_0_SRC_10} -o ${TARGET_0_OBJ_10} $(TARGET_0_FLAG_10) + +$(TARGET_0_OBJ_11):$(TARGET_0_SRC_11) + cc -c ${TARGET_0_SRC_11} -o ${TARGET_0_OBJ_11} $(TARGET_0_FLAG_11) + +$(TARGET_1): $(OBJS_1) + ar -r $@ $^ $(LIB) -qmkl + +$(TARGET_1_OBJ_0):$(TARGET_1_SRC_0) + cc -c ${TARGET_1_SRC_0} -o ${TARGET_1_OBJ_0} $(TARGET_1_FLAG_0) + +$(TARGET_1_OBJ_1):$(TARGET_1_SRC_1) + cc -c ${TARGET_1_SRC_1} -o ${TARGET_1_OBJ_1} $(TARGET_1_FLAG_1) + +$(TARGET_1_OBJ_2):$(TARGET_1_SRC_2) + cc -c ${TARGET_1_SRC_2} -o ${TARGET_1_OBJ_2} $(TARGET_1_FLAG_2) + +$(TARGET_1_OBJ_3):$(TARGET_1_SRC_3) + cc -c ${TARGET_1_SRC_3} -o ${TARGET_1_OBJ_3} $(TARGET_1_FLAG_3) + +$(TARGET_1_OBJ_4):$(TARGET_1_SRC_4) + cc -c ${TARGET_1_SRC_4} -o ${TARGET_1_OBJ_4} $(TARGET_1_FLAG_4) + +$(TARGET_1_OBJ_5):$(TARGET_1_SRC_5) + cc -c ${TARGET_1_SRC_5} -o ${TARGET_1_OBJ_5} $(TARGET_1_FLAG_5) + +$(TARGET_1_OBJ_6):$(TARGET_1_SRC_6) + cc -c ${TARGET_1_SRC_6} -o ${TARGET_1_OBJ_6} $(TARGET_1_FLAG_6) + +$(TARGET_1_OBJ_7):$(TARGET_1_SRC_7) + cc -c ${TARGET_1_SRC_7} -o ${TARGET_1_OBJ_7} $(TARGET_1_FLAG_7) + 
+$(TARGET_1_OBJ_8):$(TARGET_1_SRC_8) + cc -c ${TARGET_1_SRC_8} -o ${TARGET_1_OBJ_8} $(TARGET_1_FLAG_8) + +$(TARGET_1_OBJ_9):$(TARGET_1_SRC_9) + cc -c ${TARGET_1_SRC_9} -o ${TARGET_1_OBJ_9} $(TARGET_1_FLAG_9) + +$(TARGET_1_OBJ_10):$(TARGET_1_SRC_10) + cc -c ${TARGET_1_SRC_10} -o ${TARGET_1_OBJ_10} $(TARGET_1_FLAG_10) + +$(TARGET_1_OBJ_11):$(TARGET_1_SRC_11) + cc -c ${TARGET_1_SRC_11} -o ${TARGET_1_OBJ_11} $(TARGET_1_FLAG_11) + +$(TARGET_1_OBJ_12):$(TARGET_1_SRC_12) + cc -c ${TARGET_1_SRC_12} -o ${TARGET_1_OBJ_12} $(TARGET_1_FLAG_12) + +$(TARGET_1_OBJ_13):$(TARGET_1_SRC_13) + cc -c ${TARGET_1_SRC_13} -o ${TARGET_1_OBJ_13} $(TARGET_1_FLAG_13) + +$(TARGET_1_OBJ_14):$(TARGET_1_SRC_14) + cc -c ${TARGET_1_SRC_14} -o ${TARGET_1_OBJ_14} $(TARGET_1_FLAG_14) + +$(TARGET_1_OBJ_15):$(TARGET_1_SRC_15) + cc -c ${TARGET_1_SRC_15} -o ${TARGET_1_OBJ_15} $(TARGET_1_FLAG_15) + +$(TARGET_1_OBJ_16):$(TARGET_1_SRC_16) + cc -c ${TARGET_1_SRC_16} -o ${TARGET_1_OBJ_16} $(TARGET_1_FLAG_16) + +$(TARGET_1_OBJ_17):$(TARGET_1_SRC_17) + cc -c ${TARGET_1_SRC_17} -o ${TARGET_1_OBJ_17} $(TARGET_1_FLAG_17) + +$(TARGET_1_OBJ_18):$(TARGET_1_SRC_18) + cc -c ${TARGET_1_SRC_18} -o ${TARGET_1_OBJ_18} $(TARGET_1_FLAG_18) + +$(TARGET_1_OBJ_19):$(TARGET_1_SRC_19) + cc -c ${TARGET_1_SRC_19} -o ${TARGET_1_OBJ_19} $(TARGET_1_FLAG_19) + +$(TARGET_1_OBJ_20):$(TARGET_1_SRC_20) + cc -c ${TARGET_1_SRC_20} -o ${TARGET_1_OBJ_20} $(TARGET_1_FLAG_20) + +$(TARGET_1_OBJ_21):$(TARGET_1_SRC_21) + cc -c ${TARGET_1_SRC_21} -o ${TARGET_1_OBJ_21} $(TARGET_1_FLAG_21) + +$(TARGET_1_OBJ_22):$(TARGET_1_SRC_22) + cc -c ${TARGET_1_SRC_22} -o ${TARGET_1_OBJ_22} $(TARGET_1_FLAG_22) + +$(TARGET_1_OBJ_23):$(TARGET_1_SRC_23) + cc -c ${TARGET_1_SRC_23} -o ${TARGET_1_OBJ_23} $(TARGET_1_FLAG_23) + +$(TARGET_1_OBJ_24):$(TARGET_1_SRC_24) + cc -c ${TARGET_1_SRC_24} -o ${TARGET_1_OBJ_24} $(TARGET_1_FLAG_24) + +$(TARGET_1_OBJ_25):$(TARGET_1_SRC_25) + cc -c ${TARGET_1_SRC_25} -o ${TARGET_1_OBJ_25} $(TARGET_1_FLAG_25) + 
+$(TARGET_1_OBJ_26):$(TARGET_1_SRC_26) + cc -c ${TARGET_1_SRC_26} -o ${TARGET_1_OBJ_26} $(TARGET_1_FLAG_26) + +$(TARGET_1_OBJ_27):$(TARGET_1_SRC_27) + cc -c ${TARGET_1_SRC_27} -o ${TARGET_1_OBJ_27} $(TARGET_1_FLAG_27) + +$(TARGET_1_OBJ_28):$(TARGET_1_SRC_28) + cc -c ${TARGET_1_SRC_28} -o ${TARGET_1_OBJ_28} $(TARGET_1_FLAG_28) + +$(TARGET_1_OBJ_29):$(TARGET_1_SRC_29) + cc -c ${TARGET_1_SRC_29} -o ${TARGET_1_OBJ_29} $(TARGET_1_FLAG_29) + +$(TARGET_1_OBJ_30):$(TARGET_1_SRC_30) + cc -c ${TARGET_1_SRC_30} -o ${TARGET_1_OBJ_30} $(TARGET_1_FLAG_30) + +$(TARGET_1_OBJ_31):$(TARGET_1_SRC_31) + cc -c ${TARGET_1_SRC_31} -o ${TARGET_1_OBJ_31} $(TARGET_1_FLAG_31) + +$(TARGET_1_OBJ_32):$(TARGET_1_SRC_32) + cc -c ${TARGET_1_SRC_32} -o ${TARGET_1_OBJ_32} $(TARGET_1_FLAG_32) + +$(TARGET_1_OBJ_33):$(TARGET_1_SRC_33) + cc -c ${TARGET_1_SRC_33} -o ${TARGET_1_OBJ_33} $(TARGET_1_FLAG_33) + +$(TARGET_1_OBJ_34):$(TARGET_1_SRC_34) + cc -c ${TARGET_1_SRC_34} -o ${TARGET_1_OBJ_34} $(TARGET_1_FLAG_34) + +$(TARGET_1_OBJ_35):$(TARGET_1_SRC_35) + cc -c ${TARGET_1_SRC_35} -o ${TARGET_1_OBJ_35} $(TARGET_1_FLAG_35) + +$(TARGET_1_OBJ_36):$(TARGET_1_SRC_36) + cc -c ${TARGET_1_SRC_36} -o ${TARGET_1_OBJ_36} $(TARGET_1_FLAG_36) + +$(TARGET_1_OBJ_37):$(TARGET_1_SRC_37) + cc -c ${TARGET_1_SRC_37} -o ${TARGET_1_OBJ_37} $(TARGET_1_FLAG_37) + +$(TARGET_1_OBJ_38):$(TARGET_1_SRC_38) + cc -c ${TARGET_1_SRC_38} -o ${TARGET_1_OBJ_38} $(TARGET_1_FLAG_38) + +$(TARGET_1_OBJ_39):$(TARGET_1_SRC_39) + cc -c ${TARGET_1_SRC_39} -o ${TARGET_1_OBJ_39} $(TARGET_1_FLAG_39) + +$(TARGET_1_OBJ_40):$(TARGET_1_SRC_40) + cc -c ${TARGET_1_SRC_40} -o ${TARGET_1_OBJ_40} $(TARGET_1_FLAG_40) + +$(TARGET_1_OBJ_41):$(TARGET_1_SRC_41) + cc -c ${TARGET_1_SRC_41} -o ${TARGET_1_OBJ_41} $(TARGET_1_FLAG_41) + +$(TARGET_1_OBJ_42):$(TARGET_1_SRC_42) + cc -c ${TARGET_1_SRC_42} -o ${TARGET_1_OBJ_42} $(TARGET_1_FLAG_42) + +$(TARGET_1_OBJ_43):$(TARGET_1_SRC_43) + cc -c ${TARGET_1_SRC_43} -o ${TARGET_1_OBJ_43} $(TARGET_1_FLAG_43) + 
+$(TARGET_1_OBJ_44):$(TARGET_1_SRC_44) + cc -c ${TARGET_1_SRC_44} -o ${TARGET_1_OBJ_44} $(TARGET_1_FLAG_44) + +$(TARGET_1_OBJ_45):$(TARGET_1_SRC_45) + cc -c ${TARGET_1_SRC_45} -o ${TARGET_1_OBJ_45} $(TARGET_1_FLAG_45) + +$(TARGET_1_OBJ_46):$(TARGET_1_SRC_46) + cc -c ${TARGET_1_SRC_46} -o ${TARGET_1_OBJ_46} $(TARGET_1_FLAG_46) + +$(TARGET_1_OBJ_47):$(TARGET_1_SRC_47) + cc -c ${TARGET_1_SRC_47} -o ${TARGET_1_OBJ_47} $(TARGET_1_FLAG_47) + +$(TARGET_1_OBJ_48):$(TARGET_1_SRC_48) + cc -c ${TARGET_1_SRC_48} -o ${TARGET_1_OBJ_48} $(TARGET_1_FLAG_48) + +$(TARGET_1_OBJ_49):$(TARGET_1_SRC_49) + cc -c ${TARGET_1_SRC_49} -o ${TARGET_1_OBJ_49} $(TARGET_1_FLAG_49) + +$(TARGET_1_OBJ_50):$(TARGET_1_SRC_50) + cc -c ${TARGET_1_SRC_50} -o ${TARGET_1_OBJ_50} $(TARGET_1_FLAG_50) + +$(TARGET_1_OBJ_51):$(TARGET_1_SRC_51) + cc -c ${TARGET_1_SRC_51} -o ${TARGET_1_OBJ_51} $(TARGET_1_FLAG_51) + +$(TARGET_1_OBJ_52):$(TARGET_1_SRC_52) + cc -c ${TARGET_1_SRC_52} -o ${TARGET_1_OBJ_52} $(TARGET_1_FLAG_52) + +$(TARGET_1_OBJ_53):$(TARGET_1_SRC_53) + cc -c ${TARGET_1_SRC_53} -o ${TARGET_1_OBJ_53} $(TARGET_1_FLAG_53) + +$(TARGET_1_OBJ_54):$(TARGET_1_SRC_54) + cc -c ${TARGET_1_SRC_54} -o ${TARGET_1_OBJ_54} $(TARGET_1_FLAG_54) + +$(TARGET_1_OBJ_55):$(TARGET_1_SRC_55) + cc -c ${TARGET_1_SRC_55} -o ${TARGET_1_OBJ_55} $(TARGET_1_FLAG_55) + +$(TARGET_1_OBJ_56):$(TARGET_1_SRC_56) + cc -c ${TARGET_1_SRC_56} -o ${TARGET_1_OBJ_56} $(TARGET_1_FLAG_56) + +$(TARGET_1_OBJ_57):$(TARGET_1_SRC_57) + cc -c ${TARGET_1_SRC_57} -o ${TARGET_1_OBJ_57} $(TARGET_1_FLAG_57) + +$(TARGET_1_OBJ_58):$(TARGET_1_SRC_58) + cc -c ${TARGET_1_SRC_58} -o ${TARGET_1_OBJ_58} $(TARGET_1_FLAG_58) + +$(TARGET_1_OBJ_59):$(TARGET_1_SRC_59) + cc -c ${TARGET_1_SRC_59} -o ${TARGET_1_OBJ_59} $(TARGET_1_FLAG_59) + +$(TARGET_1_OBJ_60):$(TARGET_1_SRC_60) + cc -c ${TARGET_1_SRC_60} -o ${TARGET_1_OBJ_60} $(TARGET_1_FLAG_60) + +$(TARGET_1_OBJ_61):$(TARGET_1_SRC_61) + cc -c ${TARGET_1_SRC_61} -o ${TARGET_1_OBJ_61} $(TARGET_1_FLAG_61) + 
+$(TARGET_1_OBJ_62):$(TARGET_1_SRC_62) + cc -c ${TARGET_1_SRC_62} -o ${TARGET_1_OBJ_62} $(TARGET_1_FLAG_62) + +$(TARGET_1_OBJ_63):$(TARGET_1_SRC_63) + cc -c ${TARGET_1_SRC_63} -o ${TARGET_1_OBJ_63} $(TARGET_1_FLAG_63) + +$(TARGET_1_OBJ_64):$(TARGET_1_SRC_64) + cc -c ${TARGET_1_SRC_64} -o ${TARGET_1_OBJ_64} $(TARGET_1_FLAG_64) + +$(TARGET_1_OBJ_65):$(TARGET_1_SRC_65) + cc -c ${TARGET_1_SRC_65} -o ${TARGET_1_OBJ_65} $(TARGET_1_FLAG_65) + +$(TARGET_1_OBJ_66):$(TARGET_1_SRC_66) + cc -c ${TARGET_1_SRC_66} -o ${TARGET_1_OBJ_66} $(TARGET_1_FLAG_66) + +$(TARGET_1_OBJ_67):$(TARGET_1_SRC_67) + cc -c ${TARGET_1_SRC_67} -o ${TARGET_1_OBJ_67} $(TARGET_1_FLAG_67) + +$(TARGET_1_OBJ_68):$(TARGET_1_SRC_68) + cc -c ${TARGET_1_SRC_68} -o ${TARGET_1_OBJ_68} $(TARGET_1_FLAG_68) + +$(TARGET_1_OBJ_69):$(TARGET_1_SRC_69) + cc -c ${TARGET_1_SRC_69} -o ${TARGET_1_OBJ_69} $(TARGET_1_FLAG_69) + +$(TARGET_1_OBJ_70):$(TARGET_1_SRC_70) + cc -c ${TARGET_1_SRC_70} -o ${TARGET_1_OBJ_70} $(TARGET_1_FLAG_70) + +$(TARGET_1_OBJ_71):$(TARGET_1_SRC_71) + cc -c ${TARGET_1_SRC_71} -o ${TARGET_1_OBJ_71} $(TARGET_1_FLAG_71) + +$(TARGET_1_OBJ_72):$(TARGET_1_SRC_72) + cc -c ${TARGET_1_SRC_72} -o ${TARGET_1_OBJ_72} $(TARGET_1_FLAG_72) + +$(TARGET_1_OBJ_73):$(TARGET_1_SRC_73) + cc -c ${TARGET_1_SRC_73} -o ${TARGET_1_OBJ_73} $(TARGET_1_FLAG_73) + +$(TARGET_1_OBJ_74):$(TARGET_1_SRC_74) + cc -c ${TARGET_1_SRC_74} -o ${TARGET_1_OBJ_74} $(TARGET_1_FLAG_74) + +$(TARGET_1_OBJ_75):$(TARGET_1_SRC_75) + cc -c ${TARGET_1_SRC_75} -o ${TARGET_1_OBJ_75} $(TARGET_1_FLAG_75) + +$(TARGET_1_OBJ_76):$(TARGET_1_SRC_76) + cc -c ${TARGET_1_SRC_76} -o ${TARGET_1_OBJ_76} $(TARGET_1_FLAG_76) + +$(TARGET_1_OBJ_77):$(TARGET_1_SRC_77) + cc -c ${TARGET_1_SRC_77} -o ${TARGET_1_OBJ_77} $(TARGET_1_FLAG_77) + +$(TARGET_1_OBJ_78):$(TARGET_1_SRC_78) + cc -c ${TARGET_1_SRC_78} -o ${TARGET_1_OBJ_78} $(TARGET_1_FLAG_78) + +$(TARGET_1_OBJ_79):$(TARGET_1_SRC_79) + cc -c ${TARGET_1_SRC_79} -o ${TARGET_1_OBJ_79} $(TARGET_1_FLAG_79) + 
+$(TARGET_1_OBJ_80):$(TARGET_1_SRC_80) + cc -c ${TARGET_1_SRC_80} -o ${TARGET_1_OBJ_80} $(TARGET_1_FLAG_80) + +$(TARGET_1_OBJ_81):$(TARGET_1_SRC_81) + cc -c ${TARGET_1_SRC_81} -o ${TARGET_1_OBJ_81} $(TARGET_1_FLAG_81) + +$(TARGET_1_OBJ_82):$(TARGET_1_SRC_82) + cc -c ${TARGET_1_SRC_82} -o ${TARGET_1_OBJ_82} $(TARGET_1_FLAG_82) + +$(TARGET_1_OBJ_83):$(TARGET_1_SRC_83) + cc -c ${TARGET_1_SRC_83} -o ${TARGET_1_OBJ_83} $(TARGET_1_FLAG_83) + +$(TARGET_1_OBJ_84):$(TARGET_1_SRC_84) + cc -c ${TARGET_1_SRC_84} -o ${TARGET_1_OBJ_84} $(TARGET_1_FLAG_84) + +$(TARGET_1_OBJ_85):$(TARGET_1_SRC_85) + cc -c ${TARGET_1_SRC_85} -o ${TARGET_1_OBJ_85} $(TARGET_1_FLAG_85) + +$(TARGET_1_OBJ_86):$(TARGET_1_SRC_86) + cc -c ${TARGET_1_SRC_86} -o ${TARGET_1_OBJ_86} $(TARGET_1_FLAG_86) + +$(TARGET_1_OBJ_87):$(TARGET_1_SRC_87) + cc -c ${TARGET_1_SRC_87} -o ${TARGET_1_OBJ_87} $(TARGET_1_FLAG_87) + +$(TARGET_1_OBJ_88):$(TARGET_1_SRC_88) + cc -c ${TARGET_1_SRC_88} -o ${TARGET_1_OBJ_88} $(TARGET_1_FLAG_88) + +$(TARGET_1_OBJ_89):$(TARGET_1_SRC_89) + cc -c ${TARGET_1_SRC_89} -o ${TARGET_1_OBJ_89} $(TARGET_1_FLAG_89) + +$(TARGET_1_OBJ_90):$(TARGET_1_SRC_90) + cc -c ${TARGET_1_SRC_90} -o ${TARGET_1_OBJ_90} $(TARGET_1_FLAG_90) + +$(TARGET_1_OBJ_91):$(TARGET_1_SRC_91) + cc -c ${TARGET_1_SRC_91} -o ${TARGET_1_OBJ_91} $(TARGET_1_FLAG_91) + +$(TARGET_1_OBJ_92):$(TARGET_1_SRC_92) + cc -c ${TARGET_1_SRC_92} -o ${TARGET_1_OBJ_92} $(TARGET_1_FLAG_92) + +$(TARGET_1_OBJ_93):$(TARGET_1_SRC_93) + cc -c ${TARGET_1_SRC_93} -o ${TARGET_1_OBJ_93} $(TARGET_1_FLAG_93) + +$(TARGET_1_OBJ_94):$(TARGET_1_SRC_94) + cc -c ${TARGET_1_SRC_94} -o ${TARGET_1_OBJ_94} $(TARGET_1_FLAG_94) + +$(TARGET_1_OBJ_95):$(TARGET_1_SRC_95) + cc -c ${TARGET_1_SRC_95} -o ${TARGET_1_OBJ_95} $(TARGET_1_FLAG_95) + +$(TARGET_1_OBJ_96):$(TARGET_1_SRC_96) + cc -c ${TARGET_1_SRC_96} -o ${TARGET_1_OBJ_96} $(TARGET_1_FLAG_96) + +$(TARGET_1_OBJ_97):$(TARGET_1_SRC_97) + cc -c ${TARGET_1_SRC_97} -o ${TARGET_1_OBJ_97} $(TARGET_1_FLAG_97) + 
+$(TARGET_1_OBJ_98):$(TARGET_1_SRC_98) + cc -c ${TARGET_1_SRC_98} -o ${TARGET_1_OBJ_98} $(TARGET_1_FLAG_98) + +$(TARGET_1_OBJ_99):$(TARGET_1_SRC_99) + cc -c ${TARGET_1_SRC_99} -o ${TARGET_1_OBJ_99} $(TARGET_1_FLAG_99) + +$(TARGET_1_OBJ_100):$(TARGET_1_SRC_100) + cc -c ${TARGET_1_SRC_100} -o ${TARGET_1_OBJ_100} $(TARGET_1_FLAG_100) + +$(TARGET_1_OBJ_101):$(TARGET_1_SRC_101) + cc -c ${TARGET_1_SRC_101} -o ${TARGET_1_OBJ_101} $(TARGET_1_FLAG_101) + +$(TARGET_1_OBJ_102):$(TARGET_1_SRC_102) + cc -c ${TARGET_1_SRC_102} -o ${TARGET_1_OBJ_102} $(TARGET_1_FLAG_102) + +$(TARGET_1_OBJ_103):$(TARGET_1_SRC_103) + cc -c ${TARGET_1_SRC_103} -o ${TARGET_1_OBJ_103} $(TARGET_1_FLAG_103) + +$(TARGET_1_OBJ_104):$(TARGET_1_SRC_104) + cc -c ${TARGET_1_SRC_104} -o ${TARGET_1_OBJ_104} $(TARGET_1_FLAG_104) + +$(TARGET_1_OBJ_105):$(TARGET_1_SRC_105) + cc -c ${TARGET_1_SRC_105} -o ${TARGET_1_OBJ_105} $(TARGET_1_FLAG_105) + +$(TARGET_1_OBJ_106):$(TARGET_1_SRC_106) + cc -c ${TARGET_1_SRC_106} -o ${TARGET_1_OBJ_106} $(TARGET_1_FLAG_106) + +$(TARGET_1_OBJ_107):$(TARGET_1_SRC_107) + cc -c ${TARGET_1_SRC_107} -o ${TARGET_1_OBJ_107} $(TARGET_1_FLAG_107) + +$(TARGET_1_OBJ_108):$(TARGET_1_SRC_108) + cc -c ${TARGET_1_SRC_108} -o ${TARGET_1_OBJ_108} $(TARGET_1_FLAG_108) + +$(TARGET_1_OBJ_109):$(TARGET_1_SRC_109) + cc -c ${TARGET_1_SRC_109} -o ${TARGET_1_OBJ_109} $(TARGET_1_FLAG_109) + +$(TARGET_1_OBJ_110):$(TARGET_1_SRC_110) + cc -c ${TARGET_1_SRC_110} -o ${TARGET_1_OBJ_110} $(TARGET_1_FLAG_110) + +$(TARGET_1_OBJ_111):$(TARGET_1_SRC_111) + cc -c ${TARGET_1_SRC_111} -o ${TARGET_1_OBJ_111} $(TARGET_1_FLAG_111) + +$(TARGET_1_OBJ_112):$(TARGET_1_SRC_112) + cc -c ${TARGET_1_SRC_112} -o ${TARGET_1_OBJ_112} $(TARGET_1_FLAG_112) + +$(TARGET_1_OBJ_113):$(TARGET_1_SRC_113) + cc -c ${TARGET_1_SRC_113} -o ${TARGET_1_OBJ_113} $(TARGET_1_FLAG_113) + +$(TARGET_1_OBJ_114):$(TARGET_1_SRC_114) + cc -c ${TARGET_1_SRC_114} -o ${TARGET_1_OBJ_114} $(TARGET_1_FLAG_114) + +$(TARGET_1_OBJ_115):$(TARGET_1_SRC_115) + cc -c 
${TARGET_1_SRC_115} -o ${TARGET_1_OBJ_115} $(TARGET_1_FLAG_115) + +$(TARGET_1_OBJ_116):$(TARGET_1_SRC_116) + cc -c ${TARGET_1_SRC_116} -o ${TARGET_1_OBJ_116} $(TARGET_1_FLAG_116) + +$(TARGET_1_OBJ_117):$(TARGET_1_SRC_117) + cc -c ${TARGET_1_SRC_117} -o ${TARGET_1_OBJ_117} $(TARGET_1_FLAG_117) + +$(TARGET_1_OBJ_118):$(TARGET_1_SRC_118) + cc -c ${TARGET_1_SRC_118} -o ${TARGET_1_OBJ_118} $(TARGET_1_FLAG_118) + +$(TARGET_1_OBJ_119):$(TARGET_1_SRC_119) + cc -c ${TARGET_1_SRC_119} -o ${TARGET_1_OBJ_119} $(TARGET_1_FLAG_119) + +$(TARGET_1_OBJ_120):$(TARGET_1_SRC_120) + cc -c ${TARGET_1_SRC_120} -o ${TARGET_1_OBJ_120} $(TARGET_1_FLAG_120) + +$(TARGET_1_OBJ_121):$(TARGET_1_SRC_121) + cc -c ${TARGET_1_SRC_121} -o ${TARGET_1_OBJ_121} $(TARGET_1_FLAG_121) + +$(TARGET_1_OBJ_122):$(TARGET_1_SRC_122) + cc -c ${TARGET_1_SRC_122} -o ${TARGET_1_OBJ_122} $(TARGET_1_FLAG_122) + +$(TARGET_1_OBJ_123):$(TARGET_1_SRC_123) + cc -c ${TARGET_1_SRC_123} -o ${TARGET_1_OBJ_123} $(TARGET_1_FLAG_123) + +$(TARGET_1_OBJ_124):$(TARGET_1_SRC_124) + cc -c ${TARGET_1_SRC_124} -o ${TARGET_1_OBJ_124} $(TARGET_1_FLAG_124) + +$(TARGET_1_OBJ_125):$(TARGET_1_SRC_125) + cc -c ${TARGET_1_SRC_125} -o ${TARGET_1_OBJ_125} $(TARGET_1_FLAG_125) + +$(TARGET_2): $(OBJS_2) + $(CC) -fsycl -o $@ $^ $(LIB) -qmkl + +$(TARGET_2_OBJ_0):$(TARGET_2_SRC_0) + cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0) + +$(TARGET_2_OBJ_1):$(TARGET_2_SRC_1) + cc -c ${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1) + +clean: + rm -f ${OBJS_0} ${OBJS_1} ${OBJS_2} $(TARGET) diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct.patched b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct.patched new file mode 100644 index 000000000..08159b6dd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/Makefile.dpct.patched @@ -0,0 +1,1019 @@ +CC := icpx + +LD := $(CC) + +#DPCT2001:4: You can link with more library by add them here. 
+LIB := -lmpi + +FLAGS := -fPIC + +ifeq ($(shell which $(CC)),) + $(error ERROR - $(CC) compiler not found) +endif + +ROOT_DIR := $(shell dirname $(shell which $(CC))) +INCLUDE_SYCL := $(ROOT_DIR)/../include +INCLUDE_CL := $(ROOT_DIR)/../include/sycl + +TARGET_0_SRC_0 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_0 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_1 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_1 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_2 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_2 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_3 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_3 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_4 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_4 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_5 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_5 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_6 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_6 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_7 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_7 = ./testing/ptest/HPL_pdinfo.o 
+TARGET_0_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_8 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_8 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_9 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_9 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_10 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_10 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_11 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_11 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_0 = ./src/auxil/HPL_dlacpy.c +TARGET_1_OBJ_0 = ./src/auxil/HPL_dlacpy.o +TARGET_1_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_1 = ./src/auxil/HPL_dlatcpy.c +TARGET_1_OBJ_1 = ./src/auxil/HPL_dlatcpy.o +TARGET_1_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_2 = ./src/auxil/HPL_fprintf.c +TARGET_1_OBJ_2 = ./src/auxil/HPL_fprintf.o +TARGET_1_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_3 = ./src/auxil/HPL_warn.c +TARGET_1_OBJ_3 = ./src/auxil/HPL_warn.o +TARGET_1_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_4 = 
./src/auxil/HPL_abort.c +TARGET_1_OBJ_4 = ./src/auxil/HPL_abort.o +TARGET_1_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_5 = ./src/auxil/HPL_dlaprnt.c +TARGET_1_OBJ_5 = ./src/auxil/HPL_dlaprnt.o +TARGET_1_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_6 = ./src/auxil/HPL_dlange.c +TARGET_1_OBJ_6 = ./src/auxil/HPL_dlange.o +TARGET_1_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_7 = ./src/auxil/HPL_dlamch.c +TARGET_1_OBJ_7 = ./src/auxil/HPL_dlamch.o +TARGET_1_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ${FLAGS} + +TARGET_1_SRC_8 = ./src/blas/HPL_dcopy.c +TARGET_1_OBJ_8 = ./src/blas/HPL_dcopy.o +TARGET_1_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_9 = ./src/blas/HPL_daxpy.c +TARGET_1_OBJ_9 = ./src/blas/HPL_daxpy.o +TARGET_1_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_10 = ./src/blas/HPL_dscal.c +TARGET_1_OBJ_10 = ./src/blas/HPL_dscal.o +TARGET_1_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_11 = ./src/blas/HPL_idamax.c +TARGET_1_OBJ_11 = ./src/blas/HPL_idamax.o +TARGET_1_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_12 = ./src/blas/HPL_dgemv.c +TARGET_1_OBJ_12 = ./src/blas/HPL_dgemv.o +TARGET_1_FLAG_12 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + 
+TARGET_1_SRC_13 = ./src/blas/HPL_dtrsv.c +TARGET_1_OBJ_13 = ./src/blas/HPL_dtrsv.o +TARGET_1_FLAG_13 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_14 = ./src/blas/HPL_dger.c +TARGET_1_OBJ_14 = ./src/blas/HPL_dger.o +TARGET_1_FLAG_14 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_15 = ./src/blas/HPL_dgemm.c +TARGET_1_OBJ_15 = ./src/blas/HPL_dgemm.o +TARGET_1_FLAG_15 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_16 = ./src/blas/HPL_dtrsm.c +TARGET_1_OBJ_16 = ./src/blas/HPL_dtrsm.o +TARGET_1_FLAG_16 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_17 = ./src/comm/HPL_1ring.c +TARGET_1_OBJ_17 = ./src/comm/HPL_1ring.o +TARGET_1_FLAG_17 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_18 = ./src/comm/HPL_1rinM.c +TARGET_1_OBJ_18 = ./src/comm/HPL_1rinM.o +TARGET_1_FLAG_18 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_19 = ./src/comm/HPL_2ring.c +TARGET_1_OBJ_19 = ./src/comm/HPL_2ring.o +TARGET_1_FLAG_19 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_20 = ./src/comm/HPL_2rinM.c +TARGET_1_OBJ_20 = ./src/comm/HPL_2rinM.o +TARGET_1_FLAG_20 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_21 = ./src/comm/HPL_blong.c +TARGET_1_OBJ_21 = ./src/comm/HPL_blong.o +TARGET_1_FLAG_21 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I 
./include ${FLAGS} + +TARGET_1_SRC_22 = ./src/comm/HPL_blonM.c +TARGET_1_OBJ_22 = ./src/comm/HPL_blonM.o +TARGET_1_FLAG_22 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_23 = ./src/comm/HPL_packL.c +TARGET_1_OBJ_23 = ./src/comm/HPL_packL.o +TARGET_1_FLAG_23 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_24 = ./src/comm/HPL_copyL.c +TARGET_1_OBJ_24 = ./src/comm/HPL_copyL.o +TARGET_1_FLAG_24 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_25 = ./src/comm/HPL_binit.c +TARGET_1_OBJ_25 = ./src/comm/HPL_binit.o +TARGET_1_FLAG_25 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_26 = ./src/comm/HPL_bcast.c +TARGET_1_OBJ_26 = ./src/comm/HPL_bcast.o +TARGET_1_FLAG_26 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_27 = ./src/comm/HPL_bwait.c +TARGET_1_OBJ_27 = ./src/comm/HPL_bwait.o +TARGET_1_FLAG_27 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_28 = ./src/comm/HPL_send.c +TARGET_1_OBJ_28 = ./src/comm/HPL_send.o +TARGET_1_FLAG_28 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_29 = ./src/comm/HPL_recv.c +TARGET_1_OBJ_29 = ./src/comm/HPL_recv.o +TARGET_1_FLAG_29 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_30 = ./src/comm/HPL_sdrv.c +TARGET_1_OBJ_30 = ./src/comm/HPL_sdrv.o +TARGET_1_FLAG_30 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include 
-I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_31 = ./src/grid/HPL_grid_init.c +TARGET_1_OBJ_31 = ./src/grid/HPL_grid_init.o +TARGET_1_FLAG_31 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_32 = ./src/grid/HPL_pnum.c +TARGET_1_OBJ_32 = ./src/grid/HPL_pnum.o +TARGET_1_FLAG_32 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_33 = ./src/grid/HPL_grid_info.c +TARGET_1_OBJ_33 = ./src/grid/HPL_grid_info.o +TARGET_1_FLAG_33 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_34 = ./src/grid/HPL_grid_exit.c +TARGET_1_OBJ_34 = ./src/grid/HPL_grid_exit.o +TARGET_1_FLAG_34 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_35 = ./src/grid/HPL_broadcast.c +TARGET_1_OBJ_35 = ./src/grid/HPL_broadcast.o +TARGET_1_FLAG_35 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_36 = ./src/grid/HPL_reduce.c +TARGET_1_OBJ_36 = ./src/grid/HPL_reduce.o +TARGET_1_FLAG_36 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_37 = ./src/grid/HPL_all_reduce.c +TARGET_1_OBJ_37 = ./src/grid/HPL_all_reduce.o +TARGET_1_FLAG_37 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_38 = ./src/grid/HPL_barrier.c +TARGET_1_OBJ_38 = ./src/grid/HPL_barrier.o +TARGET_1_FLAG_38 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_39 = ./src/grid/HPL_min.c +TARGET_1_OBJ_39 = ./src/grid/HPL_min.o +TARGET_1_FLAG_39 = -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_40 = ./src/grid/HPL_max.c +TARGET_1_OBJ_40 = ./src/grid/HPL_max.o +TARGET_1_FLAG_40 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_41 = ./src/grid/HPL_sum.c +TARGET_1_OBJ_41 = ./src/grid/HPL_sum.o +TARGET_1_FLAG_41 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_42 = ./src/panel/HPL_pdpanel_new.c +TARGET_1_OBJ_42 = ./src/panel/HPL_pdpanel_new.o +TARGET_1_FLAG_42 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_43 = ./src/panel/HPL_pdpanel_init.c +TARGET_1_OBJ_43 = ./src/panel/HPL_pdpanel_init.o +TARGET_1_FLAG_43 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_44 = ./src/panel/HPL_pdpanel_disp.c +TARGET_1_OBJ_44 = ./src/panel/HPL_pdpanel_disp.o +TARGET_1_FLAG_44 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_45 = ./src/panel/HPL_pdpanel_free.c +TARGET_1_OBJ_45 = ./src/panel/HPL_pdpanel_free.o +TARGET_1_FLAG_45 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_46 = ./src/pauxil/HPL_indxg2l.c +TARGET_1_OBJ_46 = ./src/pauxil/HPL_indxg2l.o +TARGET_1_FLAG_46 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_47 = ./src/pauxil/HPL_indxg2lp.c +TARGET_1_OBJ_47 = ./src/pauxil/HPL_indxg2lp.o +TARGET_1_FLAG_47 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_48 = 
./src/pauxil/HPL_indxg2p.c +TARGET_1_OBJ_48 = ./src/pauxil/HPL_indxg2p.o +TARGET_1_FLAG_48 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_49 = ./src/pauxil/HPL_indxl2g.c +TARGET_1_OBJ_49 = ./src/pauxil/HPL_indxl2g.o +TARGET_1_FLAG_49 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_50 = ./src/pauxil/HPL_infog2l.c +TARGET_1_OBJ_50 = ./src/pauxil/HPL_infog2l.o +TARGET_1_FLAG_50 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_51 = ./src/pauxil/HPL_numroc.c +TARGET_1_OBJ_51 = ./src/pauxil/HPL_numroc.o +TARGET_1_FLAG_51 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_52 = ./src/pauxil/HPL_numrocI.c +TARGET_1_OBJ_52 = ./src/pauxil/HPL_numrocI.o +TARGET_1_FLAG_52 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_53 = ./src/pauxil/HPL_dlaswp00N.c +TARGET_1_OBJ_53 = ./src/pauxil/HPL_dlaswp00N.o +TARGET_1_FLAG_53 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_54 = ./src/pauxil/HPL_dlaswp10N.c +TARGET_1_OBJ_54 = ./src/pauxil/HPL_dlaswp10N.o +TARGET_1_FLAG_54 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_55 = ./src/pauxil/HPL_dlaswp01N.c +TARGET_1_OBJ_55 = ./src/pauxil/HPL_dlaswp01N.o +TARGET_1_FLAG_55 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_56 = ./src/pauxil/HPL_dlaswp01T.c +TARGET_1_OBJ_56 = ./src/pauxil/HPL_dlaswp01T.o +TARGET_1_FLAG_56 = -DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_57 = ./src/pauxil/HPL_dlaswp02N.c +TARGET_1_OBJ_57 = ./src/pauxil/HPL_dlaswp02N.o +TARGET_1_FLAG_57 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_58 = ./src/pauxil/HPL_dlaswp03N.c +TARGET_1_OBJ_58 = ./src/pauxil/HPL_dlaswp03N.o +TARGET_1_FLAG_58 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_59 = ./src/pauxil/HPL_dlaswp03T.c +TARGET_1_OBJ_59 = ./src/pauxil/HPL_dlaswp03T.o +TARGET_1_FLAG_59 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_60 = ./src/pauxil/HPL_dlaswp04N.c +TARGET_1_OBJ_60 = ./src/pauxil/HPL_dlaswp04N.o +TARGET_1_FLAG_60 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_61 = ./src/pauxil/HPL_dlaswp04T.c +TARGET_1_OBJ_61 = ./src/pauxil/HPL_dlaswp04T.o +TARGET_1_FLAG_61 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_62 = ./src/pauxil/HPL_dlaswp05N.c +TARGET_1_OBJ_62 = ./src/pauxil/HPL_dlaswp05N.o +TARGET_1_FLAG_62 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_63 = ./src/pauxil/HPL_dlaswp05T.c +TARGET_1_OBJ_63 = ./src/pauxil/HPL_dlaswp05T.o +TARGET_1_FLAG_63 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_64 = ./src/pauxil/HPL_dlaswp06N.c +TARGET_1_OBJ_64 = ./src/pauxil/HPL_dlaswp06N.o +TARGET_1_FLAG_64 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_65 
= ./src/pauxil/HPL_dlaswp06T.c +TARGET_1_OBJ_65 = ./src/pauxil/HPL_dlaswp06T.o +TARGET_1_FLAG_65 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_66 = ./src/pauxil/HPL_pwarn.c +TARGET_1_OBJ_66 = ./src/pauxil/HPL_pwarn.o +TARGET_1_FLAG_66 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_67 = ./src/pauxil/HPL_pabort.c +TARGET_1_OBJ_67 = ./src/pauxil/HPL_pabort.o +TARGET_1_FLAG_67 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_68 = ./src/pauxil/HPL_pdlaprnt.c +TARGET_1_OBJ_68 = ./src/pauxil/HPL_pdlaprnt.o +TARGET_1_FLAG_68 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_69 = ./src/pauxil/HPL_pdlamch.c +TARGET_1_OBJ_69 = ./src/pauxil/HPL_pdlamch.o +TARGET_1_FLAG_69 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_70 = ./src/pauxil/HPL_pdlange.c +TARGET_1_OBJ_70 = ./src/pauxil/HPL_pdlange.o +TARGET_1_FLAG_70 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_71 = ./src/pfact/HPL_dlocmax.c +TARGET_1_OBJ_71 = ./src/pfact/HPL_dlocmax.o +TARGET_1_FLAG_71 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_72 = ./src/pfact/HPL_dlocswpN.c +TARGET_1_OBJ_72 = ./src/pfact/HPL_dlocswpN.o +TARGET_1_FLAG_72 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_73 = ./src/pfact/HPL_dlocswpT.c +TARGET_1_OBJ_73 = ./src/pfact/HPL_dlocswpT.o +TARGET_1_FLAG_73 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_74 = ./src/pfact/HPL_pdmxswp.c +TARGET_1_OBJ_74 = ./src/pfact/HPL_pdmxswp.o +TARGET_1_FLAG_74 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_75 = ./src/pfact/HPL_pdpancrN.c +TARGET_1_OBJ_75 = ./src/pfact/HPL_pdpancrN.o +TARGET_1_FLAG_75 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_76 = ./src/pfact/HPL_pdpancrT.c +TARGET_1_OBJ_76 = ./src/pfact/HPL_pdpancrT.o +TARGET_1_FLAG_76 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_77 = ./src/pfact/HPL_pdpanllN.c +TARGET_1_OBJ_77 = ./src/pfact/HPL_pdpanllN.o +TARGET_1_FLAG_77 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_78 = ./src/pfact/HPL_pdpanllT.c +TARGET_1_OBJ_78 = ./src/pfact/HPL_pdpanllT.o +TARGET_1_FLAG_78 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_79 = ./src/pfact/HPL_pdpanrlN.c +TARGET_1_OBJ_79 = ./src/pfact/HPL_pdpanrlN.o +TARGET_1_FLAG_79 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_80 = ./src/pfact/HPL_pdpanrlT.c +TARGET_1_OBJ_80 = ./src/pfact/HPL_pdpanrlT.o +TARGET_1_FLAG_80 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_81 = ./src/pfact/HPL_pdrpanllN.c +TARGET_1_OBJ_81 = ./src/pfact/HPL_pdrpanllN.o +TARGET_1_FLAG_81 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_82 = ./src/pfact/HPL_pdrpanllT.c +TARGET_1_OBJ_82 = 
./src/pfact/HPL_pdrpanllT.o +TARGET_1_FLAG_82 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_83 = ./src/pfact/HPL_pdrpancrN.c +TARGET_1_OBJ_83 = ./src/pfact/HPL_pdrpancrN.o +TARGET_1_FLAG_83 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_84 = ./src/pfact/HPL_pdrpancrT.c +TARGET_1_OBJ_84 = ./src/pfact/HPL_pdrpancrT.o +TARGET_1_FLAG_84 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_85 = ./src/pfact/HPL_pdrpanrlN.c +TARGET_1_OBJ_85 = ./src/pfact/HPL_pdrpanrlN.o +TARGET_1_FLAG_85 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_86 = ./src/pfact/HPL_pdrpanrlT.c +TARGET_1_OBJ_86 = ./src/pfact/HPL_pdrpanrlT.o +TARGET_1_FLAG_86 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_87 = ./src/pfact/HPL_pdfact.c +TARGET_1_OBJ_87 = ./src/pfact/HPL_pdfact.o +TARGET_1_FLAG_87 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_88 = ./src/pgesv/HPL_pipid.c +TARGET_1_OBJ_88 = ./src/pgesv/HPL_pipid.o +TARGET_1_FLAG_88 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_89 = ./src/pgesv/HPL_plindx0.c +TARGET_1_OBJ_89 = ./src/pgesv/HPL_plindx0.o +TARGET_1_FLAG_89 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_90 = ./src/pgesv/HPL_pdlaswp00N.c +TARGET_1_OBJ_90 = ./src/pgesv/HPL_pdlaswp00N.o +TARGET_1_FLAG_90 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 
-I ./include ${FLAGS} + +TARGET_1_SRC_91 = ./src/pgesv/HPL_pdlaswp00T.c +TARGET_1_OBJ_91 = ./src/pgesv/HPL_pdlaswp00T.o +TARGET_1_FLAG_91 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_92 = ./src/pgesv/HPL_perm.c +TARGET_1_OBJ_92 = ./src/pgesv/HPL_perm.o +TARGET_1_FLAG_92 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_93 = ./src/pgesv/HPL_logsort.c +TARGET_1_OBJ_93 = ./src/pgesv/HPL_logsort.o +TARGET_1_FLAG_93 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_94 = ./src/pgesv/HPL_plindx10.c +TARGET_1_OBJ_94 = ./src/pgesv/HPL_plindx10.o +TARGET_1_FLAG_94 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_95 = ./src/pgesv/HPL_plindx1.c +TARGET_1_OBJ_95 = ./src/pgesv/HPL_plindx1.o +TARGET_1_FLAG_95 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_96 = ./src/pgesv/HPL_spreadN.c +TARGET_1_OBJ_96 = ./src/pgesv/HPL_spreadN.o +TARGET_1_FLAG_96 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_97 = ./src/pgesv/HPL_spreadT.c +TARGET_1_OBJ_97 = ./src/pgesv/HPL_spreadT.o +TARGET_1_FLAG_97 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_98 = ./src/pgesv/HPL_rollN.c +TARGET_1_OBJ_98 = ./src/pgesv/HPL_rollN.o +TARGET_1_FLAG_98 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_99 = ./src/pgesv/HPL_rollT.c +TARGET_1_OBJ_99 = ./src/pgesv/HPL_rollT.o +TARGET_1_FLAG_99 = -DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_100 = ./src/pgesv/HPL_equil.c +TARGET_1_OBJ_100 = ./src/pgesv/HPL_equil.o +TARGET_1_FLAG_100 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_101 = ./src/pgesv/HPL_pdlaswp01N.c +TARGET_1_OBJ_101 = ./src/pgesv/HPL_pdlaswp01N.o +TARGET_1_FLAG_101 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_102 = ./src/pgesv/HPL_pdlaswp01T.c +TARGET_1_OBJ_102 = ./src/pgesv/HPL_pdlaswp01T.o +TARGET_1_FLAG_102 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_103 = ./src/pgesv/HPL_pdupdateNN.c +TARGET_1_OBJ_103 = ./src/pgesv/HPL_pdupdateNN.o +TARGET_1_FLAG_103 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_104 = ./src/pgesv/HPL_pdupdateNT.c +TARGET_1_OBJ_104 = ./src/pgesv/HPL_pdupdateNT.o +TARGET_1_FLAG_104 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_105 = ./src/pgesv/HPL_pdupdateTN.c +TARGET_1_OBJ_105 = ./src/pgesv/HPL_pdupdateTN.o +TARGET_1_FLAG_105 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_106 = ./src/pgesv/HPL_pdupdateTT.c +TARGET_1_OBJ_106 = ./src/pgesv/HPL_pdupdateTT.o +TARGET_1_FLAG_106 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_107 = ./src/pgesv/HPL_pdtrsv.c +TARGET_1_OBJ_107 = ./src/pgesv/HPL_pdtrsv.o +TARGET_1_FLAG_107 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + 
+TARGET_1_SRC_108 = ./src/pgesv/HPL_pdgesv0.c +TARGET_1_OBJ_108 = ./src/pgesv/HPL_pdgesv0.o +TARGET_1_FLAG_108 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_109 = ./src/pgesv/HPL_pdgesvK1.c +TARGET_1_OBJ_109 = ./src/pgesv/HPL_pdgesvK1.o +TARGET_1_FLAG_109 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_110 = ./src/pgesv/HPL_pdgesvK2.c +TARGET_1_OBJ_110 = ./src/pgesv/HPL_pdgesvK2.o +TARGET_1_FLAG_110 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_111 = ./src/pgesv/HPL_pdgesv.c +TARGET_1_OBJ_111 = ./src/pgesv/HPL_pdgesv.o +TARGET_1_FLAG_111 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_112 = ./testing/matgen/HPL_dmatgen.c +TARGET_1_OBJ_112 = ./testing/matgen/HPL_dmatgen.o +TARGET_1_FLAG_112 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_113 = ./testing/matgen/HPL_ladd.c +TARGET_1_OBJ_113 = ./testing/matgen/HPL_ladd.o +TARGET_1_FLAG_113 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_114 = ./testing/matgen/HPL_lmul.c +TARGET_1_OBJ_114 = ./testing/matgen/HPL_lmul.o +TARGET_1_FLAG_114 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_115 = ./testing/matgen/HPL_xjumpm.c +TARGET_1_OBJ_115 = ./testing/matgen/HPL_xjumpm.o +TARGET_1_FLAG_115 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_116 = ./testing/matgen/HPL_jumpit.c +TARGET_1_OBJ_116 = ./testing/matgen/HPL_jumpit.o 
+TARGET_1_FLAG_116 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_117 = ./testing/matgen/HPL_rand.c +TARGET_1_OBJ_117 = ./testing/matgen/HPL_rand.o +TARGET_1_FLAG_117 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_118 = ./testing/matgen/HPL_setran.c +TARGET_1_OBJ_118 = ./testing/matgen/HPL_setran.o +TARGET_1_FLAG_118 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_119 = ./testing/timer/HPL_timer.c +TARGET_1_OBJ_119 = ./testing/timer/HPL_timer.o +TARGET_1_FLAG_119 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_120 = ./testing/timer/HPL_timer_cputime.c +TARGET_1_OBJ_120 = ./testing/timer/HPL_timer_cputime.o +TARGET_1_FLAG_120 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_121 = ./testing/timer/HPL_timer_walltime.c +TARGET_1_OBJ_121 = ./testing/timer/HPL_timer_walltime.o +TARGET_1_FLAG_121 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_122 = ./testing/pmatgen/HPL_pdmatgen.c +TARGET_1_OBJ_122 = ./testing/pmatgen/HPL_pdmatgen.o +TARGET_1_FLAG_122 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_123 = ./testing/ptimer/HPL_ptimer.c +TARGET_1_OBJ_123 = ./testing/ptimer/HPL_ptimer.o +TARGET_1_FLAG_123 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_124 = ./testing/ptimer/HPL_ptimer_cputime.c +TARGET_1_OBJ_124 = ./testing/ptimer/HPL_ptimer_cputime.o +TARGET_1_FLAG_124 = -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_125 = ./testing/ptimer/HPL_ptimer_walltime.c +TARGET_1_OBJ_125 = ./testing/ptimer/HPL_ptimer_walltime.o +TARGET_1_FLAG_125 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_2_SRC_0 = ./src/cuda/cuda_dgemm.cpp.dp.cpp +TARGET_2_OBJ_0 = ./src/cuda/cuda_dgemm.cpp.dp.o +TARGET_2_FLAG_0 = -O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS} + +TARGET_2_SRC_1 = ./src/cuda/cuda_dgemm.cpp.dp.cpp +TARGET_2_OBJ_1 = ./src/cuda/cuda_dgemm.cpp.dp.o +TARGET_2_FLAG_1 = -O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS} + +TARGET_0 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl +TARGET_1 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a +TARGET_2 := libdgemm.so.1.0.1 + +TARGET := ${TARGET_1} ${TARGET_2} ${TARGET_0} + +.PHONY:all clean +OBJS_0 := ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} ${TARGET_0_OBJ_6} ${TARGET_0_OBJ_7} ${TARGET_0_OBJ_8} ${TARGET_0_OBJ_9} ${TARGET_0_OBJ_10} ${TARGET_0_OBJ_11} +OBJS_1 := ${TARGET_1_OBJ_0} ${TARGET_1_OBJ_1} ${TARGET_1_OBJ_2} ${TARGET_1_OBJ_3} ${TARGET_1_OBJ_4} ${TARGET_1_OBJ_5} ${TARGET_1_OBJ_6} ${TARGET_1_OBJ_7} ${TARGET_1_OBJ_8} ${TARGET_1_OBJ_9} ${TARGET_1_OBJ_10} ${TARGET_1_OBJ_11} ${TARGET_1_OBJ_12} ${TARGET_1_OBJ_13} ${TARGET_1_OBJ_14} ${TARGET_1_OBJ_15} ${TARGET_1_OBJ_16} ${TARGET_1_OBJ_17} ${TARGET_1_OBJ_18} ${TARGET_1_OBJ_19} ${TARGET_1_OBJ_20} ${TARGET_1_OBJ_21} ${TARGET_1_OBJ_22} ${TARGET_1_OBJ_23} ${TARGET_1_OBJ_24} ${TARGET_1_OBJ_25} ${TARGET_1_OBJ_26} ${TARGET_1_OBJ_27} ${TARGET_1_OBJ_28} ${TARGET_1_OBJ_29} ${TARGET_1_OBJ_30} ${TARGET_1_OBJ_31} ${TARGET_1_OBJ_32} ${TARGET_1_OBJ_33} ${TARGET_1_OBJ_34} ${TARGET_1_OBJ_35} ${TARGET_1_OBJ_36} ${TARGET_1_OBJ_37} 
${TARGET_1_OBJ_38} ${TARGET_1_OBJ_39} ${TARGET_1_OBJ_40} ${TARGET_1_OBJ_41} ${TARGET_1_OBJ_42} ${TARGET_1_OBJ_43} ${TARGET_1_OBJ_44} ${TARGET_1_OBJ_45} ${TARGET_1_OBJ_46} ${TARGET_1_OBJ_47} ${TARGET_1_OBJ_48} ${TARGET_1_OBJ_49} ${TARGET_1_OBJ_50} ${TARGET_1_OBJ_51} ${TARGET_1_OBJ_52} ${TARGET_1_OBJ_53} ${TARGET_1_OBJ_54} ${TARGET_1_OBJ_55} ${TARGET_1_OBJ_56} ${TARGET_1_OBJ_57} ${TARGET_1_OBJ_58} ${TARGET_1_OBJ_59} ${TARGET_1_OBJ_60} ${TARGET_1_OBJ_61} ${TARGET_1_OBJ_62} ${TARGET_1_OBJ_63} ${TARGET_1_OBJ_64} ${TARGET_1_OBJ_65} ${TARGET_1_OBJ_66} ${TARGET_1_OBJ_67} ${TARGET_1_OBJ_68} ${TARGET_1_OBJ_69} ${TARGET_1_OBJ_70} ${TARGET_1_OBJ_71} ${TARGET_1_OBJ_72} ${TARGET_1_OBJ_73} ${TARGET_1_OBJ_74} ${TARGET_1_OBJ_75} ${TARGET_1_OBJ_76} ${TARGET_1_OBJ_77} ${TARGET_1_OBJ_78} ${TARGET_1_OBJ_79} ${TARGET_1_OBJ_80} ${TARGET_1_OBJ_81} ${TARGET_1_OBJ_82} ${TARGET_1_OBJ_83} ${TARGET_1_OBJ_84} ${TARGET_1_OBJ_85} ${TARGET_1_OBJ_86} ${TARGET_1_OBJ_87} ${TARGET_1_OBJ_88} ${TARGET_1_OBJ_89} ${TARGET_1_OBJ_90} ${TARGET_1_OBJ_91} ${TARGET_1_OBJ_92} ${TARGET_1_OBJ_93} ${TARGET_1_OBJ_94} ${TARGET_1_OBJ_95} ${TARGET_1_OBJ_96} ${TARGET_1_OBJ_97} ${TARGET_1_OBJ_98} ${TARGET_1_OBJ_99} ${TARGET_1_OBJ_100} ${TARGET_1_OBJ_101} ${TARGET_1_OBJ_102} ${TARGET_1_OBJ_103} ${TARGET_1_OBJ_104} ${TARGET_1_OBJ_105} ${TARGET_1_OBJ_106} ${TARGET_1_OBJ_107} ${TARGET_1_OBJ_108} ${TARGET_1_OBJ_109} ${TARGET_1_OBJ_110} ${TARGET_1_OBJ_111} ${TARGET_1_OBJ_112} ${TARGET_1_OBJ_113} ${TARGET_1_OBJ_114} ${TARGET_1_OBJ_115} ${TARGET_1_OBJ_116} ${TARGET_1_OBJ_117} ${TARGET_1_OBJ_118} ${TARGET_1_OBJ_119} ${TARGET_1_OBJ_120} ${TARGET_1_OBJ_121} ${TARGET_1_OBJ_122} ${TARGET_1_OBJ_123} ${TARGET_1_OBJ_124} ${TARGET_1_OBJ_125} +OBJS_2 := ${TARGET_2_OBJ_0} ${TARGET_2_OBJ_1} +all: $(TARGET) +$(TARGET_0): $(OBJS_0) + $(CC) -fsycl -o $@ $^ $(LIB) -qmkl libdgemm.so.1.0.1 /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a + +$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0) + cc -c ${TARGET_0_SRC_0} -o 
${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0) + +$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1) + cc -c ${TARGET_0_SRC_1} -o ${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1) + +$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2) + cc -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2) + +$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3) + cc -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3) + +$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4) + cc -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4) + +$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5) + cc -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5) + +$(TARGET_0_OBJ_6):$(TARGET_0_SRC_6) + cc -c ${TARGET_0_SRC_6} -o ${TARGET_0_OBJ_6} $(TARGET_0_FLAG_6) + +$(TARGET_0_OBJ_7):$(TARGET_0_SRC_7) + cc -c ${TARGET_0_SRC_7} -o ${TARGET_0_OBJ_7} $(TARGET_0_FLAG_7) + +$(TARGET_0_OBJ_8):$(TARGET_0_SRC_8) + cc -c ${TARGET_0_SRC_8} -o ${TARGET_0_OBJ_8} $(TARGET_0_FLAG_8) + +$(TARGET_0_OBJ_9):$(TARGET_0_SRC_9) + cc -c ${TARGET_0_SRC_9} -o ${TARGET_0_OBJ_9} $(TARGET_0_FLAG_9) + +$(TARGET_0_OBJ_10):$(TARGET_0_SRC_10) + cc -c ${TARGET_0_SRC_10} -o ${TARGET_0_OBJ_10} $(TARGET_0_FLAG_10) + +$(TARGET_0_OBJ_11):$(TARGET_0_SRC_11) + cc -c ${TARGET_0_SRC_11} -o ${TARGET_0_OBJ_11} $(TARGET_0_FLAG_11) + +$(TARGET_1): $(OBJS_1) + ar -r $@ $^ $(LIB) + +$(TARGET_1_OBJ_0):$(TARGET_1_SRC_0) + cc -c ${TARGET_1_SRC_0} -o ${TARGET_1_OBJ_0} $(TARGET_1_FLAG_0) + +$(TARGET_1_OBJ_1):$(TARGET_1_SRC_1) + cc -c ${TARGET_1_SRC_1} -o ${TARGET_1_OBJ_1} $(TARGET_1_FLAG_1) + +$(TARGET_1_OBJ_2):$(TARGET_1_SRC_2) + cc -c ${TARGET_1_SRC_2} -o ${TARGET_1_OBJ_2} $(TARGET_1_FLAG_2) + +$(TARGET_1_OBJ_3):$(TARGET_1_SRC_3) + cc -c ${TARGET_1_SRC_3} -o ${TARGET_1_OBJ_3} $(TARGET_1_FLAG_3) + +$(TARGET_1_OBJ_4):$(TARGET_1_SRC_4) + cc -c ${TARGET_1_SRC_4} -o ${TARGET_1_OBJ_4} $(TARGET_1_FLAG_4) + +$(TARGET_1_OBJ_5):$(TARGET_1_SRC_5) + cc -c ${TARGET_1_SRC_5} -o ${TARGET_1_OBJ_5} $(TARGET_1_FLAG_5) + +$(TARGET_1_OBJ_6):$(TARGET_1_SRC_6) + cc -c ${TARGET_1_SRC_6} -o ${TARGET_1_OBJ_6} $(TARGET_1_FLAG_6) + 
+$(TARGET_1_OBJ_7):$(TARGET_1_SRC_7) + cc -c ${TARGET_1_SRC_7} -o ${TARGET_1_OBJ_7} $(TARGET_1_FLAG_7) + +$(TARGET_1_OBJ_8):$(TARGET_1_SRC_8) + cc -c ${TARGET_1_SRC_8} -o ${TARGET_1_OBJ_8} $(TARGET_1_FLAG_8) + +$(TARGET_1_OBJ_9):$(TARGET_1_SRC_9) + cc -c ${TARGET_1_SRC_9} -o ${TARGET_1_OBJ_9} $(TARGET_1_FLAG_9) + +$(TARGET_1_OBJ_10):$(TARGET_1_SRC_10) + cc -c ${TARGET_1_SRC_10} -o ${TARGET_1_OBJ_10} $(TARGET_1_FLAG_10) + +$(TARGET_1_OBJ_11):$(TARGET_1_SRC_11) + cc -c ${TARGET_1_SRC_11} -o ${TARGET_1_OBJ_11} $(TARGET_1_FLAG_11) + +$(TARGET_1_OBJ_12):$(TARGET_1_SRC_12) + cc -c ${TARGET_1_SRC_12} -o ${TARGET_1_OBJ_12} $(TARGET_1_FLAG_12) + +$(TARGET_1_OBJ_13):$(TARGET_1_SRC_13) + cc -c ${TARGET_1_SRC_13} -o ${TARGET_1_OBJ_13} $(TARGET_1_FLAG_13) + +$(TARGET_1_OBJ_14):$(TARGET_1_SRC_14) + cc -c ${TARGET_1_SRC_14} -o ${TARGET_1_OBJ_14} $(TARGET_1_FLAG_14) + +$(TARGET_1_OBJ_15):$(TARGET_1_SRC_15) + cc -c ${TARGET_1_SRC_15} -o ${TARGET_1_OBJ_15} $(TARGET_1_FLAG_15) + +$(TARGET_1_OBJ_16):$(TARGET_1_SRC_16) + cc -c ${TARGET_1_SRC_16} -o ${TARGET_1_OBJ_16} $(TARGET_1_FLAG_16) + +$(TARGET_1_OBJ_17):$(TARGET_1_SRC_17) + cc -c ${TARGET_1_SRC_17} -o ${TARGET_1_OBJ_17} $(TARGET_1_FLAG_17) + +$(TARGET_1_OBJ_18):$(TARGET_1_SRC_18) + cc -c ${TARGET_1_SRC_18} -o ${TARGET_1_OBJ_18} $(TARGET_1_FLAG_18) + +$(TARGET_1_OBJ_19):$(TARGET_1_SRC_19) + cc -c ${TARGET_1_SRC_19} -o ${TARGET_1_OBJ_19} $(TARGET_1_FLAG_19) + +$(TARGET_1_OBJ_20):$(TARGET_1_SRC_20) + cc -c ${TARGET_1_SRC_20} -o ${TARGET_1_OBJ_20} $(TARGET_1_FLAG_20) + +$(TARGET_1_OBJ_21):$(TARGET_1_SRC_21) + cc -c ${TARGET_1_SRC_21} -o ${TARGET_1_OBJ_21} $(TARGET_1_FLAG_21) + +$(TARGET_1_OBJ_22):$(TARGET_1_SRC_22) + cc -c ${TARGET_1_SRC_22} -o ${TARGET_1_OBJ_22} $(TARGET_1_FLAG_22) + +$(TARGET_1_OBJ_23):$(TARGET_1_SRC_23) + cc -c ${TARGET_1_SRC_23} -o ${TARGET_1_OBJ_23} $(TARGET_1_FLAG_23) + +$(TARGET_1_OBJ_24):$(TARGET_1_SRC_24) + cc -c ${TARGET_1_SRC_24} -o ${TARGET_1_OBJ_24} $(TARGET_1_FLAG_24) + 
+$(TARGET_1_OBJ_25):$(TARGET_1_SRC_25) + cc -c ${TARGET_1_SRC_25} -o ${TARGET_1_OBJ_25} $(TARGET_1_FLAG_25) + +$(TARGET_1_OBJ_26):$(TARGET_1_SRC_26) + cc -c ${TARGET_1_SRC_26} -o ${TARGET_1_OBJ_26} $(TARGET_1_FLAG_26) + +$(TARGET_1_OBJ_27):$(TARGET_1_SRC_27) + cc -c ${TARGET_1_SRC_27} -o ${TARGET_1_OBJ_27} $(TARGET_1_FLAG_27) + +$(TARGET_1_OBJ_28):$(TARGET_1_SRC_28) + cc -c ${TARGET_1_SRC_28} -o ${TARGET_1_OBJ_28} $(TARGET_1_FLAG_28) + +$(TARGET_1_OBJ_29):$(TARGET_1_SRC_29) + cc -c ${TARGET_1_SRC_29} -o ${TARGET_1_OBJ_29} $(TARGET_1_FLAG_29) + +$(TARGET_1_OBJ_30):$(TARGET_1_SRC_30) + cc -c ${TARGET_1_SRC_30} -o ${TARGET_1_OBJ_30} $(TARGET_1_FLAG_30) + +$(TARGET_1_OBJ_31):$(TARGET_1_SRC_31) + cc -c ${TARGET_1_SRC_31} -o ${TARGET_1_OBJ_31} $(TARGET_1_FLAG_31) + +$(TARGET_1_OBJ_32):$(TARGET_1_SRC_32) + cc -c ${TARGET_1_SRC_32} -o ${TARGET_1_OBJ_32} $(TARGET_1_FLAG_32) + +$(TARGET_1_OBJ_33):$(TARGET_1_SRC_33) + cc -c ${TARGET_1_SRC_33} -o ${TARGET_1_OBJ_33} $(TARGET_1_FLAG_33) + +$(TARGET_1_OBJ_34):$(TARGET_1_SRC_34) + cc -c ${TARGET_1_SRC_34} -o ${TARGET_1_OBJ_34} $(TARGET_1_FLAG_34) + +$(TARGET_1_OBJ_35):$(TARGET_1_SRC_35) + cc -c ${TARGET_1_SRC_35} -o ${TARGET_1_OBJ_35} $(TARGET_1_FLAG_35) + +$(TARGET_1_OBJ_36):$(TARGET_1_SRC_36) + cc -c ${TARGET_1_SRC_36} -o ${TARGET_1_OBJ_36} $(TARGET_1_FLAG_36) + +$(TARGET_1_OBJ_37):$(TARGET_1_SRC_37) + cc -c ${TARGET_1_SRC_37} -o ${TARGET_1_OBJ_37} $(TARGET_1_FLAG_37) + +$(TARGET_1_OBJ_38):$(TARGET_1_SRC_38) + cc -c ${TARGET_1_SRC_38} -o ${TARGET_1_OBJ_38} $(TARGET_1_FLAG_38) + +$(TARGET_1_OBJ_39):$(TARGET_1_SRC_39) + cc -c ${TARGET_1_SRC_39} -o ${TARGET_1_OBJ_39} $(TARGET_1_FLAG_39) + +$(TARGET_1_OBJ_40):$(TARGET_1_SRC_40) + cc -c ${TARGET_1_SRC_40} -o ${TARGET_1_OBJ_40} $(TARGET_1_FLAG_40) + +$(TARGET_1_OBJ_41):$(TARGET_1_SRC_41) + cc -c ${TARGET_1_SRC_41} -o ${TARGET_1_OBJ_41} $(TARGET_1_FLAG_41) + +$(TARGET_1_OBJ_42):$(TARGET_1_SRC_42) + cc -c ${TARGET_1_SRC_42} -o ${TARGET_1_OBJ_42} $(TARGET_1_FLAG_42) + 
+$(TARGET_1_OBJ_43):$(TARGET_1_SRC_43) + cc -c ${TARGET_1_SRC_43} -o ${TARGET_1_OBJ_43} $(TARGET_1_FLAG_43) + +$(TARGET_1_OBJ_44):$(TARGET_1_SRC_44) + cc -c ${TARGET_1_SRC_44} -o ${TARGET_1_OBJ_44} $(TARGET_1_FLAG_44) + +$(TARGET_1_OBJ_45):$(TARGET_1_SRC_45) + cc -c ${TARGET_1_SRC_45} -o ${TARGET_1_OBJ_45} $(TARGET_1_FLAG_45) + +$(TARGET_1_OBJ_46):$(TARGET_1_SRC_46) + cc -c ${TARGET_1_SRC_46} -o ${TARGET_1_OBJ_46} $(TARGET_1_FLAG_46) + +$(TARGET_1_OBJ_47):$(TARGET_1_SRC_47) + cc -c ${TARGET_1_SRC_47} -o ${TARGET_1_OBJ_47} $(TARGET_1_FLAG_47) + +$(TARGET_1_OBJ_48):$(TARGET_1_SRC_48) + cc -c ${TARGET_1_SRC_48} -o ${TARGET_1_OBJ_48} $(TARGET_1_FLAG_48) + +$(TARGET_1_OBJ_49):$(TARGET_1_SRC_49) + cc -c ${TARGET_1_SRC_49} -o ${TARGET_1_OBJ_49} $(TARGET_1_FLAG_49) + +$(TARGET_1_OBJ_50):$(TARGET_1_SRC_50) + cc -c ${TARGET_1_SRC_50} -o ${TARGET_1_OBJ_50} $(TARGET_1_FLAG_50) + +$(TARGET_1_OBJ_51):$(TARGET_1_SRC_51) + cc -c ${TARGET_1_SRC_51} -o ${TARGET_1_OBJ_51} $(TARGET_1_FLAG_51) + +$(TARGET_1_OBJ_52):$(TARGET_1_SRC_52) + cc -c ${TARGET_1_SRC_52} -o ${TARGET_1_OBJ_52} $(TARGET_1_FLAG_52) + +$(TARGET_1_OBJ_53):$(TARGET_1_SRC_53) + cc -c ${TARGET_1_SRC_53} -o ${TARGET_1_OBJ_53} $(TARGET_1_FLAG_53) + +$(TARGET_1_OBJ_54):$(TARGET_1_SRC_54) + cc -c ${TARGET_1_SRC_54} -o ${TARGET_1_OBJ_54} $(TARGET_1_FLAG_54) + +$(TARGET_1_OBJ_55):$(TARGET_1_SRC_55) + cc -c ${TARGET_1_SRC_55} -o ${TARGET_1_OBJ_55} $(TARGET_1_FLAG_55) + +$(TARGET_1_OBJ_56):$(TARGET_1_SRC_56) + cc -c ${TARGET_1_SRC_56} -o ${TARGET_1_OBJ_56} $(TARGET_1_FLAG_56) + +$(TARGET_1_OBJ_57):$(TARGET_1_SRC_57) + cc -c ${TARGET_1_SRC_57} -o ${TARGET_1_OBJ_57} $(TARGET_1_FLAG_57) + +$(TARGET_1_OBJ_58):$(TARGET_1_SRC_58) + cc -c ${TARGET_1_SRC_58} -o ${TARGET_1_OBJ_58} $(TARGET_1_FLAG_58) + +$(TARGET_1_OBJ_59):$(TARGET_1_SRC_59) + cc -c ${TARGET_1_SRC_59} -o ${TARGET_1_OBJ_59} $(TARGET_1_FLAG_59) + +$(TARGET_1_OBJ_60):$(TARGET_1_SRC_60) + cc -c ${TARGET_1_SRC_60} -o ${TARGET_1_OBJ_60} $(TARGET_1_FLAG_60) + 
+$(TARGET_1_OBJ_61):$(TARGET_1_SRC_61) + cc -c ${TARGET_1_SRC_61} -o ${TARGET_1_OBJ_61} $(TARGET_1_FLAG_61) + +$(TARGET_1_OBJ_62):$(TARGET_1_SRC_62) + cc -c ${TARGET_1_SRC_62} -o ${TARGET_1_OBJ_62} $(TARGET_1_FLAG_62) + +$(TARGET_1_OBJ_63):$(TARGET_1_SRC_63) + cc -c ${TARGET_1_SRC_63} -o ${TARGET_1_OBJ_63} $(TARGET_1_FLAG_63) + +$(TARGET_1_OBJ_64):$(TARGET_1_SRC_64) + cc -c ${TARGET_1_SRC_64} -o ${TARGET_1_OBJ_64} $(TARGET_1_FLAG_64) + +$(TARGET_1_OBJ_65):$(TARGET_1_SRC_65) + cc -c ${TARGET_1_SRC_65} -o ${TARGET_1_OBJ_65} $(TARGET_1_FLAG_65) + +$(TARGET_1_OBJ_66):$(TARGET_1_SRC_66) + cc -c ${TARGET_1_SRC_66} -o ${TARGET_1_OBJ_66} $(TARGET_1_FLAG_66) + +$(TARGET_1_OBJ_67):$(TARGET_1_SRC_67) + cc -c ${TARGET_1_SRC_67} -o ${TARGET_1_OBJ_67} $(TARGET_1_FLAG_67) + +$(TARGET_1_OBJ_68):$(TARGET_1_SRC_68) + cc -c ${TARGET_1_SRC_68} -o ${TARGET_1_OBJ_68} $(TARGET_1_FLAG_68) + +$(TARGET_1_OBJ_69):$(TARGET_1_SRC_69) + cc -c ${TARGET_1_SRC_69} -o ${TARGET_1_OBJ_69} $(TARGET_1_FLAG_69) + +$(TARGET_1_OBJ_70):$(TARGET_1_SRC_70) + cc -c ${TARGET_1_SRC_70} -o ${TARGET_1_OBJ_70} $(TARGET_1_FLAG_70) + +$(TARGET_1_OBJ_71):$(TARGET_1_SRC_71) + cc -c ${TARGET_1_SRC_71} -o ${TARGET_1_OBJ_71} $(TARGET_1_FLAG_71) + +$(TARGET_1_OBJ_72):$(TARGET_1_SRC_72) + cc -c ${TARGET_1_SRC_72} -o ${TARGET_1_OBJ_72} $(TARGET_1_FLAG_72) + +$(TARGET_1_OBJ_73):$(TARGET_1_SRC_73) + cc -c ${TARGET_1_SRC_73} -o ${TARGET_1_OBJ_73} $(TARGET_1_FLAG_73) + +$(TARGET_1_OBJ_74):$(TARGET_1_SRC_74) + cc -c ${TARGET_1_SRC_74} -o ${TARGET_1_OBJ_74} $(TARGET_1_FLAG_74) + +$(TARGET_1_OBJ_75):$(TARGET_1_SRC_75) + cc -c ${TARGET_1_SRC_75} -o ${TARGET_1_OBJ_75} $(TARGET_1_FLAG_75) + +$(TARGET_1_OBJ_76):$(TARGET_1_SRC_76) + cc -c ${TARGET_1_SRC_76} -o ${TARGET_1_OBJ_76} $(TARGET_1_FLAG_76) + +$(TARGET_1_OBJ_77):$(TARGET_1_SRC_77) + cc -c ${TARGET_1_SRC_77} -o ${TARGET_1_OBJ_77} $(TARGET_1_FLAG_77) + +$(TARGET_1_OBJ_78):$(TARGET_1_SRC_78) + cc -c ${TARGET_1_SRC_78} -o ${TARGET_1_OBJ_78} $(TARGET_1_FLAG_78) + 
+$(TARGET_1_OBJ_79):$(TARGET_1_SRC_79) + cc -c ${TARGET_1_SRC_79} -o ${TARGET_1_OBJ_79} $(TARGET_1_FLAG_79) + +$(TARGET_1_OBJ_80):$(TARGET_1_SRC_80) + cc -c ${TARGET_1_SRC_80} -o ${TARGET_1_OBJ_80} $(TARGET_1_FLAG_80) + +$(TARGET_1_OBJ_81):$(TARGET_1_SRC_81) + cc -c ${TARGET_1_SRC_81} -o ${TARGET_1_OBJ_81} $(TARGET_1_FLAG_81) + +$(TARGET_1_OBJ_82):$(TARGET_1_SRC_82) + cc -c ${TARGET_1_SRC_82} -o ${TARGET_1_OBJ_82} $(TARGET_1_FLAG_82) + +$(TARGET_1_OBJ_83):$(TARGET_1_SRC_83) + cc -c ${TARGET_1_SRC_83} -o ${TARGET_1_OBJ_83} $(TARGET_1_FLAG_83) + +$(TARGET_1_OBJ_84):$(TARGET_1_SRC_84) + cc -c ${TARGET_1_SRC_84} -o ${TARGET_1_OBJ_84} $(TARGET_1_FLAG_84) + +$(TARGET_1_OBJ_85):$(TARGET_1_SRC_85) + cc -c ${TARGET_1_SRC_85} -o ${TARGET_1_OBJ_85} $(TARGET_1_FLAG_85) + +$(TARGET_1_OBJ_86):$(TARGET_1_SRC_86) + cc -c ${TARGET_1_SRC_86} -o ${TARGET_1_OBJ_86} $(TARGET_1_FLAG_86) + +$(TARGET_1_OBJ_87):$(TARGET_1_SRC_87) + cc -c ${TARGET_1_SRC_87} -o ${TARGET_1_OBJ_87} $(TARGET_1_FLAG_87) + +$(TARGET_1_OBJ_88):$(TARGET_1_SRC_88) + cc -c ${TARGET_1_SRC_88} -o ${TARGET_1_OBJ_88} $(TARGET_1_FLAG_88) + +$(TARGET_1_OBJ_89):$(TARGET_1_SRC_89) + cc -c ${TARGET_1_SRC_89} -o ${TARGET_1_OBJ_89} $(TARGET_1_FLAG_89) + +$(TARGET_1_OBJ_90):$(TARGET_1_SRC_90) + cc -c ${TARGET_1_SRC_90} -o ${TARGET_1_OBJ_90} $(TARGET_1_FLAG_90) + +$(TARGET_1_OBJ_91):$(TARGET_1_SRC_91) + cc -c ${TARGET_1_SRC_91} -o ${TARGET_1_OBJ_91} $(TARGET_1_FLAG_91) + +$(TARGET_1_OBJ_92):$(TARGET_1_SRC_92) + cc -c ${TARGET_1_SRC_92} -o ${TARGET_1_OBJ_92} $(TARGET_1_FLAG_92) + +$(TARGET_1_OBJ_93):$(TARGET_1_SRC_93) + cc -c ${TARGET_1_SRC_93} -o ${TARGET_1_OBJ_93} $(TARGET_1_FLAG_93) + +$(TARGET_1_OBJ_94):$(TARGET_1_SRC_94) + cc -c ${TARGET_1_SRC_94} -o ${TARGET_1_OBJ_94} $(TARGET_1_FLAG_94) + +$(TARGET_1_OBJ_95):$(TARGET_1_SRC_95) + cc -c ${TARGET_1_SRC_95} -o ${TARGET_1_OBJ_95} $(TARGET_1_FLAG_95) + +$(TARGET_1_OBJ_96):$(TARGET_1_SRC_96) + cc -c ${TARGET_1_SRC_96} -o ${TARGET_1_OBJ_96} $(TARGET_1_FLAG_96) + 
+$(TARGET_1_OBJ_97):$(TARGET_1_SRC_97) + cc -c ${TARGET_1_SRC_97} -o ${TARGET_1_OBJ_97} $(TARGET_1_FLAG_97) + +$(TARGET_1_OBJ_98):$(TARGET_1_SRC_98) + cc -c ${TARGET_1_SRC_98} -o ${TARGET_1_OBJ_98} $(TARGET_1_FLAG_98) + +$(TARGET_1_OBJ_99):$(TARGET_1_SRC_99) + cc -c ${TARGET_1_SRC_99} -o ${TARGET_1_OBJ_99} $(TARGET_1_FLAG_99) + +$(TARGET_1_OBJ_100):$(TARGET_1_SRC_100) + cc -c ${TARGET_1_SRC_100} -o ${TARGET_1_OBJ_100} $(TARGET_1_FLAG_100) + +$(TARGET_1_OBJ_101):$(TARGET_1_SRC_101) + cc -c ${TARGET_1_SRC_101} -o ${TARGET_1_OBJ_101} $(TARGET_1_FLAG_101) + +$(TARGET_1_OBJ_102):$(TARGET_1_SRC_102) + cc -c ${TARGET_1_SRC_102} -o ${TARGET_1_OBJ_102} $(TARGET_1_FLAG_102) + +$(TARGET_1_OBJ_103):$(TARGET_1_SRC_103) + cc -c ${TARGET_1_SRC_103} -o ${TARGET_1_OBJ_103} $(TARGET_1_FLAG_103) + +$(TARGET_1_OBJ_104):$(TARGET_1_SRC_104) + cc -c ${TARGET_1_SRC_104} -o ${TARGET_1_OBJ_104} $(TARGET_1_FLAG_104) + +$(TARGET_1_OBJ_105):$(TARGET_1_SRC_105) + cc -c ${TARGET_1_SRC_105} -o ${TARGET_1_OBJ_105} $(TARGET_1_FLAG_105) + +$(TARGET_1_OBJ_106):$(TARGET_1_SRC_106) + cc -c ${TARGET_1_SRC_106} -o ${TARGET_1_OBJ_106} $(TARGET_1_FLAG_106) + +$(TARGET_1_OBJ_107):$(TARGET_1_SRC_107) + cc -c ${TARGET_1_SRC_107} -o ${TARGET_1_OBJ_107} $(TARGET_1_FLAG_107) + +$(TARGET_1_OBJ_108):$(TARGET_1_SRC_108) + cc -c ${TARGET_1_SRC_108} -o ${TARGET_1_OBJ_108} $(TARGET_1_FLAG_108) + +$(TARGET_1_OBJ_109):$(TARGET_1_SRC_109) + cc -c ${TARGET_1_SRC_109} -o ${TARGET_1_OBJ_109} $(TARGET_1_FLAG_109) + +$(TARGET_1_OBJ_110):$(TARGET_1_SRC_110) + cc -c ${TARGET_1_SRC_110} -o ${TARGET_1_OBJ_110} $(TARGET_1_FLAG_110) + +$(TARGET_1_OBJ_111):$(TARGET_1_SRC_111) + cc -c ${TARGET_1_SRC_111} -o ${TARGET_1_OBJ_111} $(TARGET_1_FLAG_111) + +$(TARGET_1_OBJ_112):$(TARGET_1_SRC_112) + cc -c ${TARGET_1_SRC_112} -o ${TARGET_1_OBJ_112} $(TARGET_1_FLAG_112) + +$(TARGET_1_OBJ_113):$(TARGET_1_SRC_113) + cc -c ${TARGET_1_SRC_113} -o ${TARGET_1_OBJ_113} $(TARGET_1_FLAG_113) + +$(TARGET_1_OBJ_114):$(TARGET_1_SRC_114) + cc -c 
${TARGET_1_SRC_114} -o ${TARGET_1_OBJ_114} $(TARGET_1_FLAG_114) + +$(TARGET_1_OBJ_115):$(TARGET_1_SRC_115) + cc -c ${TARGET_1_SRC_115} -o ${TARGET_1_OBJ_115} $(TARGET_1_FLAG_115) + +$(TARGET_1_OBJ_116):$(TARGET_1_SRC_116) + cc -c ${TARGET_1_SRC_116} -o ${TARGET_1_OBJ_116} $(TARGET_1_FLAG_116) + +$(TARGET_1_OBJ_117):$(TARGET_1_SRC_117) + cc -c ${TARGET_1_SRC_117} -o ${TARGET_1_OBJ_117} $(TARGET_1_FLAG_117) + +$(TARGET_1_OBJ_118):$(TARGET_1_SRC_118) + cc -c ${TARGET_1_SRC_118} -o ${TARGET_1_OBJ_118} $(TARGET_1_FLAG_118) + +$(TARGET_1_OBJ_119):$(TARGET_1_SRC_119) + cc -c ${TARGET_1_SRC_119} -o ${TARGET_1_OBJ_119} $(TARGET_1_FLAG_119) + +$(TARGET_1_OBJ_120):$(TARGET_1_SRC_120) + cc -c ${TARGET_1_SRC_120} -o ${TARGET_1_OBJ_120} $(TARGET_1_FLAG_120) + +$(TARGET_1_OBJ_121):$(TARGET_1_SRC_121) + cc -c ${TARGET_1_SRC_121} -o ${TARGET_1_OBJ_121} $(TARGET_1_FLAG_121) + +$(TARGET_1_OBJ_122):$(TARGET_1_SRC_122) + cc -c ${TARGET_1_SRC_122} -o ${TARGET_1_OBJ_122} $(TARGET_1_FLAG_122) + +$(TARGET_1_OBJ_123):$(TARGET_1_SRC_123) + cc -c ${TARGET_1_SRC_123} -o ${TARGET_1_OBJ_123} $(TARGET_1_FLAG_123) + +$(TARGET_1_OBJ_124):$(TARGET_1_SRC_124) + cc -c ${TARGET_1_SRC_124} -o ${TARGET_1_OBJ_124} $(TARGET_1_FLAG_124) + +$(TARGET_1_OBJ_125):$(TARGET_1_SRC_125) + cc -c ${TARGET_1_SRC_125} -o ${TARGET_1_OBJ_125} $(TARGET_1_FLAG_125) + +$(TARGET_2): $(OBJS_2) + $(CC) -fPIC -shared -fsycl -o $@ $^ $(LIB) -qmkl + +$(TARGET_2_OBJ_0):$(TARGET_2_SRC_0) + cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0) + +$(TARGET_2_OBJ_1):$(TARGET_2_SRC_1) + icpx -c ${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1) + +clean: + rm -f ${OBJS_0} ${OBJS_1} ${OBJS_2} $(TARGET) diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl.h new file mode 100644 index 000000000..6d131963f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl.h @@ -0,0 
+1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_H +#define HPL_H +/* + * --------------------------------------------------------------------- + * HPL default compile options that can overridden in the Make. + * --------------------------------------------------------------------- + */ +#ifndef HPL_NO_MPI_DATATYPE /* Use MPI user-defined data type */ +#define HPL_USE_MPI_DATATYPE +#endif + +#ifndef HPL_COPY_L /* do not copy L, use MPI user-defined data types */ +#define HPL_NO_COPY_L +#endif + +#ifndef HPL_DETAILED_TIMING /* Do not enable detailed timings */ +#define HPL_NO_DETAILED_TIMING +#endif + +#ifndef HPL_CALL_VSIPL /* Call the Fortran 77 BLAS interface */ +#ifndef HPL_CALL_CBLAS /* there can be only one */ +#define HPL_CALL_FBLAS +#endif +#endif +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +#include "hpl_pgesv.h" + +#include "hpl_timer.h" +#include "hpl_matgen.h" +#include "hpl_test.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +#include "hpl_ptest.h" + +#endif +/* + * End of hpl.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_auxil.h 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_auxil.h new file mode 100644 index 000000000..861caf380 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_auxil.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_AUXIL_H +#define HPL_AUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM; + +typedef enum +{ + HPL_MACH_EPS = 900, /* relative machine precision */ + HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */ + HPL_MACH_BASE = 902, /* base = base of the machine */ + HPL_MACH_PREC = 903, /* prec = eps*base */ + HPL_MACH_MLEN = 904, /* number of (base) digits in the mantissa */ + HPL_MACH_RND = 905, /* 1.0 if rounding occurs in addition */ + HPL_MACH_EMIN = 906, /* min exponent before (gradual) underflow */ + HPL_MACH_RMIN = 907, /* underflow threshold base**(emin-1) */ + HPL_MACH_EMAX = 908, /* largest exponent before overflow */ + HPL_MACH_RMAX = 909 /* overflow threshold - (base**emax)*(1-eps) */ + +} HPL_T_MACH; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_fprintf +STDC_ARGS( ( + FILE *, + const char *, + ... 
+) ); +void HPL_warn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_abort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_dlacpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlatcpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlaprnt +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_dlange +STDC_ARGS( ( + const HPL_T_NORM, + const int, + const int, + const double *, + const int +) ); +double HPL_dlamch +STDC_ARGS( ( + const HPL_T_MACH +) ); + +#endif +/* + * End of hpl_auxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_blas.h new file mode 100644 index 000000000..2a510471a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_blas.h @@ -0,0 +1,630 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_BLAS_H +#define HPL_BLAS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" + + +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +enum HPL_ORDER +{ HplRowMajor = 101, HplColumnMajor = 102 }; +enum HPL_TRANS +{ HplNoTrans = 111, HplTrans = 112, HplConjTrans = 113 }; +enum HPL_UPLO +{ HplUpper = 121, HplLower = 122 }; +enum HPL_DIAG +{ HplNonUnit = 131, HplUnit = 132 }; +enum HPL_SIDE +{ HplLeft = 141, HplRight = 142 }; + + +#ifdef HPL_CALL_CBLAS + + +/* + * --------------------------------------------------------------------- + * The C interface of the BLAS is available ... + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define CBLAS_INDEX int + +#define CBLAS_ORDER HPL_ORDER +#define CblasRowMajor HplRowMajor +#define CblasColMajor HplColMajor + +#define CBLAS_TRANSPOSE HPL_TRANS +#define CblasNoTrans HplNoTrans +#define CblasTrans HplTrans +#define CblasConjTrans HplConjTrans + +#define CBLAS_UPLO HPL_UPLO +#define CblasUpper HplUpper +#define CblasLower HplLower + +#define CBLAS_DIAG HPL_DIAG +#define CblasNonUnit HplNonUnit +#define CblasUnit HplUnit + +#define CBLAS_SIDE HPL_SIDE +#define CblasLeft HplLeft +#define CblasRight HplRight +/* + * --------------------------------------------------------------------- + * CBLAS Function prototypes + * --------------------------------------------------------------------- + */ +CBLAS_INDEX cblas_idamax +STDC_ARGS( +( const int, const double *, const int ) ); +void cblas_dswap +STDC_ARGS( +( const int, double *, const int, double *, + const int ) ); +void cblas_dcopy +STDC_ARGS( +( const int, 
const double *, const int, double *, + const int ) ); +void cblas_daxpy +STDC_ARGS( +( const int, const double, const double *, const int, + double *, const int ) ); +void cblas_dscal +STDC_ARGS( +( const int, const double, double *, const int ) ); + +void cblas_dgemv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const int, const int, const double, const double *, + const int, const double *, const int, const double, + double *, const int ) ); + +void cblas_dger +STDC_ARGS( +( const enum CBLAS_ORDER, const int, const int, + const double, const double *, const int, const double *, + const int, double *, const int ) ); +void cblas_dtrsv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_UPLO, + const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, + const int, const double *, const int, double *, + const int ) ); + +void cblas_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void cblas_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +void dpcpp_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void dpcpp_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +/* + * --------------------------------------------------------------------- + * HPL C BLAS macro definition + * 
--------------------------------------------------------------------- + */ +#define HPL_dswap cblas_dswap +#define HPL_dcopy cblas_dcopy +#define HPL_daxpy cblas_daxpy +#define HPL_dscal cblas_dscal +#define HPL_idamax cblas_idamax + +#define HPL_dgemv cblas_dgemv +#define HPL_dtrsv cblas_dtrsv +#define HPL_dger cblas_dger + +//#define HPL_dgemm cblas_dgemm +//#define HPL_dtrsm cblas_dtrsm +#define HPL_dgemm dpcpp_dgemm +#define HPL_dtrsm dpcpp_dtrsm + +#endif + +//#define HPL_hello sss_gemm + +#ifdef HPL_CALL_FBLAS +/* + * --------------------------------------------------------------------- + * Use the Fortran 77 interface of the BLAS ... + * --------------------------------------------------------------------- + * Defaults: Add_, F77_INTEGER=int, StringSunStyle + * --------------------------------------------------------------------- + */ +#ifndef NoChange +#ifndef UpCase +#ifndef Add__ +#ifndef Add_ + +#define Add_ + +#endif +#endif +#endif +#endif + +#ifndef F77_INTEGER +#define F77_INTEGER int +#else +#define HPL_USE_F77_INTEGER_DEF +#endif + +#ifndef StringCrayStyle +#ifndef StringStructVal +#ifndef StringStructPtr +#ifndef StringSunStyle + +#define StringSunStyle + +#endif +#endif +#endif +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 <-> C interface + * --------------------------------------------------------------------- + * + * These macros identify how Fortran routines will be called. + * + * Add_ : the Fortran compiler expects the name of C functions to be + * in all lower case and to have an underscore appended (Suns, Intel + * compilers expect this). + * + * NoChange : the Fortran compiler expects the name of C functions to be + * in all lower case (IBM RS6K compilers do this). + * + * UpCase : the Fortran compiler expects the name of C functions to be + * in all upper case (Cray compilers expect this). + * + * Add__ : the Fortran compiler in use is f2c, a Fortran to C conver- + * ter.
+ */ +#ifdef NoChange +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm(...) + */ +#define F77dswap dswap +#define F77dscal dscal +#define F77dcopy dcopy +#define F77daxpy daxpy +#define F77idamax idamax + +#define F77dgemv dgemv +#define F77dtrsv dtrsv +#define F77dger dger + +#define F77dgemm dgemm +#define F77dtrsm dtrsm + +#endif + +#ifdef UpCase +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) DGEMM(...) + */ +#ifdef CRAY_BLAS + +#define F77dswap SSWAP +#define F77dscal SSCAL +#define F77dcopy SCOPY +#define F77daxpy SAXPY +#define F77idamax ISAMAX + +#define F77dgemv SGEMV +#define F77dtrsv STRSV +#define F77dger SGER + +#define F77dgemm SGEMM +#define F77dtrsm STRSM + +#else + +#define F77dswap DSWAP +#define F77dscal DSCAL +#define F77dcopy DCOPY +#define F77daxpy DAXPY +#define F77idamax IDAMAX + +#define F77dgemv DGEMV +#define F77dtrsv DTRSV +#define F77dger DGER + +#define F77dgemm DGEMM +#define F77dtrsm DTRSM + +#endif + +#endif + +#ifdef Add_ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) 
+ */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ + +#endif + +#ifdef Add__ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) + */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ +//#define F77hello sss_gemm + +#endif +//#define F77hello sss_gemm +/* + * --------------------------------------------------------------------- + * Typedef definitions and conversion utilities + * --------------------------------------------------------------------- + */ +#ifdef StringCrayStyle + +#include <fortran.h> + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR _fcd + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(_fcdtocp(c) )) +#define HPL_C2F_CHAR(c) (_cptofcd(&(c), 1)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructVal + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c.cp)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructPtr + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c->cp)) + +#define
F77_CHAR_DECL F77_CHAR * /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringSunStyle + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR char * + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c)) +#define HPL_C2F_CHAR(c) (&(c)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ +#define F77_1_CHAR , F77_INTEGER +#define F77_2_CHAR F77_1_CHAR F77_1_CHAR +#define F77_3_CHAR F77_2_CHAR F77_1_CHAR +#define F77_4_CHAR F77_3_CHAR F77_1_CHAR + +#endif +/* ------------------------------------------------------------------ */ + +#ifndef F77_1_CHAR +#define F77_1_CHAR +#define F77_2_CHAR +#define F77_3_CHAR +#define F77_4_CHAR +#endif + +#define F77_INT_DECL const F77_INTEGER * /* input integer */ +#define F77_SIN_DECL const double * /* input scalar */ +#define F77_VIN_DECL const double * /* input vector */ +#define F77_VINOUT_DECL double * /* input/output vector */ +#define F77_MIN_DECL const double * /* input matrix */ +#define F77_MINOUT_DECL double * /* input/output matrix */ + +#ifdef CRAY_PVP_ENV /* Type of FORTRAN functions */ +#define F77_VOID_FUN extern fortran void /* subroutine */ +#define F77_INT_FUN extern fortran int /* integer function */ +#else +#define F77_VOID_FUN extern void /* subroutine */ +#define F77_INT_FUN extern int /* integer function */ +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 BLAS function prototypes + * --------------------------------------------------------------------- + */ +F77_VOID_FUN F77dswap +STDC_ARGS( +( F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dscal +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_VOID_FUN F77dcopy +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77daxpy +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL,
F77_VIN_DECL, F77_INT_DECL, + F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_INT_FUN F77idamax +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL ) ); + +F77_VOID_FUN F77dgemv +STDC_ARGS( +( F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, + F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL F77_1_CHAR ) ); +F77_VOID_FUN F77dger +STDC_ARGS( +( F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_VIN_DECL, + F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_MINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dtrsv +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL + F77_3_CHAR ) ); + +F77_VOID_FUN F77dgemm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, + F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MINOUT_DECL, + F77_INT_DECL F77_2_CHAR ) ); +F77_VOID_FUN F77dtrsm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, + F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, + F77_INT_DECL, F77_MINOUT_DECL, F77_INT_DECL F77_4_CHAR ) ); + +#endif +/* + * --------------------------------------------------------------------- + * HPL BLAS Function prototypes + * --------------------------------------------------------------------- + */ +#ifndef HPL_CALL_CBLAS + +int HPL_idamax +STDC_ARGS( ( + const int, + const double *, + const int +) ); +void HPL_daxpy +STDC_ARGS( ( + const int, + const double, + const double *, + const int, + double *, + const int +) ); +void HPL_dcopy +STDC_ARGS( ( + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dscal +STDC_ARGS( ( + const int, + const double, + double *, + const int +) ); +void HPL_dswap +STDC_ARGS( ( + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dgemv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const int, + const 
int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_dger +STDC_ARGS( ( + const enum HPL_ORDER, + const int, + const int, + const double, + const double *, + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dtrsv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dgemm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const enum HPL_TRANS, + const int, + const int, + const int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_hello +STDC_ARGS( ( +) ); +#endif +void HPL_dtrsm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_SIDE, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int +) ); + +//#endif + +#endif +/* + * hpl_blas.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_comm.h new file mode 100644 index 000000000..e3ba51a57 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_comm.h @@ -0,0 +1,161 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_COMM_H +#define HPL_COMM_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_1RING = 401, /* Increasing ring */ + HPL_1RING_M = 402, /* Increasing ring (modified) */ + HPL_2RING = 403, /* Increasing 2-ring */ + HPL_2RING_M = 404, /* Increasing 2-ring (modified) */ + HPL_BLONG = 405, /* long broadcast */ + HPL_BLONG_M = 406 /* long broadcast (modified) */ +} HPL_T_TOP; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_FAILURE 0 +#define HPL_SUCCESS 1 +#define HPL_KEEP_TESTING 2 +/* + * --------------------------------------------------------------------- + * comm function prototypes + * --------------------------------------------------------------------- + */ +int HPL_send +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_recv +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_sdrv +STDC_ARGS( ( + double *, + int, + int, + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_binit +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_bcast +STDC_ARGS( ( + HPL_T_panel *, + int * +) ); +int HPL_bwait +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_packL +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int +) ); +void HPL_copyL +STDC_ARGS( ( + HPL_T_panel * +) ); + +int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_1rinM STDC_ARGS( ( 
HPL_T_panel * ) ); +int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blong STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel * ) ); + +#endif +/* + * End of hpl_comm.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_gesv.h new file mode 100644 index 000000000..ce671cf2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_gesv.h @@ -0,0 +1,87 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_GESV_H +#define HPL_GESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_LEFT_LOOKING = 301, /* Left looking lu fact variant */ + HPL_CROUT = 302, /* Crout lu fact variant */ + HPL_RIGHT_LOOKING = 303 /* Right looking lu fact variant */ +} HPL_T_FACT; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dgesv +STDC_ARGS( +( const int, const int, const int, const HPL_T_FACT, + const HPL_T_FACT, const int, double *, + const int, int * ) ); +void HPL_ipid +STDC_ARGS( +( const int, double *, int *, int *, + int *, int *, int *, int *, + const int, const int, const int, const int, + const int ) ); + +#endif +/* + * End of hpl_gesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_grid.h new file mode 100644 index 000000000..1895a5ed4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_grid.h @@ -0,0 +1,212 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_GRID_H +#define HPL_GRID_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum { HPL_INT = 100, HPL_DOUBLE = 101 } HPL_T_TYPE; + +typedef enum +{ + HPL_ROW_MAJOR = 201, + HPL_COLUMN_MAJOR = 202 +} HPL_T_ORDER; + +typedef struct HPL_S_grid +{ + MPI_Comm all_comm; /* grid communicator */ + MPI_Comm row_comm; /* row communicator */ + MPI_Comm col_comm; /* column communicator */ + HPL_T_ORDER order; /* ordering of the procs in the grid */ + int iam; /* my rank in the grid */ + int myrow; /* my row number in the grid */ + int mycol; /* my column number in the grid */ + int nprow; /* the total # of rows in the grid */ + int npcol; /* the total # of columns in the grid */ + int nprocs; /* the total # of procs in the grid */ + int row_ip2; /* largest power of two <= nprow */ + int row_hdim; /* row_ip2 procs hypercube dimension */ + int row_ip2m1; /* largest power of two <= nprow-1 */ + int row_mask; /* row_ip2m1 procs hypercube mask */ + int col_ip2; /* largest power of two <= npcol */ + int col_hdim; /* col_ip2 procs hypercube dimension */ + int col_ip2m1; /* largest power of two <= 
npcol-1 */ + int col_mask; /* col_ip2m1 procs hypercube mask */ +} HPL_T_grid; + +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_OP) +( const int, const void *, void *, const HPL_T_TYPE ); +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define HPL_2_MPI_TYPE( typ ) \ + ( ( typ == HPL_INT ? MPI_INT : MPI_DOUBLE ) ) +/* + * The following macros perform common modulo operations; All functions + * except MPosMod assume arguments are < d (i.e., arguments are themsel- + * ves within modulo range). + */ + /* increment with mod */ +#define MModInc(I, d) if(++(I) == (d)) (I) = 0 + /* decrement with mod */ +#define MModDec(I, d) if(--(I) == -1) (I) = (d)-1 + /* positive modulo */ +#define MPosMod(I, d) ( (I) - ((I)/(d))*(d) ) + /* add two numbers */ +#define MModAdd(I1, I2, d) \ + ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) ) + /* add 1 to # */ +#define MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 ) + /* subtract two numbers */ +#define MModSub(I1, I2, d) \ + ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) ) + /* sub 1 from # */ +#define MModSub1(I, d) ( ((I)!=0) ? 
(I)-1 : (d)-1 ) +/* + * --------------------------------------------------------------------- + * grid function prototypes + * --------------------------------------------------------------------- + */ +int HPL_grid_init +STDC_ARGS( ( + MPI_Comm, + const HPL_T_ORDER, + const int, + const int, + HPL_T_grid * +) ); +int HPL_grid_exit +STDC_ARGS( ( + HPL_T_grid * +) ); + +int HPL_grid_info +STDC_ARGS( ( + const HPL_T_grid *, + int *, + int *, + int *, + int * +) ); +int HPL_pnum +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int +) ); + +int HPL_barrier +STDC_ARGS( ( + MPI_Comm +) ); +int HPL_broadcast +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const int, + MPI_Comm +) ); +int HPL_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + const int, + MPI_Comm +) ); +int HPL_all_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + MPI_Comm +) ); + +void HPL_max +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_min +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_sum +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); + +#endif +/* + * End of hpl_grid.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_matgen.h new file mode 100644 index 000000000..de6503eea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_matgen.h @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MATGEN_H +#define HPL_MATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MULT0 1284865837 +#define HPL_MULT1 1481765933 +#define HPL_IADD0 1 +#define HPL_IADD1 0 +#define HPL_DIVFAC 2147483648.0 +#define HPL_POW16 65536.0 +#define HPL_HALF 0.5 +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dmatgen +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int +) ); +void HPL_lmul +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_ladd +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_xjumpm +STDC_ARGS( ( + const int, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_setran +STDC_ARGS( ( + const int, + int * +) ); +void HPL_jumpit +STDC_ARGS( ( + int *, + int *, + int *, + int * +) ); +double HPL_rand STDC_ARGS( ( void ) ); + +#endif +/* + * End of hpl_matgen.h + */ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_misc.h new file mode 100644 index 000000000..ea421a403 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_misc.h @@ -0,0 +1,110 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MISC_H +#define HPL_MISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#ifdef __STDC__ +#define STDC_HEADERS +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#ifdef STDC_HEADERS +#include <stdarg.h> +#define STDC_ARGS(p) p +#else +#include <varargs.h> +#define STDC_ARGS(p) () +#endif + +#ifdef HPL_CALL_VSIPL +#include <vsip.h> +#endif +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_rone 1.0 +#define HPL_rtwo 2.0 +#define HPL_rzero 0.0 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define Mabs( a_ ) ( ( (a_) < 0 ) ? -(a_) : (a_) ) +#define Mmin( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) ) +#define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) ) + +#define Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b)))) +#define Mceil(a,b) ( ( (a)+(b)-1 ) / (b) ) +#define Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b)))) + +#define Mupcase(C) (((C)>96 && (C)<123) ? (C) & 0xDF : (C)) +#define Mlowcase(C) (((C)>64 && (C)< 91) ?
(C) | 32 : (C)) +/* + * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and + * also less silly errors ... + */ +#define Mptr( a_, i_, j_, lda_ ) \ + ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) ) +/* + * Align pointer + */ +#define HPL_PTR( ptr_, al_ ) \ + ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) +#endif +/* + * End of hpl_misc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_panel.h new file mode 100644 index 000000000..d5ba2939c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_panel.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PANEL_H +#define HPL_PANEL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_panel +{ + struct HPL_S_grid * grid; /* ptr to the process grid */ + struct HPL_S_palg * algo; /* ptr to the algo parameters */ + struct HPL_S_pmat * pmat; /* ptr to the local array info */ + double * A; /* ptr to trailing part of A */ + double * WORK; /* work space */ + double * L2; /* ptr to L */ + double * L1; /* ptr to jb x jb upper block of A */ + double * DPIV; /* ptr to replicated jb pivot array */ + double * DINFO; /* ptr to replicated scalar info */ + double * U; /* ptr to U */ + int * IWORK; /* integer workspace for swapping */ + void * * * buffers[2]; /* buffers for panel bcast */ + int counts [2]; /* counts for panel bcast */ + MPI_Datatype dtypes [2]; /* data types for panel bcast */ + MPI_Request request[1]; /* requests for panel bcast */ + MPI_Status status [1]; /* status for panel bcast */ + int nb; /* distribution blocking factor */ + int jb; /* panel width */ + int m; /* global # of rows of trailing part of A */ + int n; /* global # of cols of trailing part of A */ + int ia; /* global row index of trailing part of A */ + int ja; /* global col index of trailing part of A */ + int mp; /* local # of rows of trailing part of A */ + int nq; /* local # of cols of trailing part of A */ + int ii; /* local row index of trailing part of A */ + int jj; /* local col index of trailing part of A */ + int lda; /* local leading dim of array A */ + int prow; /* proc. row owning 1st row of trail. A */ + int pcol; /* proc. col owning 1st col of trail. 
A */ + int msgid; /* message id for panel bcast */ + int ldl2; /* local leading dim of array L2 */ + int len; /* length of the buffer to broadcast */ +#ifdef HPL_CALL_VSIPL + vsip_block_d * Ablock; /* A block */ + vsip_block_d * L1block; /* L1 block */ + vsip_block_d * L2block; /* L2 block */ + vsip_block_d * Ublock; /* U block */ +#endif +} HPL_T_panel; + +/* + * --------------------------------------------------------------------- + * panel function prototypes + * --------------------------------------------------------------------- + */ +#include "hpl_pgesv.h" + +void HPL_pdpanel_new +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * * +) ); +void HPL_pdpanel_init +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * +) ); +int HPL_pdpanel_disp +STDC_ARGS( ( + HPL_T_panel * * +) ); +int HPL_pdpanel_free +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_panel.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pauxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pauxil.h new file mode 100644 index 000000000..1fd0ee457 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pauxil.h @@ -0,0 +1,505 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PAUXIL_H +#define HPL_PAUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +/* + * Mindxg2p returns the process coordinate owning the entry globally in- + * dexed by ig_. + */ +#define Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) && \ + ( (nprocs_) > 1 ) ) \ + { \ + proc_ = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_); \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + else \ + { \ + proc_ = (src_); \ + } \ + } + +#define Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) { il_ = (ig_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*( j__ - i__ ) + \ + ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ + (ig_) - (inb_) : (ig_) ); \ + } \ + } + +#define Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) \ + { il_ = (ig_); proc_ = (src_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*(j__-i__) + \ + ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ + (ig_) - (inb_) : (ig_) ); \ + proc_ = (src_) + 1 + i__; \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + } +/* + * Mindxl2g computes the global index ig_ corresponding to the local + * index il_ in process proc_.
+ */ +#define Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + if( (proc_) == (src_) ) \ + { \ + if( (il_) < (inb_) ) ig_ = (il_); \ + else ig_ = (il_) + \ + (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \ + } \ + else if( (proc_) < (src_) ) \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1+(nprocs_) ); \ + } \ + else \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1 ); \ + } \ + } \ + else \ + { \ + ig_ = (il_); \ + } \ + } +/* + * MnumrocI computes the # of local indexes np_ residing in the process + * of coordinate proc_ corresponding to the interval of global indexes + * i_:i_+n_-1 assuming that the global index 0 resides in the process + * src_, and that the indexes are distributed from src_ using the para- + * meters inb_, nb_ and nprocs_. + */ +#define MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + int inb__, mydist__, n__, nblk__, quot__, src__; \ + if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ + { \ + nblk__ = (-inb__) / (nb_) + 1; \ + src__ = (src_) + nblk__; \ + src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ + inb__ += nblk__*(nb_); \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == src__ ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - src__ ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != src__ ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != src__ ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + else \ + { \ + if( ( n__ = (n_) - inb__ ) <= 0 ) 
\ + { \ + if( (proc_) == (src_) ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - (src_) ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != (src_) ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != (src_) ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + } \ + else \ + { \ + np_ = (n_); \ + } \ + } + +#define Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ ) \ + MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_indxg2lp +STDC_ARGS( ( + int *, + int *, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2l +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2p +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxl2g +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +void HPL_infog2l +STDC_ARGS( ( + int, + int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + int *, + int *, + int *, + int * +) ); +int HPL_numroc +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_numrocI +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int, + const int +) ); + +void HPL_dlaswp00N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp10N 
+STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp01N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp01T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp02N +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp03N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp03T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp04N +STDC_ARGS( ( + const int, + const int, + const int, + double *, + const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp04T +STDC_ARGS( ( + const int, + const int, + const int, + double *, + const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp06N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp06T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); + +void HPL_pabort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_pwarn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... 
+) ); +void HPL_pdlaprnt +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_pdlamch +STDC_ARGS( ( + MPI_Comm, + const HPL_T_MACH +) ); +double HPL_pdlange +STDC_ARGS( ( + const HPL_T_grid *, + const HPL_T_NORM, + const int, + const int, + const int, + const double *, + const int +) ); + +#endif +/* + * End of hpl_pauxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pfact.h new file mode 100644 index 000000000..09eee79ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pfact.h @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PFACT_H +#define HPL_PFACT_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_PFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_RFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_UPD_FUN) +( HPL_T_panel *, int *, HPL_T_panel *, const int ); +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dlocmax +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_dlocswpN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_dlocswpT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_pdmxswp +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdpancrN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlT +STDC_ARGS( ( + HPL_T_panel *, + 
const int, + const int, + const int, + double * +) ); + +void HPL_pdrpancrN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdfact +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_pfact.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pgesv.h new file mode 100644 index 000000000..3ca576c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pgesv.h @@ -0,0 +1,346 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PGESV_H +#define HPL_PGESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +#include "hpl_comm.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_SWAP00 = 451, /* Use HPL_pdlaswp00 */ + HPL_SWAP01 = 452, /* Use HPL_pdlaswp01 */ + HPL_SW_MIX = 453, /* Use HPL_pdlaswp00_ for small number of */ + /* columns, and HPL_pdlaswp01_ otherwise. */ + HPL_NO_SWP = 499 +} HPL_T_SWAP; + +typedef struct HPL_S_palg +{ + HPL_T_TOP btopo; /* row broadcast topology */ + int depth; /* look-ahead depth */ + int nbdiv; /* recursive division factor */ + int nbmin; /* recursion stopping criterium */ + HPL_T_FACT pfact; /* panel fact variant */ + HPL_T_FACT rfact; /* recursive fact variant */ + HPL_T_PFA_FUN pffun; /* panel fact function ptr */ + HPL_T_RFA_FUN rffun; /* recursive fact function ptr */ + HPL_T_UPD_FUN upfun; /* update function */ + HPL_T_SWAP fswap; /* Swapping algorithm */ + int fsthr; /* Swapping threshold */ + int equil; /* Equilibration */ + int align; /* data alignment constant */ +} HPL_T_palg; + +typedef struct HPL_S_pmat +{ +#ifdef HPL_CALL_VSIPL + vsip_block_d * block; +#endif + double * A; /* pointer to local piece of A */ + double * X; /* pointer to solution vector */ + int n; /* global problem size */ + int nb; /* blocking factor */ + int ld; /* local leading dimension */ + int mp; /* local number of rows */ + int nq; /* local number of columns */ + int info; /* computational flag */ +} HPL_T_pmat; +/* + * 
--------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define MSGID_BEGIN_PFACT 1001 /* message id ranges */ +#define MSGID_END_PFACT 2000 +#define MSGID_BEGIN_FACT 2001 +#define MSGID_END_FACT 3000 +#define MSGID_BEGIN_PTRSV 3001 +#define MSGID_END_PTRSV 4000 + +#define MSGID_BEGIN_COLL 9001 +#define MSGID_END_COLL 10000 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define MNxtMgid( id_, beg_, end_ ) \ + (( (id_)+1 > (end_) ? (beg_) : (id_)+1 )) +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pipid +STDC_ARGS( ( + HPL_T_panel *, + int *, + int * +) ); +void HPL_plindx0 +STDC_ARGS( ( + HPL_T_panel *, + const int, + int *, + int *, + int *, + int * +) ); +void HPL_pdlaswp00N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdlaswp00T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_perm +STDC_ARGS( ( + const int, + int *, + int *, + int * +) ); +void HPL_logsort +STDC_ARGS( ( + const int, + const int, + int *, + int *, + int * +) ); +void HPL_plindx10 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int * +) ); +void HPL_plindx1 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_spreadN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_spreadT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const 
int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_equil +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_TRANS, + const int, + double *, + const int, + int *, + const int *, + const int *, + int * +) ); +void HPL_rollN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_rollT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_pdlaswp01N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdlaswp01T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdupdateNN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateNT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdgesv0 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK1 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK2 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); + +void HPL_pdtrsv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_pmat * +) ); + +#endif +/* + * End of hpl_pgesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmatgen.h new file mode 100644 index 000000000..1091b0f60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmatgen.h @@ -0,0 +1,77 @@ +/* + * 
-- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PMATGEN_H +#define HPL_PMATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_matgen.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdmatgen +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int +) ); + +#endif +/* + * End of hpl_pmatgen.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmisc.h new file mode 100644 index 000000000..23550d47b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_pmisc.h @@ -0,0 +1,59 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PMISC_H +#define HPL_PMISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "mpi.h" + +#endif +/* + * End of hpl_pmisc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptest.h new file mode 100644 index 000000000..5777bd536 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptest.h @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PTEST_H +#define HPL_PTEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pgesv.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_test +{ + double epsil; /* epsilon machine */ + double thrsh; /* threshold */ + FILE * outfp; /* output stream (only in proc 0) */ + int kfail; /* # of tests failed */ + int kpass; /* # of tests passed */ + int kskip; /* # of tests skipped */ + int ktest; /* total number of tests */ +} HPL_T_test; + +/* + * --------------------------------------------------------------------- + * #define macro constants for testing only + * --------------------------------------------------------------------- + */ +#define HPL_LINE_MAX 256 +#define HPL_MAX_PARAM 20 +#define HPL_ISEED 100 +/* + * --------------------------------------------------------------------- + * global timers for timing analysis only + * --------------------------------------------------------------------- + */ +#ifdef HPL_DETAILED_TIMING +#define HPL_TIMING_BEG 11 /* timer 0 reserved, used by main */ +#define HPL_TIMING_N 6 /* number of timers defined below */ +#define HPL_TIMING_RPFACT 11 /* starting from here, contiguous */ +#define HPL_TIMING_PFACT 12 +#define HPL_TIMING_MXSWP 13 +#define HPL_TIMING_UPDATE 14 +#define HPL_TIMING_LASWP 15 +#define HPL_TIMING_PTRSV 16 +#endif +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ 
+void HPL_pdinfo +STDC_ARGS( ( + HPL_T_test *, + int *, + int *, + int *, + int *, + HPL_T_ORDER *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + HPL_T_TOP *, + int *, + int *, + HPL_T_SWAP *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_pdtest +STDC_ARGS( ( + HPL_T_test *, + HPL_T_grid *, + HPL_T_palg *, + const int, + const int +) ); + +#endif +/* + * End of hpl_ptest.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptimer.h new file mode 100644 index 000000000..43c8fe33a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_ptimer.h @@ -0,0 +1,96 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PTIMER_H +#define HPL_PTIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NPTIMER 64 +#define HPL_PTIMER_STARTFLAG 5.0 +#define HPL_PTIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_PTIME = 101, HPL_CPU_PTIME = 102 } HPL_T_PTIME; + +typedef enum +{ HPL_AMAX_PTIME = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME = 203 } +HPL_T_PTIME_OP; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_ptimer_cputime STDC_ARGS( ( void ) ); +double HPL_ptimer_walltime STDC_ARGS( ( void ) ); + +void HPL_ptimer STDC_ARGS( ( const int ) ); +void HPL_ptimer_boot STDC_ARGS( ( void ) ); +void HPL_ptimer_combine +STDC_ARGS( +( MPI_Comm comm, const HPL_T_PTIME_OP, const HPL_T_PTIME, + const int, const int, double * ) ); +void HPL_ptimer_disable STDC_ARGS( ( void ) ); +void HPL_ptimer_enable STDC_ARGS( ( void ) ); +double HPL_ptimer_inquire +STDC_ARGS( +( const HPL_T_PTIME, const int ) ); + +#endif +/* + * End of hpl_ptimer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_test.h new file mode 100644 index 000000000..1eedc97e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_test.h @@ -0,0 +1,80 @@ +/* + * -- High Performance Computing Linpack Benchmark 
(HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TEST_H +#define HPL_TEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_matgen.h" +#include "hpl_timer.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dinfo +STDC_ARGS( +( FILE * *, int *, int *, int *, + HPL_T_FACT *, int *, int *, int *, + int *, int *, HPL_T_FACT *, int *, + double *, double * ) ); +void HPL_dtest +STDC_ARGS( +( FILE *, const int, const int, const int, + HPL_T_FACT, HPL_T_FACT, const int, const double, + const double, int *, int *, int * ) ); + +#endif +/* + * End of hpl_test.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_timer.h new file mode 100644 index 000000000..4c91700ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/include/hpl_timer.h @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TIMER_H +#define HPL_TIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NTIMER 64 +#define HPL_TIMER_STARTFLAG 5.0 +#define HPL_TIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_TIME = 101, HPL_CPU_TIME = 102 } HPL_T_TIME; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_timer_cputime STDC_ARGS( ( void ) ); +double HPL_timer_walltime STDC_ARGS( ( void ) ); + +void HPL_timer STDC_ARGS( ( const int ) ); +void HPL_timer_boot STDC_ARGS( ( void ) ); +void HPL_timer_enable STDC_ARGS( ( void ) ); +void HPL_timer_disable STDC_ARGS( ( void ) ); +double HPL_timer_inquire +STDC_ARGS( +( const HPL_T_TIME, const int ) ); + +#endif +/* + * End of hpl_timer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_abort.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_abort.c new file mode 100644 index 000000000..bf0c5e727 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_abort.c @@ -0,0 +1,129 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_abort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_abort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_abort displays an error message on stderr and halts execution. + * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. 
+ */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR in function", SRNAME, cline ); + else + HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); + exit( 0 ); +/* + * End of HPL_abort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlacpy.c new file mode 100644 index 000000000..ec71180eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlacpy.c @@ -0,0 +1,343 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factors + * #ifndef HPL_LACPY_M_DEPTH + * #define HPL_LACPY_M_DEPTH 32 + * #define HPL_LACPY_LOG2_M_DEPTH 5 + * #endif + * #ifndef HPL_LACPY_N_DEPTH + * #define HPL_LACPY_N_DEPTH 4 + * #define HPL_LACPY_LOG2_N_DEPTH 2 + * #endif + */ +#ifndef HPL_LACPY_M_DEPTH +#define HPL_LACPY_M_DEPTH 4 +#define HPL_LACPY_LOG2_M_DEPTH 2 +#endif +#ifndef HPL_LACPY_N_DEPTH +#define HPL_LACPY_N_DEPTH 2 +#define HPL_LACPY_LOG2_N_DEPTH 1 +#endif + +#ifdef STDC_HEADERS +void HPL_dlacpy +( + const int M, + const int N, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dlacpy +( M, N, A, LDA, B, LDB ) + const int M; + const int N; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlacpy copies an array A into an array B. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the arrays A and + * B. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the arrays A + * and B. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N). + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * B (local output) double * + * On entry, B points to an array of dimension (LDB,N). On exit, + * B is overwritten with A. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of the array B. + * LDB must be at least MAX(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ +#ifdef HPL_LACPY_USE_COPY + register int j; +#else +#if ( HPL_LACPY_N_DEPTH == 1 ) + const double * A0 = A; + double * B0 = B; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + const double * A0 = A, * A1 = A + LDA; + double * B0 = B, * B1 = B + LDB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + const double * A0 = A, * A1 = A + LDA, + * A2 = A + (LDA << 1), * A3 = A + 3 * LDA; + double * B0 = B, * B1 = B + LDB, + * B2 = B + (LDB << 1), * B3 = B + 3 * LDB; +#endif + const int incA = ( (unsigned int)(LDA) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incB = ( (unsigned int)(LDB) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incA0 = (unsigned int)(LDA) - M, + incB0 = (unsigned int)(LDB) - M; + int mu, nu; + register int i, j; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + +#ifdef HPL_LACPY_USE_COPY + for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 ); +#else + mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) << + HPL_LACPY_LOG2_M_DEPTH ); + nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) << + HPL_LACPY_LOG2_N_DEPTH ); + + for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 0] = A0[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0]; +#endif + +#if ( HPL_LACPY_M_DEPTH > 1 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 1] = A0[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; 
B3[ 2] = A3[ 2]; + B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4]; + B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6]; + B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 8 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8]; + B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10]; + B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12]; + B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14]; + B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; 
B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16]; + B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18]; + B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20]; + B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22]; + B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24]; + B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26]; + B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28]; + B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30]; + B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31]; +#endif + +#endif + +#if 
( HPL_LACPY_N_DEPTH == 1 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; + A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH; + A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH; +#endif + } + + for( i = mu; i < M; i++ ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + *B0 = *A0; B0++; A0++; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; + *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++; +#endif + } + +#if ( HPL_LACPY_N_DEPTH == 1 ) + A0 += incA; B0 += incB; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; + A2 += incA; B2 += incB; A3 += incA; B3 += incB; +#endif + } + + for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH, + B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH ) + { + B0[ 0] = A0[ 0]; +#if ( HPL_LACPY_M_DEPTH > 1 ) + B0[ 1] = A0[ 1]; +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#endif +#if ( HPL_LACPY_M_DEPTH > 8 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#endif 
+ } + for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; } + } +#endif +/* + * End of HPL_dlacpy + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlamch.c new file mode 100644 index 000000000..c685f0d5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlamch.c @@ -0,0 +1,876 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static function prototypes + * --------------------------------------------------------------------- + */ +static void HPL_dlamc1 +STDC_ARGS( +( int *, int *, int *, int * ) ); +static void HPL_dlamc2 +STDC_ARGS( +( int *, int *, int *, double *, + int *, double *, int *, double * ) ); +static double HPL_dlamc3 +STDC_ARGS( +( const double, const double ) ); +static void HPL_dlamc4 +STDC_ARGS( +( int *, const double, const int ) ); +static void HPL_dlamc5 +STDC_ARGS( +( const int, const int, const int, const int, + int *, double * ) ); +static double HPL_dipow +STDC_ARGS( +( const double, const int ) ); + +#ifdef STDC_HEADERS +double HPL_dlamch +( + const HPL_T_MACH CMACH +) +#else +double HPL_dlamch +( CMACH ) + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum (sfmin) such + * that 1 / sfmin does not overflow, 
the base of the machine (base), the + * precision (prec), the number of (base) digits in the mantissa (t), + * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the + * minimum exponent before (gradual) underflow (emin), the underflow + * threshold (rmin) base**(emin-1), the largest exponent before overflow + * (emax), the overflow threshold (rmax) (base**emax)*(1-eps). + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamch.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * CMACH (local input) const HPL_T_MACH + * Specifies the value to be returned by HPL_dlamch + * = HPL_MACH_EPS, HPL_dlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_dlamch := sfmin + * = HPL_MACH_BASE, HPL_dlamch := base + * = HPL_MACH_PREC, HPL_dlamch := eps*base + * = HPL_MACH_MLEN, HPL_dlamch := t + * = HPL_MACH_RND, HPL_dlamch := rnd + * = HPL_MACH_EMIN, HPL_dlamch := emin + * = HPL_MACH_RMIN, HPL_dlamch := rmin + * = HPL_MACH_EMAX, HPL_dlamch := emax + * = HPL_MACH_RMAX, HPL_dlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + static double eps, sfmin, base, t, rnd, emin, rmin, emax, + rmax, prec; + double small; + static int first=1; + int beta=0, imax=0, imin=0, it=0, lrnd=0; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; + HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); + base = (double)(beta); t = (double)(it); + if( lrnd != 0 ) + { rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; } + else + { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); } + prec = eps * base; emin = (double)(imin); emax = (double)(imax); + sfmin = rmin; small = HPL_rone / rmax; +/* + * Use SMALL plus a bit, to avoid the possibility of rounding causing + * overflow when computing 1/sfmin. + */ + if( small >= sfmin ) sfmin = small * ( HPL_rone + eps ); + } + + if( CMACH == HPL_MACH_EPS ) return( eps ); + if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); + if( CMACH == HPL_MACH_BASE ) return( base ); + if( CMACH == HPL_MACH_PREC ) return( prec ); + if( CMACH == HPL_MACH_MLEN ) return( t ); + if( CMACH == HPL_MACH_RND ) return( rnd ); + if( CMACH == HPL_MACH_EMIN ) return( emin ); + if( CMACH == HPL_MACH_RMIN ) return( rmin ); + if( CMACH == HPL_MACH_EMAX ) return( emax ); + if( CMACH == HPL_MACH_RMAX ) return( rmax ); + + return( eps ); +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static void HPL_dlamc1 +( + int * BETA, + int * T, + int * RND, + int * IEEE1 +) +#else +static void HPL_dlamc1 +( BETA, T, RND, IEEE1 ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * IEEE1, * RND, * T; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc1 determines the machine parameters given by BETA, T, RND, + * and IEEE1. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. 
A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * IEEE1 (local output) int * + * Specifies whether rounding appears to be done in the IEEE + * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b, c, f, one, qtr, savec, t1, t2; + static int first=1, lbeta, lieee1, lrnd, lt; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; one = HPL_rone; +/* + * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and + * RND. Throughout this routine we use the function HPL_dlamc3 to ensure + * that relevant values are stored and not held in registers, or are not + * affected by optimizers. + * + * Compute a = 2.0**m with the smallest positive integer m such that + * fl( a + 1.0 ) == a. + */ + a = HPL_rone; c = HPL_rone; + do + { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } + while( c == HPL_rone ); +/* + * Now compute b = 2.0**m with the smallest positive integer m such that + * fl( a + b ) > a. + */ + b = HPL_rone; c = HPL_dlamc3( a, b ); + while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); } +/* + * Now compute the base. 
a and c are neighbouring floating point num- + * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe- + * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to + * BETA and not (BETA-1). + */ + qtr = one / 4.0; savec = c; + c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr); +/* + * Now determine whether rounding or chopping occurs, by adding a bit + * less than BETA/2 and a bit more than BETA/2 to a. + */ + b = (double)(lbeta); + f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a ); + if( c == a ) { lrnd = 1; } else { lrnd = 0; } + f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a ); + if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; +/* + * Try and decide whether rounding is done in the IEEE round to nea- + * rest style. b/2 is half a unit in the last place of the two numbers + * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa- + * vec is odd. Thus adding b/2 to a should not change a, but adding b/2 + * to savec should change savec. + */ + t1 = HPL_dlamc3( b / HPL_rtwo, a ); + t2 = HPL_dlamc3( b / HPL_rtwo, savec ); + if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; + else lieee1 = 0; +/* + * Now find the mantissa, T. It should be the integer part of log to the + * base BETA of a, however it is safer to determine T by powering. So we + * find T as the smallest positive integer for which fl( beta**t + 1.0 ) + * is equal to 1.0. + */ + lt = 0; a = HPL_rone; c = HPL_rone; + + do + { + lt++; a *= (double)(lbeta); + c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); + } while( c == HPL_rone ); + } + + *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; +} + +#ifdef STDC_HEADERS +static void HPL_dlamc2 +( + int * BETA, + int * T, + int * RND, + double * EPS, + int * EMIN, + double * RMIN, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +/* + * .. Scalar Arguments .. 
+ */ + int * BETA, * EMAX, * EMIN, * RND, * T; + double * EPS, * RMAX, * RMIN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc2 determines the machine parameters specified in its argu- + * ment list. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself + * based on a function PARANOIA by W. Kahan of the University of Cali- + * fornia at Berkeley for the computation of the relative machine epsi- + * lon eps. + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * EPS (local output) double * + * The smallest positive number such that fl( 1.0 - EPS ) < 1.0, + * where fl denotes the computed value. + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow occurs. + * + * RMIN (local output) double * + * The smallest normalized number for the machine, given by + * BASE**( EMIN - 1 ), where BASE is the floating point value + * of BETA. + * + * EMAX (local output) int * + * The maximum exponent before overflow occurs. + * + * RMAX (local output) double * + * The largest positive number for the machine, given by + * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + * value of BETA. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double leps, lrmax, lrmin; + double a, b, c, half, one, rbase, sixth, small, + third, two, zero; + static int first=1, iwarn=0, lbeta=0, lemax, lemin, + lt=0; + int gnmin=0, gpmin=0, i, ieee, lieee1=0, + lrnd=0, ngnmin=0, ngpmin=0; +/* .. + * .. 
Executable Statements .. + */ + if( first != 0 ) + { + first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo; +/* + * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA, + * T, RND, EPS, EMIN and RMIN. + * + * Throughout this routine we use the function HPL_dlamc3 to ensure that + * relevant values are stored and not held in registers, or are not af- + * fected by optimizers. + * + * HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1. + */ + HPL_dlamc1( &lbeta, <, &lrnd, &lieee1 ); +/* + * Start to find eps. + */ + b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a; +/* + * Try some tricks to see whether or not this is the correct EPS. + */ + b = two / 3.0; + half = one / HPL_rtwo; + sixth = HPL_dlamc3( b, -half ); + third = HPL_dlamc3( sixth, sixth ); + b = HPL_dlamc3( third, -half ); + b = HPL_dlamc3( b, sixth ); + b = Mabs( b ); if( b < leps ) b = leps; + + leps = HPL_rone; + + while( ( leps > b ) && ( b > zero ) ) + { + leps = b; + c = HPL_dlamc3( half * leps, + HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) ); + c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c ); + c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c ); + } + if( a < leps ) leps = a; +/* + * Computation of EPS complete. + * + * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). Keep + * dividing a by BETA until (gradual) underflow occurs. This is detected + * when we cannot recover the previous a. 
+ */ + rbase = one / (double)(lbeta); small = one; + for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero ); + a = HPL_dlamc3( one, small ); + HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta ); + HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta ); + + ieee = 0; + + if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) ) + { + if( ngpmin == gpmin ) + { +/* + * Non twos-complement machines, no gradual underflow; e.g., VAX ) + */ + lemin = ngpmin; + } + else if( ( gpmin-ngpmin ) == 3 ) + { +/* + * Non twos-complement machines with gradual underflow; e.g., IEEE stan- + * dard followers + */ + lemin = ngpmin - 1 + lt; ieee = 1; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, gpmin ); + iwarn = 1; + } + } + else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) + { + if( Mabs( ngpmin-ngnmin ) == 1 ) + { +/* + * Twos-complement machines, no gradual underflow; e.g., CYBER 205 + */ + lemin = Mmax( ngpmin, ngnmin ); + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) + { + if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) + { +/* + * Twos-complement machines with gradual underflow; no known machine + */ + lemin = Mmax( ngpmin, ngnmin ) - 1 + lt; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); + lemin = Mmin( lemin, gnmin ); iwarn = 1; + } +/* + * Comment out this if block if EMIN is ok + */ + if( iwarn != 0 ) + { + first = 1; + HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n", +"WARNING. The value EMIN may be incorrect:- EMIN =", lemin, +"If, after inspection, the value EMIN looks acceptable, please comment ", +"out the if block as marked within the code of routine HPL_dlamc2, ", +"otherwise supply EMIN explicitly." 
); + } +/* + * Assume IEEE arithmetic if we found denormalised numbers above, or if + * arithmetic seems to round in the IEEE style, determined in routine + * HPL_dlamc1. A true IEEE machine should have both things true; how- + * ever, faulty machines may have one or the other. + */ + if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; + else ieee = 0; +/* + * Compute RMIN by successive division by BETA. We could compute RMIN + * as BASE**( EMIN - 1 ), but some machines underflow during this compu- + * tation. + */ + lrmin = HPL_rone; + for( i = 0; i < 1 - lemin; i++ ) + lrmin = HPL_dlamc3( lrmin*rbase, zero ); +/* + * Finally, call HPL_dlamc5 to compute emax and rmax. + */ + HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); + } + *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; + *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; +} + +#ifdef STDC_HEADERS +static double HPL_dlamc3( const double A, const double B ) +#else +static double HPL_dlamc3( A, B ) +/* + * .. Scalar Arguments .. + */ + const double A, B; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc3 is intended to force a and b to be stored prior to doing + * the addition of a and b, for use in situations where optimizers + * might hold one of these in a register. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc3.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * A, B (local input) double + * The values a and b. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( A + B ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc4 +( + int * EMIN, + const double START, + const int BASE +) +#else +static void HPL_dlamc4( EMIN, START, BASE ) +/* + * .. Scalar Arguments .. 
+ */ + int * EMIN; + const int BASE; + const double START; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc4 is a service function for HPL_dlamc2. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc4.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow, computed by + * setting A = START and dividing by BASE until the previous A + * can not be recovered. + * + * START (local input) double + * The starting point for determining EMIN. + * + * BASE (local input) int + * The base of the machine. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b1, b2, c1, c2, d1, d2, one, rbase, zero; + int i; +/* .. + * .. Executable Statements .. + */ + a = START; one = HPL_rone; rbase = one / (double)(BASE); + zero = HPL_rzero; + *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a; + + do + { + (*EMIN)--; a = b1; + b1 = HPL_dlamc3( a / BASE, zero ); + c1 = HPL_dlamc3( b1 * BASE, zero ); + d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; + b2 = HPL_dlamc3( a * rbase, zero ); + c2 = HPL_dlamc3( b2 / rbase, zero ); + d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; + } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc5 +( + const int BETA, + const int P, + const int EMIN, + const int IEEE, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + const int BETA, EMIN, IEEE, P; + int * EMAX; + double * RMAX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc5 attempts to compute RMAX, the largest machine floating- + * point number, without overflow. It assumes that EMAX + abs(EMIN) sum + * approximately to a power of 2. 
It will fail on machines where this + * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + * EMAX = 28718). It will also fail if the value supplied for EMIN is + * too large (i.e. too close to zero), probably with overflow. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc5.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * BETA (local input) int + * The base of floating-point arithmetic. + * + * P (local input) int + * The number of base BETA digits in the mantissa of a floating- + * point value. + * + * EMIN (local input) int + * The minimum exponent before (gradual) underflow. + * + * IEEE (local input) int + * A logical flag specifying whether or not the arithmetic sys- + * tem is thought to comply with the IEEE standard. + * + * EMAX (local output) int * + * The largest exponent before overflow. + * + * RMAX (local output) double * + * The largest machine floating-point number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double oldy=HPL_rzero, recbas, y, z; + int exbits=1, expsum, i, lexp=1, nbits, try, + uexp; +/* .. + * .. Executable Statements .. + */ +/* + * First compute lexp and uexp, two powers of 2 that bound abs(EMIN). + * We then assume that EMAX + abs( EMIN ) will sum approximately to the + * bound that is closest to abs( EMIN ). (EMAX is the exponent of the + * required number RMAX). + */ +l_10: + try = (int)( (unsigned int)(lexp) << 1 ); + if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; } + + if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; } +/* + * Now -lexp is less than or equal to EMIN, and -uexp is greater than or + * equal to EMIN. exbits is the number of bits needed to store the expo- + * nent. 
+ */ + if( ( uexp+EMIN ) > ( -lexp-EMIN ) ) + { expsum = (int)( (unsigned int)(lexp) << 1 ); } + else + { expsum = (int)( (unsigned int)(uexp) << 1 ); } +/* + * expsum is the exponent range, approximately equal to EMAX - EMIN + 1. + */ + *EMAX = expsum + EMIN - 1; +/* + * nbits is the total number of bits needed to store a floating-point + * number. + */ + nbits = 1 + exbits + P; + + if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) + { +/* + * Either there are an odd number of bits used to store a floating-point + * number, which is unlikely, or some bits are not used in the represen- + * tation of numbers, which is possible, (e.g. Cray machines) or the + * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), + * which is perhaps the most likely. We have to assume the last alterna- + * tive. If this is true, then we need to reduce EMAX by one because + * there must be some way of representing zero in an implicit-bit sys- + * tem. On machines like Cray we are reducing EMAX by one unnecessarily. + */ + (*EMAX)--; + } + + if( IEEE != 0 ) + { +/* + * Assume we are on an IEEE machine which reserves one exponent for in- + * finity and NaN. + */ + (*EMAX)--; + } +/* + * Now create RMAX, the largest machine number, which should be equal to + * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being + * careful that the result is less than 1.0. + */ + recbas = HPL_rone / (double)(BETA); + z = (double)(BETA) - HPL_rone; + y = HPL_rzero; + + for( i = 0; i < P; i++ ) + { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); } + + if( y >= HPL_rone ) y = oldy; +/* + * Now multiply by BETA**EMAX to get RMAX. + */ + for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero ); + + *RMAX = y; +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static double HPL_dipow +( + const double X, + const int N +) +#else +static double HPL_dipow( X, N ) +/* + * .. Scalar Arguments .. 
+ */ + const int N; + const double X; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dipow computes the integer n-th power of a real scalar x. + * + * Arguments + * ========= + * + * X (local input) const double + * The real scalar x. + * + * N (local input) const int + * The integer power to raise x to. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r, y=HPL_rone; + int k, n; +/* .. + * .. Executable Statements .. + */ + if( X == HPL_rzero ) return( HPL_rzero ); + if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } + for( k = 0; k < n; k++ ) y *= r; + + return( y ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlange.c new file mode 100644 index 000000000..82f118b6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlange.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_dlange +( + const HPL_T_NORM NORM, + const int M, + const int N, + const double * A, + const int LDA +) +#else +double HPL_dlange +( NORM, M, N, A, LDA ) + const HPL_T_NORM NORM; + const int M; + const int N; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a matrix A: + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. + * + * Arguments + * ========= + * + * NORM (local input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N), that + * contains the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero ); + + if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ) + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - M; + } + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ). + */ + work = (double*)malloc( (size_t)(N) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( j = 0; j < N; j++ ) + { + s = HPL_rzero; + for( i = 0; i < M; i++ ) { s += Mabs( *A ); A++; } + work[j] = s; A += LDA - M; + } +/* + * Find maximum sum of columns for 1-norm + */ + v0 = work[HPL_idamax( N, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ) + */ + work = (double*)malloc( (size_t)(M) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( i = 0; i < M; i++ ) { work[i] = HPL_rzero; } + + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { work[i] += Mabs( *A ); A++; } + A += LDA - M; + } +/* + * Find maximum sum of rows for inf-norm + */ + v0 = work[HPL_idamax( M, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + + return( v0 ); +/* + * End of HPL_dlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlaprnt.c new file mode 100644 index 000000000..f29df3cd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlaprnt.c @@ -0,0 +1,130 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dlaprnt +( + const int M, + const int N, + double * A, + const int IA, + const int JA, + const int LDA, + const char * CMATNM +) +#else +void HPL_dlaprnt +( M, N, A, IA, JA, LDA, CMATNM ) + const int M; + const int N; + double * A; + const int IA; + const int JA; + const int LDA; + const char * CMATNM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaprnt prints to standard error an M-by-N matrix A. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A. M must be at + * least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of A. N must be + * at least zero. + * + * A (local input) double * + * On entry, A points to an array of dimension (LDA,N). + * + * IA (local input) const int + * On entry, IA specifies the starting row index to be printed. + * + * JA (local input) const int + * On entry, JA specifies the starting column index to be + * printed. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * CMATNM (local input) const char * + * On entry, CMATNM is the name of the matrix to be printed. + * + * --------------------------------------------------------------------- + */ +/* + * .. 
Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) + { + HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM, IA+i, + JA+j, *(Mptr( A, i, j, LDA )) ); + } + } +/* + * End of HPL_dlaprnt + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlatcpy.c new file mode 100644 index 000000000..410451c24 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_dlatcpy.c @@ -0,0 +1,398 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factors + * #ifndef HPL_LATCPY_M_DEPTH + * #define HPL_LATCPY_M_DEPTH 32 + * #define HPL_LATCPY_LOG2_M_DEPTH 5 + * #endif + * #ifndef HPL_LATCPY_N_DEPTH + * #define HPL_LATCPY_N_DEPTH 4 + * #define HPL_LATCPY_LOG2_N_DEPTH 2 + * #endif + */ +#ifndef HPL_LATCPY_M_DEPTH +#define HPL_LATCPY_M_DEPTH 4 +#define HPL_LATCPY_LOG2_M_DEPTH 2 +#endif +#ifndef HPL_LATCPY_N_DEPTH +#define HPL_LATCPY_N_DEPTH 2 +#define HPL_LATCPY_LOG2_N_DEPTH 1 +#endif + +#ifdef STDC_HEADERS +void HPL_dlatcpy +( + const int M, + const int N, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dlatcpy +( M, N, A, LDA, B, LDB ) + const int M; + const int N; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlatcpy copies the transpose of an array A into an array B. 
+ * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array B and + * the number of columns of A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of rows of the array A and + * the number of columns of B. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,M). + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,N). + * + * B (local output) double * + * On entry, B points to an array of dimension (LDB,N). On exit, + * B is overwritten with the transpose of A. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of the array B. + * LDB must be at least MAX(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_LATCPY_USE_COPY + register int j; +#else +#if ( HPL_LATCPY_N_DEPTH == 1 ) + const double * A0 = A; + double * B0 = B; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + const double * A0 = A, * A1 = A + 1; + double * B0 = B, * B1 = B + LDB; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + const double * A0 = A, * A1 = A + 1, + * A2 = A + 2, * A3 = A + 3; + double * B0 = B, * B1 = B + LDB, + * B2 = B + (LDB << 1), * B3 = B + 3 * LDB; +#endif + const int incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH), + incB = ( (unsigned int)(LDB) << + HPL_LATCPY_LOG2_N_DEPTH ) - M, + incA0 = -M * LDA + 1, incB0 = LDB - M; + int mu, nu; + register int i, j; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + +#ifdef HPL_LATCPY_USE_COPY + for( j = 0; j < N; j++, B0 += LDB ) HPL_dcopy( M, A0+j, LDA, B0, 1 ); +#else + mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) << + HPL_LATCPY_LOG2_M_DEPTH ); + nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) << + HPL_LATCPY_LOG2_N_DEPTH ); + + for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH ) + { + for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH ) + { +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 0] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA; + B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA; +#endif + +#if ( HPL_LATCPY_M_DEPTH > 1 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 1] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA; + B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 2 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA; + B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA; + B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA; + B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA; + B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 4 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA; + B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA; + B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA; + B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA; + B0[ 7] = *A0; A0 += LDA; B1[ 7] 
= *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA; + B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA; + B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA; + B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA; + B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA; + B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA; + B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA; + B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 8 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA; + B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA; + B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA; + B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA; + B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA; + B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA; + B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA; + B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA; + B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA; + B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA; + B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA; + B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA; + B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA; + B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA; + B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA; + B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA; + B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA; + B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA; + B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA; + B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA; + B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA; + B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA; + B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA; + B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA; + B0[15] = *A0; A0 += 
LDA; B1[15] = *A1; A1 += LDA; + B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 16 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA; + B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA; + B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA; + B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA; + B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA; + B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA; + B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA; + B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA; + B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA; + B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA; + B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA; + B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA; + B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA; + B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA; + B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA; + B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA; + B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA; + B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA; + B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA; + B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA; + B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA; + B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA; + B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA; + B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA; + B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA; + B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA; + B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA; + B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA; + B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA; + B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA; + B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA; + B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 
+= LDA; + B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA; + B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA; + B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA; + B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA; + B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA; + B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA; + B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA; + B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA; + B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA; + B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA; + B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA; + B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA; + B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA; + B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA; + B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA; + B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA; + B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA; + B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA; + B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA; + B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA; + B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA; + B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0 += HPL_LATCPY_M_DEPTH; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH; + B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH; +#endif + } + + for( i = mu; i < M; i++ ) + { +#if ( HPL_LATCPY_N_DEPTH == 1 ) + *B0 = *A0; B0++; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA; + *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA; +#endif + } + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + A0 += incA; B0 += incB; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + A0 += incA; A1 += incA; B0 += incB; B1 += incB; +#elif ( 
HPL_LATCPY_N_DEPTH == 4 ) + A0 += incA; A1 += incA; A2 += incA; A3 += incA; + B0 += incB; B1 += incB; B2 += incB; B3 += incB; +#endif + } + + for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 ) + { + for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH ) + { + B0[ 0]=*A0; A0 += LDA; +#if ( HPL_LATCPY_M_DEPTH > 1 ) + B0[ 1]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 2 ) + B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 4 ) + B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA; + B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 8 ) + B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA; + B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA; + B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA; + B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 16 ) + B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA; + B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA; + B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA; + B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA; + B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA; + B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA; + B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA; + B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA; +#endif + } + + for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; } + } +#endif +/* + * End of HPL_dlatcpy + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_fprintf.c new file mode 100644 index 000000000..adaf22b39 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_fprintf.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_fprintf +( + FILE * STREAM, + const char * FORM, + ... +) +#else +void HPL_fprintf( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_fprintf is a wrapper around fprintf flushing the output stream. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + char cline[256]; +#ifndef STDC_HEADERS + FILE * STREAM; + char * FORM; +#endif +/* .. + * .. Executable Statements .. 
+ */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + (void) fprintf( STREAM, "%s", cline ); + (void) fflush( STREAM ); +/* + * End of HPL_fprintf + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_warn.c new file mode 100644 index 000000000..bc40818a9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/auxil/HPL_warn.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_warn +( + FILE * STREAM, + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_warn( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_warn displays an error message. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + FILE * STREAM; + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function", + SRNAME, cline ); + else + HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); +/* + * End of HPL_warn + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_daxpy.c new file mode 100644 index 000000000..72be5774b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_daxpy.c @@ -0,0 +1,175 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_daxpy + +#ifdef STDC_HEADERS +void HPL_daxpy +( + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_daxpy +( N, ALPHA, X, INCX, Y, INCY ) + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_daxpy scales the vector x by alpha and adds it to y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. 
N + * must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * On exit, the entries of the incremented array Y are updated + * with the scaled entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_daxpy( N, ALPHA, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register const double alpha = ALPHA; + register double x0, x1, x2, x3, y0, y1, y2, y3; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; + + if( ( N > 0 ) && ( alpha != HPL_rzero ) ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + + *Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1; + Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3; + + X += incX4; + Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + y0 = (*Y); + + *Y = y0 + alpha * x0; + + X += INCX; + Y += INCY; + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef 
HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_daxpy + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dcopy.c new file mode 100644 index 000000000..a8fe24109 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dcopy.c @@ -0,0 +1,168 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dcopy + +#ifdef STDC_HEADERS +void HPL_dcopy +( + const int N, + const double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_dcopy +( N, X, INCX, Y, INCY ) + const int N; + const double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dcopy copies the vector x into the vector y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. N + * must be at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. 
+ * On exit, the entries of the incremented array Y are updated + * with the entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dcopy( N, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY, + incX5 = 5 * INCX, incY5 = 5 * INCY, + incX6 = 6 * INCX, incY6 = 6 * INCY, + incX7 = 7 * INCX, incY7 = 7 * INCY, + incX8 = 8 * INCX, incY8 = 8 * INCY; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + *Y = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5; + Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7; + + X += incX8; + Y += incY8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + *Y = x0; + + X += INCX; + Y += INCY; + } + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77dcopy( &F77N, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_dcopy + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemm.c new file mode 100644 index 000000000..b222e4717 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemm.c @@ -0,0 +1,521 @@ +/* + * -- High Performance Computing Linpack Benchmark 
(HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemmNN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iail, iblj, icij, j, jal, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) + { + t0 = ALPHA * B[iblj]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmNT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double 
t0; + int i, iail, ibj, ibjl, icij, j, jal, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) + { + t0 = ALPHA * B[ibjl]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iai, iail, iblj, icij, j, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) + { t0 += A[iail] * B[iblj]; } + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iali, ibj, ibjl, icij, j, jai, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iali = jai, ibjl = ibj; + l < K; l++, iali += 
1, ibjl += LDB ) t0 += A[iali] * B[ibjl]; + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemm0 +( + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + BETA, C, LDC ) + const enum HPL_TRANS TRANSA, TRANSB; + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) || + ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) && + ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } + return; + } + + if( TRANSB == HplNoTrans ) + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } + else + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dgemm +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +void HPL_dgemm +( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANSA; + const enum HPL_TRANS TRANSB; + const int M; + const int N; + const int K; + const double ALPHA; + const double * A; + const int LDA; + 
const double * B; + const int LDB; + const double BETA; + double * C; + const int LDC; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemm performs one of the matrix-matrix operations + * + * C := alpha * op( A ) * op( B ) + beta * C + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * Alpha and beta are scalars, and A, B and C are matrices, with op(A) + * an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANSA (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * TRANSB (local input) const enum HPL_TRANS + * On entry, TRANSB specifies the form of op(B) to be used in + * the matrix-matrix operation follows: + * TRANSB==HplNoTrans : op( B ) = B, + * TRANSB==HplTrans : op( B ) = B^T, + * TRANSB==HplConjTrans : op( B ) = B^T. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix + * op(A) and of the matrix C. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix + * op(B) and the number of columns of the matrix C. N must be + * at least zero. + * + * K (local input) const int + * On entry, K specifies the number of columns of the matrix + * op(A) and the number of rows of the matrix op(B). K must be + * be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrices A and B + * need not be set on input. 
+ * + * A (local input) const double * + * On entry, A is an array of dimension (LDA,ka), where ka is + * k when TRANSA==HplNoTrans, and is m otherwise. Before + * entry with TRANSA==HplNoTrans, the leading m by k part of + * the array A must contain the matrix A, otherwise the leading + * k by m part of the array A must contain the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the first dimension of A as declared + * in the calling (sub) program. When TRANSA==HplNoTrans then + * LDA must be at least max(1,m), otherwise LDA must be at least + * max(1,k). + * + * B (local input) const double * + * On entry, B is an array of dimension (LDB,kb), where kb is + * n when TRANSB==HplNoTrans, and is k otherwise. Before + * entry with TRANSB==HplNoTrans, the leading k by n part of + * the array B must contain the matrix B, otherwise the leading + * n by k part of the array B must contain the matrix B. + * + * LDB (local input) const int + * On entry, LDB specifies the first dimension of B as declared + * in the calling (sub) program. When TRANSB==HplNoTrans then + * LDB must be at least max(1,k), otherwise LDB must be at least + * max(1,n). + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When BETA is + * supplied as zero then the elements of the matrix C need + * not be set on input. + * + * C (local input/output) double * + * On entry, C is an array of dimension (LDC,n). Before entry, + * the leading m by n part of the array C must contain the + * matrix C, except when beta is zero, in which case C need not + * be set on entry. On exit, the array C is overwritten by the + * m by n matrix ( alpha*op( A )*op( B ) + beta*C ). + * + * LDC (local input) const int + * On entry, LDC specifies the first dimension of C as declared + * in the calling (sub) program. LDC must be at least + * max(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + printf("Order %d, TransA %d, TransB %d, M %d, N %d, K %d\n", ORDER, TRANSA, TRANSB, M, N, K); + cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, + C, LDC ); + } + else + { + HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, + C, LDC ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringStructPtr + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringCrayStyle + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, F77K = K, + F77lda = LDA, F77ldb = LDB, F77ldc = LDC; +#else +#define F77M M +#define F77N N +#define F77K K +#define F77lda LDA +#define F77ldb LDB +#define F77ldc LDC +#endif + char ctransa, ctransb; + + if( TRANSA == HplNoTrans ) ctransa = 'N'; + else if( TRANSA == HplTrans ) ctransa = 'T'; + else ctransa = 'C'; + + if( TRANSB == HplNoTrans ) ctransb = 'N'; + else if( TRANSB == HplTrans ) ctransb = 'T'; + else ctransb = 'C'; + + if( ORDER == HplColumnMajor ) + { +#ifdef StringSunStyle + F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, 
&alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif + } + else + { +#ifdef StringSunStyle + F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif + } +#endif +/* + * End of HPL_dgemm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemv.c new file mode 100644 index 000000000..6366c5a48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dgemv.c @@ -0,0 +1,326 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemv0 +( + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_TRANS TRANS; + const int INCX, INCY, LDA, M, N; + const double ALPHA, BETA; + const double * A, * X; + double * Y; +#endif +{ +/* + * .. Local Variables .. + */ + int i, iaij, ix, iy, j, jaj, jx, jy; + register double t0; +/* .. + * .. Executable Statements .. + */ + if( ( M == 0 ) || ( N == 0 ) || + ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; } + + if( TRANS == HplNoTrans ) + { + HPL_dscal( M, BETA, Y, INCY ); + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY ) + { Y[iy] += A[iaij] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = HPL_rzero; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { t0 += A[iaij] * X[ix]; } + if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0; + else Y[jy] = BETA * Y[jy] + ALPHA * t0; + } + } +} +#endif + +#ifdef STDC_HEADERS +void HPL_dgemv +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +void HPL_dgemv +( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANS; + const int M; + const 
int N; + const double ALPHA; + const double * A; + const int LDA; + const double * X; + const int INCX; + const double BETA; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemv performs one of the matrix-vector operations + * + * y := alpha * op( A ) * x + beta * y, + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * where alpha and beta are scalars, x and y are vectors and A is an m + * by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the operation to be performed as + * follows: + * TRANS = HplNoTrans y := alpha*A *x + beta*y, + * TRANS = HplTrans y := alpha*A^T*x + beta*y. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then A and X need not be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. 
+ * INCX must not be zero. + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When ALPHA is + * supplied as zero then Y need not be set on input. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * Before entry with BETA non-zero, the incremented array Y must + * contain the vector y. On exit, Y is overwritten by the + * updated vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } + else + { + HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), + N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftran; +#endif +#ifdef StringStructPtr + F77_CHAR ftran; +#endif +#ifdef StringCrayStyle + F77_CHAR ftran; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + char ctran; + + if( ORDER == HplColumnMajor ) + { + ctran = ( TRANS == HplNoTrans ? 
'N' : 'T' ); + +#ifdef StringSunStyle + F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + else + { + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); +#ifdef StringSunStyle + F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + +#endif +/* + * End of HPL_dgemv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dger.c new file mode 100644 index 000000000..5ea702778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dger.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dger + +#ifdef STDC_HEADERS +void HPL_dger +( + const enum HPL_ORDER ORDER, + const int M, + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY, + double * A, + const int LDA +) +#else +void HPL_dger +( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) + const enum HPL_ORDER ORDER; + const int M; + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; + double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dger performs the rank 1 operation + * + * A := alpha * x * y^T + A, + * + * where alpha is a scalar, x is an m-element vector, y is an n-element + * vector and A is an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. 
When ALPHA is + * supplied as zero then X and Y need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * A (local input/output) double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. On exit, A is + * overwritten by the updated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ); +#endif +#ifdef HPL_CALL_VSIPL + register double t0; + int i, iaij, ix, iy, j, jaj, jx, jy; + + if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return; + + if( ORDER == HplColumnMajor ) + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = ALPHA * Y[jy]; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { A[iaij] += X[ix] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY ) + { A[iaij] += Y[iy] * t0; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + + if( ORDER == HplColumnMajor ) + { F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); } + else + { F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); } +#endif +/* + * End of HPL_dger + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dscal.c new file mode 100644 index 000000000..7e041991f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dscal.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dscal + +#ifdef STDC_HEADERS +void HPL_dscal +( + const int N, + const double ALPHA, + double * X, + const int INCX +) +#else +void HPL_dscal +( N, ALPHA, X, INCX ) + const int N; + const double ALPHA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dscal scales the vector x by alpha. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are scaled + * by the scalar alpha. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dscal( N, ALPHA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + register const double alpha = ALPHA; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( ( N > 0 ) && ( alpha != HPL_rone ) ) + { + if( alpha == HPL_rzero ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = (double *)X + nu * INCX; + + do + { + (*X) = HPL_rzero; X[incX4] = HPL_rzero; + X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero; + X[incX2] = HPL_rzero; X[incX6] = HPL_rzero; + X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; } + } + else + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + x0 *= alpha; x4 *= alpha; x1 *= alpha; x5 *= alpha; + x2 *= alpha; x6 *= alpha; x3 *= alpha; x7 *= alpha; + + (*X) = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5; + X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7; + + X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + + F77dscal( &F77N, &alpha, X, &F77incx ); +#endif +/* + * End of HPL_dscal + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsm.c new file mode 100644 index 
000000000..a336a7d29 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsm.c @@ -0,0 +1,977 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, 
ibkj += 1 ) + { + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < 
N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaki, ibij, ibkj, j, jai, jbj, k; + register double t0; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) 
+ const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) 
+ { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double 
* B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, 
ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsm0 +( + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } + return; + } + + if( SIDE == HplLeft ) + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + 
else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } + else + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsm +( + const enum HPL_ORDER ORDER, + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dtrsm +( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_ORDER ORDER; + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int M; + const int N; + const double ALPHA; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsm solves one of the matrix equations + * + * op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + * + * where alpha is a scalar, X and B are m by n matrices, A is a unit, or + * non-unit, upper or lower triangular matrix 
and op(A) is one of + * + * op( A ) = A or op( A ) = A^T. + * + * The matrix X is overwritten on B. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * SIDE (local input) const enum HPL_SIDE + * On entry, SIDE specifies whether op(A) appears on the left + * or right of X as follows: + * SIDE==HplLeft op( A ) * X = alpha * B, + * SIDE==HplRight X * op( A ) = alpha * B. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix B. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix B. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrix B need not + * be set on input. 
+ * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * k, where k is m when SIDE==HplLeft and is n + * otherwise. Before entry with UPLO==HplUpper, the leading + * k by k upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. When UPLO==HplLower on entry, + * the leading k by k lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. + * + * B (local input/output) double * + * On entry, B points to an array of size equal to or greater + * than LDB * n. Before entry, the leading m by n part of the + * array B must contain the matrix B, except when beta is zero, + * in which case B need not be set on entry. On exit, the array + * B is overwritten by the m by n solution matrix. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of B as + * declared in the calling (sub) program. LDB must be at + * least MAX(1,m). + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); + } + else + { + HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), + ( UPLO == HplLower ? 
HplUpper : HplLower ), + TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef StringSunStyle +#if defined( HPL_USE_F77_INTEGER_DEF ) + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77ldb = LDB; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77ldb LDB +#endif + char cside, cuplo, ctran, cdiag; + + if( TRANS == HplNoTrans ) ctran = 'N'; + else if( TRANS == HplTrans ) ctran = 'T'; + else ctran = 'C'; + cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); + + if( ORDER == HplColumnMajor ) + { + cside = ( SIDE == HplRight ? 'R' : 'L' ); + cuplo = ( UPLO == HplLower ? 
'L' : 'U' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } + else + { + cside = ( SIDE == HplRight ? 'L' : 'R' ); + cuplo = ( UPLO == HplLower ? 
'U' : 'L' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } +#endif +/* + * End of HPL_dtrsm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsv.c new file mode 100644 index 000000000..99e84f073 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_dtrsv.c @@ -0,0 +1,520 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + X[jx] /= A[jaj]; t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + t0 /= A[jaj]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int 
INCX +) +#else +static void HPL_dtrsvLTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + X[jx] /= A[j+jaj]; t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij 
+= 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + t0 /= A[iaij]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsv0 +( + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + if( N == 0 ) return; + + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUTU( N, A, LDA, X, INCX ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLTU( N, A, LDA, X, INCX ); } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsv +( + const enum HPL_ORDER ORDER, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +void HPL_dtrsv +( ORDER, UPLO, TRANS, DIAG, 
N, A, LDA, X, INCX ) + const enum HPL_ORDER ORDER; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int N; + const double * A; + const int LDA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsv solves one of the systems of equations + * + * A * x = b, or A^T * x = b, + * + * where b and x are n-element vectors and A is an n by n non-unit, or + * unit, upper or lower triangular matrix. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the equations to be solved as + * follows: + * TRANS==HplNoTrans A * x = b, + * TRANS==HplTrans A^T * x = b. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * N (local input) const int + * On entry, N specifies the order of the matrix A. N must be at + * least zero. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. 
Before entry with UPLO==HplUpper, the leading + * n by n upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. When UPLO==HplLower on entry, + * the leading n by n lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,n). + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * Before entry, the incremented array X must contain the n + * element right-hand side vector b. On exit, X is overwritten + * with the solution vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); + } + else + { + HPL_dtrsv0( ( UPLO == HplUpper ? HplLower : HplUpper ), + ( TRANS == HplNoTrans ? 
HplTrans : HplNoTrans ), + DIAG, N, A, LDA, X, INCX ); + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fuplo, ftran, fdiag; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77lda = LDA, F77incx = INCX; +#else +#define F77N N +#define F77lda LDA +#define F77incx INCX +#endif + char cuplo, ctran, cdiag; + + if( ORDER == HplColumnMajor ) + { + cuplo = ( UPLO == HplUpper ? 'U' : 'L' ); + ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); + } + else + { + cuplo = ( UPLO == HplUpper ? 'L' : 'U' ); + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); + } + cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' ); + +#ifdef StringSunStyle + F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx, + IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + fuplo = HPL_C2F_CHAR( cuplo ); + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructVal + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructPtr + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif + +#endif +/* + * End of HPL_dtrsv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_idamax.c new file mode 100644 index 000000000..5ceabdf25 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/blas/HPL_idamax.c @@ -0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_idamax + +#ifdef STDC_HEADERS +int HPL_idamax +( + const int N, + const double * X, + const int INCX +) +#else +int HPL_idamax +( N, X, INCX ) + const int N; + const double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_idamax returns the index in an n-vector x of the first element + * having maximum absolute value. + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + return( (int)(cblas_idamax( N, X, INCX )) ); +#endif +#ifdef HPL_CALL_VSIPL + register double absxi, smax = HPL_rzero, x0, x1, x2, x3, + x4, x5, x6, x7; + const double * StX; + register int imax = 0, i = 0, j; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + + X += incX8; + + } while( X != StX ); + } + + for( j = N - nu; j != 0; j-- ) + { + x0 = (*X); + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + X += INCX; + } + } + return( imax ); +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + int imax = 0; + + if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1; + return( imax ); +#endif +/* + * End of HPL_idamax + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1rinM.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1rinM.c new file mode 100644 index 000000000..dd03b79b1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1rinM.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, then send message to its two + * next neighbors. Otherwise, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, or + * just after the root process, then forward it to the next. Otherwise, + * inform the caller that the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next, + size ), msgid, comm ); + } + } + else + { + prev = MModSub1( rank, size ); + if( ( size > 2 ) && + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( prev != root ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1ring.c new file mode 100644 index 000000000..dd5eb2d12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_1ring.c @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, prev, rank, root, + size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, then + * forward it to the next. Otherwise, inform the caller that the panel + * has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank, + size ), msgid, comm ); + } + else + { + prev = MModSub1( rank, size ); + + ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid, + comm, &PANEL->status[0] ); + next = MModAdd1( rank, size ); + if( ( ierr == MPI_SUCCESS ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, + msgid, comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2rinM.c new file mode 100644 index 000000000..56581ea0d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2rinM.c @@ -0,0 +1,236 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its two right neighbors and mid-pro- + * cess. If I am not the root process, probe for message. If the message + * is there, then receive it. If I am not the last process of both rings + * then forward it to the next. Otherwise, inform the caller that the + * panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + if( MModAdd1( next, size ) != roo2 ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, + MModAdd1( next, size ), msgid, comm ); + } + + if( ierr == MPI_SUCCESS ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + } + else + { + prev = MModSub1( rank, size ); + if( ( prev == root ) || ( rank == roo2 ) || + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && ( prev != root ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2ring.c new file mode 100644 index 000000000..f0e6e2647 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_2ring.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, rank, + roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its right neighbor and mid-process. + * If I am not the root process, probe for message. If the message is + * there, then receive it, and if I am not the last process of both + * rings, then forward it to the next. Otherwise, inform the caller that + * the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + else + { + partner = MModSub1( rank, size ); + if( ( partner == root ) || ( rank == roo2 ) ) partner = root; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bcast.c new file mode 100644 index 000000000..100161152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bcast.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bcast +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast +( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bcast broadcasts the current panel. Successful completion is + * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to + * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was + * not completed, in which case this function should be called again. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * IFLAG (output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * occured. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break; + case HPL_1RING : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break; + case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break; + case HPL_2RING : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break; + case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break; + case HPL_BLONG : ierr = HPL_bcast_blong( PANEL, IFLAG ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_binit.c new file mode 100644 index 000000000..3daf72b7d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_binit.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_binit +( + HPL_T_panel * PANEL +) +#else +int HPL_binit +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_binit initializes a row broadcast. Successful completion is + * indicated by the returned error code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_binit_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_binit_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_binit_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_binit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blonM.c new file mode 100644 index 000000000..5fa221937 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blonM.c @@ -0,0 +1,445 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ /* HPL_binit_blonM: returns HPL_SUCCESS at once for a NULL panel or npcol <= 1; otherwise calls HPL_copyL( PANEL ), except when HPL_USE_MPI_DATATYPE is defined without HPL_COPY_L, and then returns HPL_SUCCESS. */ +/* .. + * .. Executable Statements ..
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S1 PANEL->buffers[I_SEND] +#define _M_COUNT_S1 PANEL->counts[I_SEND] +#define _M_TYPE_S1 PANEL->dtypes[I_SEND] + +#define _M_BUFF_S2 PANEL->buffers[I_SEND] +#define _M_COUNT_S2 PANEL->counts[I_SEND] +#define _M_TYPE_S2 PANEL->dtypes[I_SEND] + +#define _M_BUFF_R1 PANEL->buffers[I_RECV] +#define _M_COUNT_R1 PANEL->counts[I_RECV] +#define _M_TYPE_R1 PANEL->dtypes[I_RECV] + +#define _M_BUFF_R2 PANEL->buffers[I_RECV] +#define _M_COUNT_R2 PANEL->counts[I_RECV] +#define _M_TYPE_R2 PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S1 (void *)(PANEL->L2) +#define _M_COUNT_S1 PANEL->len +#define _M_TYPE_S1 MPI_DOUBLE + +#define _M_BUFF_S2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S2 lbuf +#define _M_TYPE_S2 MPI_DOUBLE + +#define _M_BUFF_R1 (void *)(PANEL->L2) +#define _M_COUNT_R1 PANEL->len +#define _M_TYPE_R1 MPI_DOUBLE + +#define _M_BUFF_R2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R2 lbuf +#define _M_TYPE_R2 MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blonM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int
HPL_bcast_blonM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ /* HPL_bcast_blonM: broadcasts PANEL over PANEL->grid->row_comm with blocking calls in three steps (root to the next process, spread, roll). The result is stored in *IFLAG and returned: HPL_SUCCESS when every MPI call reported MPI_SUCCESS, HPL_FAILURE otherwise. The HPL_KEEP_TESTING exits cannot be taken while the MPI_Iprobe calls stay disabled, because go is initialized to 1 and never modified. */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf, + ibufR, ibufS, dummy=0, indx, ip2=1, k, l, + lbuf, lbufR, lbufS, mask=1, msgid, mydist, + mydist2, next, npm1, npm2, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process sends to its right neighbor, then spread + * the panel on the other npcol - 2 processes. If I am not the root + * process, probe for message received. If the message is there, then + * receive it. If I am just after the root process, return. Otherwise, + * keep spreading on those npcol - 2 processes. Otherwise, inform the + * caller that the panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + root = PANEL->pcol; msgid = PANEL->msgid; + prev = MModSub1( rank, size ); + + if( rank == root ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, 0, PANEL->len, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1, + MModAdd1( rank, size ), msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else if( prev == root ) + { +/* + * This probing mechanism causes problems when lookahead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired.
+ * + * ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] ); + */ + if( ierr == MPI_SUCCESS ) + { /* if panel is here, proceed */ + if( go != 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + ierr = HPL_packL( PANEL, 0, PANEL->len, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1, + root, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } + } +/* + * if I am just after the root, exit now. The message receive completed + * successfully, this guy is done. If there are only 2 processes in each + * row of processes, we are done as well. + */ + if( ( prev == root ) || ( size == 2 ) ) + { + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + return( *IFLAG ); + } +/* + * Otherwise, proceed with broadcast - Spread the panel across process + * columns + */ + npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len; + + k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + if( rank == root ) mydist2 = ( mydist = 0 ); + else mydist2 = ( mydist = MModSub( rank, root, size ) - 1 ); + + indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 ); + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); +/* + * This probing mechanism causes problems when lookahead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired.
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < npm1 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Send message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); + if( MModSub1( prev, size ) == root ) prev = root; + next = MModAdd1( rank, size ); + if( rank == root ) next = MModAdd1( next, size ); + + for( k = 0; k < npm2; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx =
MModAdd( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the
message was received and being forwarded, return HPL_SUCCESS. + * If an error occurred in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ /* HPL_bwait_blonM: performs no wait; every path returns HPL_SUCCESS. */ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blong.c new file mode 100644 index 000000000..e57f11bcc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_blong.c @@ -0,0 +1,363 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ /* HPL_binit_blong: returns HPL_SUCCESS at once for a NULL panel or npcol <= 1; otherwise calls HPL_copyL( PANEL ), except when HPL_USE_MPI_DATATYPE is defined without HPL_COPY_L, and then returns HPL_SUCCESS. */ +/* .. + * .. Executable Statements ..
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S PANEL->buffers[I_SEND] +#define _M_COUNT_S PANEL->counts[I_SEND] +#define _M_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_BUFF_R PANEL->buffers[I_RECV] +#define _M_COUNT_R PANEL->counts[I_RECV] +#define _M_TYPE_R PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S lbuf +#define _M_TYPE_S MPI_DOUBLE + +#define _M_BUFF_R (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R lbuf +#define _M_TYPE_R MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE + +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blong +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_blong( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ /* HPL_bcast_blong: broadcasts PANEL over PANEL->grid->row_comm with blocking calls in two steps (spread, then roll). The result is stored in *IFLAG and returned: HPL_SUCCESS when every MPI call reported MPI_SUCCESS, HPL_FAILURE otherwise. The only HPL_KEEP_TESTING exit sits inside the disabled MPI_Iprobe section. */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, dummy=0, ierr=MPI_SUCCESS, + ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf, + lbufR, lbufS, mask, msgid, mydist, mydist2, + next, npm1, partner, prev, rank, root, size; +/* .. + * .. Executable Statements ..
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, test for message receive completion. If + * the message is there, then receive it, and keep spreading in a + * blocking fashion this time. Otherwise, inform the caller that the + * panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + mask = PANEL->grid->col_mask; ip2 = PANEL->grid->col_ip2m1; + root = PANEL->pcol; msgid = PANEL->msgid; + COUNT = PANEL->len; npm1 = size - 1; + mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2; + count = COUNT / size; count = Mmax( count, 1 ); +/* + * Spread the panel across process columns + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); +/* + * This probing mechanism causes problems when lookahead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired.
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < size ) + { + partner = MModAdd( root, partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Send message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); next = MModAdd1( rank, size ); + + for( k = 0; k < npm1; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = MModAdd( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ?
COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ /* HPL_bwait_blong: performs no wait; every path returns HPL_SUCCESS. */ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bwait.c new file mode 100644 index 000000000..a2e0f4df8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_bwait.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bwait +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bwait waits for the row broadcast of the current panel to + * terminate. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements ..
+ */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); /* npcol <= 1: return success without dispatching to any topology routine */ +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_bwait_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_bwait_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_bwait_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; /* any other topology value: no wait routine is called, success is reported */ + } + + return( ierr ); +/* + * End of HPL_bwait + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_copyL.c new file mode 100644 index 000000000..04f765a6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_copyL.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_copyL +( + HPL_T_panel * PANEL +) +#else +void HPL_copyL +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_copyL copies the panel of columns, the L1 replicated submatrix, + * the pivot array and the info scalar into a contiguous workspace for + * later broadcast. + * + * The copy of this panel into a contiguous buffer can be enforced by + * specifying -DHPL_COPY_L in the architecture specific Makefile.
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int jb, lda; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->mycol == PANEL->pcol ) /* the copy runs only when this process's column index equals PANEL->pcol; otherwise the routine does nothing */ + { + jb = PANEL->jb; lda = PANEL->lda; + + if( PANEL->grid->myrow == PANEL->prow ) + { /* mp-jb rows are copied and the Mptr row argument is jb instead of 0; NOTE(review): Mptr is defined elsewhere, presumably it addresses entry (i, j) of an array with leading dimension lda - confirm */ + HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + else + { /* all mp rows are copied, starting at row argument 0 */ + HPL_dlacpy( PANEL->mp, jb, Mptr( PANEL->A, 0, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + } /* NOTE(review): only the PANEL->A to PANEL->L2 copy is visible in this routine; confirm where L1, the pivot array and the info scalar named in the Purpose text are placed in the buffer */ +/* + * End of HPL_copyL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_packL.c new file mode 100644 index 000000000..8a70ef83d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_packL.c @@ -0,0 +1,245 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_packL +( + HPL_T_panel * PANEL, + const int INDEX, + const int LEN, + const int IBUF +) +#else +int HPL_packL +( PANEL, INDEX, LEN, IBUF ) + HPL_T_panel * PANEL; + const int INDEX; + const int LEN; + const int IBUF; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_packL forms the MPI data type for the panel to be broadcast. + * Successful completion is indicated by the returned error code + * MPI_SUCCESS. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * INDEX (input) const int + * On entry, INDEX points to the first entry of the packed + * buffer being broadcast. + * + * LEN (input) const int + * On entry, LEN is the length of the packed buffer. + * + * IBUF (input) const int + * On entry, IBUF specifies the panel buffer/count/type entries + * that should be initialized. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ +#ifndef HPL_COPY_L + MPI_Datatype * type = NULL; + void * * * bufs = NULL; + double * A; + int * blen = NULL; + MPI_Aint * disp = NULL; + int curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1, + jb, jbm, jbp1, lda, len, m, m1, nbufs; +#else + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_COPY_L +/* + * Panel + L1 + DPIV have been copied into a contiguous buffer - Create + * and commit a contiguous data type + */ + PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX); + PANEL->counts [IBUF] = 1; + + ierr = MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); +#else +/* + * Panel is not contiguous (because of LDA and also L1 + DPIV) - Create + * and commit a struct data type + */ + jbp1 = ( jb = PANEL->jb ) + 1; +/* + * Temporaries to create the type struct. 
+ */ + bufs = (void * * *)malloc( jbp1 * sizeof( void * * ) ); + blen = (int *)malloc( jbp1 * sizeof( int ) ); + disp = (MPI_Aint *)malloc( jbp1 * sizeof( MPI_Aint ) ); + type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) ); + + if( ( bufs != NULL ) && ( blen != NULL ) && + ( disp != NULL ) && ( type != NULL ) ) + { + m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow ); + if( curr != 0 ) m -= jb; + + len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m; + + if( ( m > 0 ) && ( ibuf < jbm ) ) + { +/* + * Retrieve proper pointers depending on process row and column + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + lda = PANEL->lda; + if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); } + else { A = Mptr( PANEL->A, 0, -jb, lda ); } + } + else { lda = PANEL->ldl2; A = PANEL->L2; } +/* + * Pack the first (partial) column of L + */ + m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m ); + m1 = Mmin( len, m1 ); + + bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; +/* + * Pack the remaining columns of L + */ + while( ( len > 0 ) && ( j1 < jb ) ) + { + m1 = Mmin( len, m ); + + bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; + } + } +/* + * Pack L1, DPIV, DINFO + */ + if( len > 0 ) + { /* L1, DPIV, DINFO */ + bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = len; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + nbufs++; + } + + for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0; + + PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1; +/* + * construct the struct type + */ + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_create_struct( nbufs, blen, disp, type, + &PANEL->dtypes[IBUF] ); +/* + * release temporaries + */ + if( bufs ) free( bufs ); + if( blen ) free( blen ); + if( disp ) free( disp ); + if( type ) free( type ); +/* + * commit the type + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); + } + else + { +/* + * Memory allocation failed -> abort + */ + HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" ); + return( MPI_SUCCESS ); /* never executed (hopefully ...) */ + } +#endif +#else + /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug + somewhere, so, just in case and until I find it ... */ + return( MPI_SUCCESS ); +#endif +/* + * End of HPL_packL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_recv.c new file mode 100644 index 000000000..ff426891c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_recv.c @@ -0,0 +1,142 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_recv +( + double * RBUF, + int RCOUNT, + int SRC, + int RTAG, + MPI_Comm COMM +) +#else +int HPL_recv +( RBUF, RCOUNT, SRC, RTAG, COMM ) + double * RBUF; + int RCOUNT; + int SRC; + int RTAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_recv is a simple wrapper around MPI_Recv. 
Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * SRC (local input) int + * On entry, SRC specifies the rank of the sending process in + * the communication space defined by COMM. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG, + COMM, &status ); +#endif + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_recv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_sdrv.c new file mode 100644 index 000000000..0b2363563 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_sdrv.c @@ -0,0 +1,239 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_sdrv +( + double * SBUF, + int SCOUNT, + int STAG, + double * RBUF, + int RCOUNT, + int RTAG, + int PARTNER, + MPI_Comm COMM +) +#else +int HPL_sdrv +( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM ) + double * SBUF; + int SCOUNT; + int STAG; + double * RBUF; + int RCOUNT; + int RTAG; + int PARTNER; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is + * to allow for some experimentation and tuning of this simple function. + * Messages of length less than or equal to zero are not sent nor + * received. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for the + * sending communication operation. 
+ * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for the + * receiving communication operation. + * + * PARTNER (local input) int + * On entry, PARTNER specifies the rank of the collaborative + * process in the communication space defined by COMM. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type[2]; +#endif + MPI_Request request; + MPI_Status status; + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT > 0 ) + { + if( SCOUNT > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE +/* + * Post asynchronous receive + */ + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER, + RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, + STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else +/* + * Post asynchronous receive + */ + ierr = MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + 
ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, + PARTNER, STAG, COMM ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#endif + } + else + { +/* + * Blocking receive + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, + COMM, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &status ); +#endif + } + } + else if( SCOUNT > 0 ) + { +/* + * Blocking send + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, + COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, + STAG, COMM ); +#endif + } + else { ierr = MPI_SUCCESS; } + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_sdrv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_send.c new file mode 100644 index 000000000..9e9868594 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/comm/HPL_send.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_send +( + double * SBUF, + int SCOUNT, + int DEST, + int STAG, + MPI_Comm COMM +) +#else +int HPL_send +( SBUF, SCOUNT, DEST, STAG, COMM ) + double * SBUF; + int SCOUNT; + int DEST; + int STAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_send is a simple wrapper around MPI_Send. Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * DEST (local input) int + * On entry, DEST specifies the rank of the receiving process in + * the communication space defined by COMM. 
+ * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( SCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM ); +#endif + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_send + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/cuda/cuda_dgemm.cpp.dp.cpp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/cuda/cuda_dgemm.cpp.dp.cpp new file mode 100644 index 000000000..644503181 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/cuda/cuda_dgemm.cpp.dp.cpp @@ -0,0 +1,310 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ + +#define NUMBER_OF_STREAMS 4 +#define CHUNK_SIZE 512 +#define NN 64 +#define NM 128 +#define ERRCODE(e) (-(__LINE__ * 1000 + (e))) +//#define DEVICE_DEBUG +//#ifdef MPI +//#include +//#endif + + +#define _GNU_SOURCE + +#define CUDA_ERROR_CHECK +#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ ) +#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ ) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include "mkl.h" + +extern "C" { + +inline void __cudaSafeCall(dpct::err0 err, const char *file, const int line) +{ + #ifdef CUDA_ERROR_CHECK + +#endif + + return; +} + +inline void __cudaCheckError(const char *file, const int line) try { +#ifdef CUDA_ERROR_CHECK + /* + DPCT1010:1: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this + code. + */ + dpct::err0 err = 0; + + // More careful checking. However, this will affect performance. + // Comment away if needed. 
+ err = DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()); + +#endif + + return; +} +catch (sycl::exception const &exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ + << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + + void dpcpp_dgemm + ( const int ORDER, + const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA, const double *A, const int LDA, + const double *B, const int LDB, const double BETA, + double *C, const int LDC); + + void dpcpp_dtrsm( + int HPL_ORDER, + int HPL_SIDE, + int HPL_UPLO, + int HPL_TRANS, + int HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int); +} + + +void dpcpp_dgemm +( const int ORDER, const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA,const double *A, const int LDA, + const double *B, const int LDB, + const double BETA, double *C, const int LDC) +{ + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + + if ((M==0)||(K==0)||(N==0)){ + return; + } + + + if ( (N) < NN || (M) < NM || (K) < 128){ + + #ifdef DEVICE_DEBUG + std::cout << "dgemm-Running on CPU" << std::endl; + #endif + + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC); + return; + } + + + #ifdef DEVICE_DEBUG + std::cout << "dgemm-Running on GPU" << std::endl; + #endif + + double *devPtrA, *devPtrB, *devPtrC; + int status; + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrA = sycl::malloc_device<double>(K * LDA, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrA, &A[0], K * LDA * sizeof(double)).wait())); + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrB = sycl::malloc_device<double>(N * LDB, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrB, &B[0], N * LDB * sizeof(double)).wait())); + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrC = sycl::malloc_device<double>(N * LDC, q_ct1))); +
CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrC, &C[0], N * LDC * sizeof(double)).wait())); + + dev_ct1.queues_wait_and_throw(); + oneapi::mkl::blas::column_major::gemm( + *dpct::get_current_device().get_saved_queue(), + oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M, + N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC) + .wait(); + dev_ct1.queues_wait_and_throw(); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(&C[0], devPtrC, N * LDC * sizeof(double)).wait())); + dev_ct1.queues_wait_and_throw(); + sycl::free(devPtrA, q_ct1); + sycl::free(devPtrB, q_ct1); + sycl::free(devPtrC, q_ct1); +} + +void dpcpp_dtrsm + +( const int ORDER, const int SIDE, + const int UPLO, const int TRANS, + const int DIAG, const int M, const int N, + const double ALPHA, const double* A, const int LDA, double* B, + const int LDB) +{ + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + + if ((M==0)||(N==0)){ + return; + } + + double *devPtrA, *devPtrB; + int status; + + + if ( (M) < 512 || (N) < 2*(M)){ + #ifdef DEVICE_DEBUG + std::cout << "dtrsm-Running on CPU" << std::endl; + #endif + cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB); + + + return; + } + + #ifdef DEVICE_DEBUG + std::cout << "dtrsm-Running on GPU" << std::endl; + #endif + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrA = sycl::malloc_device<double>(M * LDA, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrA, A, M * LDA * sizeof(double)).wait())); + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrB = sycl::malloc_device<double>(N * LDB, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrB, B, N * LDB * sizeof(double)).wait())); + dev_ct1.queues_wait_and_throw(); + + oneapi::mkl::blas::column_major::trsm( + *dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left, + oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, + oneapi::mkl::diag::unit, M, N, ALPHA, 
devPtrA, LDA, devPtrB, LDB) + .wait(); + + dev_ct1.queues_wait_and_throw(); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(B, devPtrB, N * LDB * sizeof(double)).wait())); + + dev_ct1.queues_wait_and_throw(); + sycl::free(devPtrA, q_ct1); + sycl::free(devPtrB, q_ct1); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_all_reduce.c new file mode 100644 index 000000000..776f48504 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_all_reduce.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_all_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + MPI_Comm COMM +) +#else +int HPL_all_reduce +( BUFFER, COUNT, DTYPE, OP, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_all_reduce performs a global reduce operation across all + * processes of a group leaving the results on all processes. + * + * Arguments + * ========= + * + * BUFFER (local input/global output) void * + * On entry, BUFFER points to the buffer to be combined. On + * exit, this array contains the combined data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. 
+ * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr; +/* .. + * .. Executable Statements .. + */ + hplerr = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM ); + if( hplerr != MPI_SUCCESS ) return( hplerr ); + return( HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) ); +/* + * End of HPL_all_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_barrier.c new file mode 100644 index 000000000..9a5d9b10a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_barrier.c @@ -0,0 +1,90 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_barrier +( + MPI_Comm COMM +) +#else +int HPL_barrier +( COMM ) + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_barrier blocks the caller until all process members have call it. + * The call returns at any process only after all group members have + * entered the call. + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int i=0; +/* .. + * .. Executable Statements .. + */ + return( HPL_broadcast( (void*)(&i), 1, HPL_INT, 0, COMM ) ); +/* + * End of HPL_barrier + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_broadcast.c new file mode 100644 index 000000000..42d962864 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_broadcast.c @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_broadcast +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_broadcast +( BUFFER, COUNT, DTYPE, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_broadcast broadcasts a message from the process with rank ROOT to + * all processes in the group. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be broadcast. On + * exit, this array contains the broadcast data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the source process. 
+ * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; + MPI_Status status; +/* .. + * .. Executable Statements .. + */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr ); + mpierr = MPI_Comm_rank( COMM, &rank ); + + kk = size - 1; + while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist = MModSub( rank, ROOT, size ); + + do + { + mask ^= ip2; + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + ip2 >>= 1; + } while( ip2 ); + + return( hplerr ); +/* + * End of HPL_broadcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_exit.c new file mode 100644 index 000000000..f0d00b065 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_exit.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_exit +( + HPL_T_grid * GRID +) +#else +int HPL_grid_exit +( GRID ) + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_exit marks the process grid object for deallocation. The + * returned error code MPI_SUCCESS indicates successful completion. + * Other error codes are (MPI) implementation dependent. + * + * Arguments + * ========= + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid to be released. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr = MPI_SUCCESS, mpierr; +/* .. + * .. Executable Statements .. 
+ */ + if( GRID->all_comm != MPI_COMM_NULL ) + { + mpierr = MPI_Comm_free( &(GRID->row_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->col_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->all_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + + GRID->order = HPL_COLUMN_MAJOR; + + GRID->iam = GRID->myrow = GRID->mycol = -1; + GRID->nprow = GRID->npcol = GRID->nprocs = -1; + + GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1; + GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1; + + return( hplerr ); +/* + * End of HPL_grid_exit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_info.c new file mode 100644 index 000000000..95c5a7315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_info.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_info +( + const HPL_T_grid * GRID, + int * NPROW, + int * NPCOL, + int * MYROW, + int * MYCOL +) +#else +int HPL_grid_info +( GRID, NPROW, NPCOL, MYROW, MYCOL ) + const HPL_T_grid * GRID; + int * NPROW; + int * NPCOL; + int * MYROW; + int * MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_info returns the grid shape and the coordinates in the grid + * of the calling process. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. 
+ * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NPROW (global output) int * + * On exit, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global output) int * + * On exit, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. + * + * MYROW (global output) int * + * On exit, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (global output) int * + * On exit, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + *NPROW = GRID->nprow; *NPCOL = GRID->npcol; + *MYROW = GRID->myrow; *MYCOL = GRID->mycol; + return( MPI_SUCCESS ); +/* + * End of HPL_grid_info + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_init.c new file mode 100644 index 000000000..52111ac52 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_grid_init.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_init +( + MPI_Comm COMM, + const HPL_T_ORDER ORDER, + const int NPROW, + const int NPCOL, + HPL_T_grid * GRID +) +#else +int HPL_grid_init +( COMM, ORDER, NPROW, NPCOL, GRID ) + MPI_Comm COMM; + const HPL_T_ORDER ORDER; + const int NPROW; + const int NPCOL; + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_init creates a NPROW x NPCOL process grid using column- or + * row-major ordering from an initial collection of processes identified + * by an MPI communicator. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. The coordinates of processes that are not part of the + * grid are set to values outside of [0..NPROW) x [0..NPCOL). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * On entry, COMM is the MPI communicator identifying the + * initial collection of processes out of which the grid is + * formed. + * + * ORDER (global input) const HPL_T_ORDER + * On entry, ORDER specifies how the processes should be ordered + * in the grid as follows: + * ORDER = HPL_ROW_MAJOR row-major ordering; + * ORDER = HPL_COLUMN_MAJOR column-major ordering; + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid to be created. NPROW must be at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid to be created. NPCOL must be at least one. + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information to be initialized. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int hdim, hplerr=MPI_SUCCESS, ierr, ip2, k, + mask, mycol, myrow, nprocs, rank, size; +/* .. + * .. Executable Statements .. + */ + MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size ); +/* + * Abort if illegal process grid + */ + nprocs = NPROW * NPCOL; + if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) ) + { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); } +/* + * Row- or column-major ordering of the processes + */ + if( ORDER == HPL_ROW_MAJOR ) + { + GRID->order = HPL_ROW_MAJOR; + myrow = rank / NPCOL; mycol = rank - myrow * NPCOL; + } + else + { + GRID->order = HPL_COLUMN_MAJOR; + mycol = rank / NPROW; myrow = rank - mycol * NPROW; + } + GRID->iam = rank; GRID->myrow = myrow; GRID->mycol = mycol; + GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs; +/* + * row_ip2 : largest power of two <= nprow; + * row_hdim : row_ip2 procs hypercube dim; + * row_ip2m1 : largest power of two <= nprow-1; + * row_mask : row_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPROW; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->row_ip2 = ip2; GRID->row_hdim = hdim; + + mask = ip2 = 1; k = NPROW - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->row_ip2m1 = ip2; GRID->row_mask = mask; +/* + * col_ip2 : largest power of two <= npcol; + * col_hdim : col_ip2 procs hypercube dim; + * col_ip2m1 : largest power of two <= npcol-1; + * col_mask : col_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPCOL; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->col_ip2 = ip2; GRID->col_hdim = hdim; + + mask = ip2 = 1; k = NPCOL - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->col_ip2m1 = ip2; GRID->col_mask = mask; +/* + * All communicator, leave if I am not part of this grid. Creation of the + * row- and column communicators. + */ + ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 
0 : MPI_UNDEFINED ), + rank, &(GRID->all_comm) ); + if( GRID->all_comm == MPI_COMM_NULL ) return( ierr ); + + ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + return( hplerr ); +/* + * End of HPL_grid_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_max.c new file mode 100644 index 000000000..002aabe01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_max.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_max +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_max +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_max combines (max) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. 
+ * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } +/* + * End of HPL_max + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_min.c new file mode 100644 index 000000000..a99e5e58a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_min.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_min +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_min +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_min combines (min) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. 
+ * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } +/* + * End of HPL_min + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_pnum.c new file mode 100644 index 000000000..c80885b9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_pnum.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pnum +( + const HPL_T_grid * GRID, + const int MYROW, + const int MYCOL +) +#else +int HPL_pnum +( GRID, MYROW, MYCOL ) + const HPL_T_grid * GRID; + const int MYROW; + const int MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pnum determines the rank of a process as a function of its + * coordinates in the grid. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. 
+ * + * MYROW (local input) const int + * On entry, MYROW specifies the row coordinate of the process + * whose rank is to be determined. MYROW must be greater than or + * equal to zero and less than NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies the column coordinate of the + * process whose rank is to be determined. MYCOL must be greater + * than or equal to zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + if( GRID->order == HPL_ROW_MAJOR ) + return( MYROW * GRID->npcol + MYCOL ); + else + return( MYCOL * GRID->nprow + MYROW ); +/* + * End of HPL_pnum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_reduce.c new file mode 100644 index 000000000..417c21163 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_reduce.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_reduce +( BUFFER, COUNT, DTYPE, OP, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_reduce performs a global reduce operation across all processes of + * a group. 
Note that the input buffer is used as workarray and in all + * processes but the accumulating process corrupting the original data. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be reduced. On + * exit, and in process of rank ROOT this array contains the + * reduced data. This buffer is also used as workspace during + * the operation in the other processes of the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the accumulating process. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; + void * buffer = NULL; + int hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; +/* .. + * .. Executable Statements .. 
+ */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); + if( size == 1 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_rank( COMM, &rank ); + i = size - 1; while( i > 1 ) { i >>= 1; d++; } + + if( DTYPE == HPL_INT ) + buffer = (void *)( (int *) malloc( (size_t)(COUNT) * + sizeof( int ) ) ); + else + buffer = (void *)( (double *)malloc( (size_t)(COUNT) * + sizeof( double ) ) ); + + if( !( buffer ) ) + { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); } + + if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 ) + { + do + { + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + MModAdd( ROOT, ip2, size ), tag, COMM, + &status ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + OP( COUNT, buffer, BUFFER, DTYPE ); + ip2 <<= 1; d--; + } while( d ); + } + else + { + do + { + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + OP( COUNT, buffer, BUFFER, DTYPE ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + mask ^= ip2; ip2 <<= 1; d--; + } while( d ); + } + if( buffer ) free( buffer ); + + return( hplerr ); +/* + * End of HPL_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_sum.c new file mode 100644 index 000000000..34cf87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/grid/HPL_sum.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_sum +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_sum +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sum combines (sum) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. 
+ */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } +/* + * End of HPL_sum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_disp.c new file mode 100644 index 000000000..757dad242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_disp.c @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_disp +( + HPL_T_panel * * PANEL +) +#else +int HPL_pdpanel_disp +( PANEL ) + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_disp deallocates the panel structure and resources and + * stores the error code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to be deallocated. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int mpierr; +/* .. + * .. Executable Statements .. 
+ */ +/* + * Deallocate the panel resources and panel structure + */ + mpierr = HPL_pdpanel_free( *PANEL ); + if( *PANEL ) free( *PANEL ); + *PANEL = NULL; + + return( mpierr ); +/* + * End of HPL_pdpanel_disp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_free.c new file mode 100644 index 000000000..38b5b0d97 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_free.c @@ -0,0 +1,104 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_free +( + HPL_T_panel * PANEL +) +#else +int HPL_pdpanel_free +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_free deallocates the panel resources and stores the error + * code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the panel data structure from + * which the resources should be deallocated. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->pmat->info == 0 ) PANEL->pmat->info = *(PANEL->DINFO); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE ); + (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE ); + if( PANEL->grid->nprow > 1 ) + (void) vsip_blockrelease_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Destroy blocks + */ + vsip_blockdestroy_d( PANEL->L1block ); + vsip_blockdestroy_d( PANEL->L2block ); + if( PANEL->grid->nprow > 1 ) + vsip_blockdestroy_d( PANEL->Ublock ); +#endif + + if( PANEL->WORK ) free( PANEL->WORK ); + if( PANEL->IWORK ) free( PANEL->IWORK ); + + return( MPI_SUCCESS ); +/* + * End of HPL_pdpanel_free + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_init.c new file mode 100644 index 000000000..9e35c7fb4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_init.c @@ -0,0 +1,348 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... 
*/ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +void HPL_pdpanel_init +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * PANEL +) +#else +void HPL_pdpanel_init +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_init initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies is the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + size_t dalign; + int icurcol, icurrow, ii, itmp1, jj, lwork, + ml2, mp, mycol, myrow, nb, npcol, nprow, + nq, nu; +/* .. + * .. Executable Statements .. + */ + PANEL->grid = GRID; /* ptr to the process grid */ + PANEL->algo = ALGO; /* ptr to the algo parameters */ + PANEL->pmat = A; /* ptr to the local array info */ + + myrow = GRID->myrow; mycol = GRID->mycol; + nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; + + HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, + nprow, npcol, &ii, &jj, &icurrow, &icurcol ); + mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow ); + nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol ); + /* ptr to trailing part of A */ + PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld ); +/* + * Workspace pointers are initialized to NULL. 
+ */ + PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; + PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; + PANEL->IWORK = NULL; +/* + * Local lengths, indexes process coordinates + */ + PANEL->nb = nb; /* distribution blocking factor */ + PANEL->jb = JB; /* panel width */ + PANEL->m = M; /* global # of rows of trailing part of A */ + PANEL->n = N; /* global # of cols of trailing part of A */ + PANEL->ia = IA; /* global row index of trailing part of A */ + PANEL->ja = JA; /* global col index of trailing part of A */ + PANEL->mp = mp; /* local # of rows of trailing part of A */ + PANEL->nq = nq; /* local # of cols of trailing part of A */ + PANEL->ii = ii; /* local row index of trailing part of A */ + PANEL->jj = jj; /* local col index of trailing part of A */ + PANEL->lda = A->ld; /* local leading dim of array A */ + PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */ + PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */ + PANEL->msgid = TAG; /* message id to be used for panel bcast */ +/* + * Initialize ldl2 and len to temporary dummy values and Update tag for + * next panel + */ + PANEL->ldl2 = 0; /* local leading dim of array L2 */ + PANEL->len = 0; /* length of the buffer to broadcast */ +/* + * Figure out the exact amount of workspace needed by the factorization + * and the update - Allocate that space - Finish the panel data structu- + * re initialization. + * + * L1: JB x JB in all processes + * DPIV: JB in all processes + * DINFO: 1 in all processes + * + * We make sure that those three arrays are contiguous in memory for the + * later panel broadcast. We also choose to put this amount of space + * right after L2 (when it exist) so that one can receive a contiguous + * buffer. 
+ */ + dalign = ALGO->align * sizeof( double ); + + if( npcol == 1 ) /* P x 1 process grid */ + { /* space for L1, DPIV, DINFO */ + lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 ); + if( nprow > 1 ) /* space for U */ + { nu = nq - JB; lwork += JB * Mmax( 0, nu ); } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Always re-use A in + * the only process column + */ + PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); + } + else + { /* space for L2, L1, DPIV */ + ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 ); + PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 ); +#ifdef HPL_COPY_L + lwork = ALGO->align + PANEL->len; +#else + lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); +#endif + if( nprow > 1 ) /* space for U */ + { + nu = ( mycol == icurcol ? nq - JB : nq ); + lwork += JB * Mmax( 0, nu ); + } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Re-use A in the cur- + * rent process column when HPL_COPY_L is not defined. + */ +#ifdef HPL_COPY_L + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; +#else + if( mycol == icurcol ) + { + PANEL->L2 = PANEL->A + ( myrow == icurrow ? 
JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + } + else + { + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; + } +#endif + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); + } +#ifdef HPL_CALL_VSIPL + PANEL->Ablock = A->block; +/* + * Create blocks and bind them to the data pointers + */ + PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), + (vsip_length)(JB*JB), VSIP_MEM_NONE ); + PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), + (vsip_length)(PANEL->ldl2*JB), + VSIP_MEM_NONE ); + if( nprow > 1 ) + { + nu = ( mycol == icurcol ? nq - JB : nq ); + PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), + (vsip_length)(JB * Mmax( 0, nu )), + VSIP_MEM_NONE ); + } + else { PANEL->Ublock = A->block; } +#endif +/* + * If nprow is 1, we just allocate an array of JB integers for the swap. + * When nprow > 1, we allocate the space for the index arrays immediate- + * ly. The exact size of this array depends on the swapping routine that + * will be used, so we allocate the maximum: + * + * IWORK[0] is of size at most 1 + + * IPL is of size at most 1 + + * IPID is of size at most 4 * JB + + * + * For HPL_pdlaswp00: + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * llen is of size at most NPROW + + * llen_sv is of size at most NPROW. + * + * For HPL_pdlaswp01: + * ipA is of size ar most 1 + + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * iplen is of size at most NPROW + 1 + + * ipmap is of size at most NPROW + + * ipmapm1 is of size at most NPROW + + * permU is of size at most JB + + * iwork is of size at most MAX( 2*JB, NPROW+1 ). + * + * that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1)) + * = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ). 
+ * + * We use the fist entry of this to work array to indicate whether the + * the local index arrays have already been computed, and if yes, by + * which function: + * IWORK[0] = -1: no index arrays have been computed so far; + * IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays; + * IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays; + * This allows to save some redundant and useless computations. + */ + if( nprow == 1 ) { lwork = JB; } + else + { + itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); + lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; + } + + PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + + if( PANEL->IWORK == NULL ) + { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } + /* Initialize the first entry of the workarray */ + *(PANEL->IWORK) = -1; +/* + * End of HPL_pdpanel_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_new.c new file mode 100644 index 000000000..1dbd8a18f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/panel/HPL_pdpanel_new.c @@ -0,0 +1,152 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanel_new +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * * PANEL +) +#else +void HPL_pdpanel_new +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_new creates and initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies is the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id. 
+ * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to create and initialize. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * p = NULL; +/* .. + * .. Executable Statements .. + */ +/* + * Allocate the panel structure - Check for enough memory + */ + if( !( p = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" ); + } + + HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, p ); + *PANEL = p; +/* + * End of HPL_pdpanel_new + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp00N.c new file mode 100644 index 000000000..7ad5a1a99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp00N.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP00N_DEPTH +#define HPL_LASWP00N_DEPTH 32 +#define HPL_LASWP00N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp00N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp00N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp00N performs a series of local row interchanges on a matrix + * A. 
One row interchange is initiated for rows 0 through M-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A to be + * interchanged. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. + * N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N) to which + * the row interchanges will be applied. On exit, the permuted + * matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * IPIV (local input) const int * + * On entry, IPIV is an array of size M that contains the + * pivoting information. For k in [0..M), IPIV[k]=IROFF + l + * implies that local rows k and l are to be interchanged. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register double r; + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP00N_LOG2_DEPTH ); + int ip, nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH ) + << HPL_LASWP00N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#if ( HPL_LASWP00N_DEPTH > 1 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 2 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 4 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 8 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 16 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = 
*a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA ) + { r = *a0; *a0 = *a1; *a1 = r; } + } + } + } +/* + * End of HPL_dlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01N.c new file mode 100644 index 000000000..786d1eff4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01N.c @@ -0,0 +1,209 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01N_DEPTH +#define HPL_LASWP01N_DEPTH 32 +#define HPL_LASWP01N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01N copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destination of those rows are specified by LINDXAU. 
A + * positive value of LINDXAU indicates that the array destination is U, + * and A otherwise. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). The rows + * of A specified by LINDXA are be copied within this array U at + * the positions indicated by positive values of LINDXAU. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A or + * or copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U where the rows of A should be + * copied at. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A positive + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U at the position LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved at the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP01N_LOG2_DEPTH ); + int lda1, nu, nr; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) << + HPL_LASWP01N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } + + *a1 = *a0; a1 += lda1; a0 += LDA; +#if ( HPL_LASWP01N_DEPTH > 1 ) + *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 2 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 4 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 8 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 16 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif + 
} + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } + for( j = 0; j < nr; j++, a1 += lda1, a0 += LDA ) { *a1 = *a0; } + } + } +/* + * End of HPL_dlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01T.c new file mode 100644 index 000000000..429cfb6f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp01T.c @@ -0,0 +1,252 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01T_DEPTH +#define HPL_LASWP01T_DEPTH 32 +#define HPL_LASWP01T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01T copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destination of those rows are specified by LINDXAU. A + * positive value of LINDXAU indicates that the array destination is U, + * and A otherwise. Rows of A are stored as columns in U. 
+ * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,M). The rows + * of A specified by LINDXA are copied within this array U at + * the positions indicated by positive values of LINDXAU. The + * rows of A are stored as columns in U. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A or + * or copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U where the rows of A should be + * copied at. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A positive + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U at the position LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved at the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH ); + int nu, nr; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) << + HPL_LASWP01T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + a1[ 0] = *a0; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + a1[ 1] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA; + a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA; + a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA; + a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA; + a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA; + a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA; + a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA; + a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA; + a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA; + a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA; + a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA; + a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA; +#endif + } + else + { + a1 = A - (size_t)(LINDXAU[i]); + + *a1 = *a0; a1 += LDA; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; 
+ *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; } + } + else + { + a1 = A - (size_t)(LINDXAU[i]); + for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; } + } + } + } +/* + * End of HPL_dlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp02N.c new file mode 100644 index 000000000..45c2f5f1f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp02N.c @@ -0,0 +1,205 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Default unrolling depth of the gather loop and its base-2 logarithm.
 * The unrolled body below provides steps for depths up to 32.
 */
#ifndef HPL_LASWP02N_DEPTH
#define    HPL_LASWP02N_DEPTH        32
#define    HPL_LASWP02N_LOG2_DEPTH    5
#endif
/*
 * Unrolling helpers: copy 1, 2, 4, 8 or 16 successive entries of the
 * current row of A (walked with stride LDA) into dst[k], dst[k+1], ...
 */
#define    HPL_SWP02N_X1( k )   dst[(k)] = *src; src += LDA;
#define    HPL_SWP02N_X2( k )   HPL_SWP02N_X1( k ) HPL_SWP02N_X1( (k) + 1 )
#define    HPL_SWP02N_X4( k )   HPL_SWP02N_X2( k ) HPL_SWP02N_X2( (k) + 2 )
#define    HPL_SWP02N_X8( k )   HPL_SWP02N_X4( k ) HPL_SWP02N_X4( (k) + 4 )
#define    HPL_SWP02N_X16( k )  HPL_SWP02N_X8( k ) HPL_SWP02N_X8( (k) + 8 )

#ifdef STDC_HEADERS
void HPL_dlaswp02N
(
   const int                        M,
   const int                        N,
   const double *                   A,
   const int                        LDA,
   double *                         W0,
   double *                         W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp02N
( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   const double *                   A;
   const int                        LDA;
   double *                         W0;
   double *                         W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp02N gathers M scattered rows of A into the workspace W and
 * records, for every gathered row, its destination index in W0.
 *
 * Arguments
 * =========
 *
 * M       (input) number of rows of A to gather.
 * N       (input) number of entries taken from each row.
 *         The routine returns immediately when M <= 0 or N <= 0.
 * A       (input) column-major array with leading dimension LDA; row
 *         LINDXA[i] is read at A[LINDXA[i] + j*LDA] for j in [0..N).
 * LDA     (input) leading dimension of A.
 * W0      (output) W0[i*LDW] receives (double)LINDXAU[i], i in [0..M).
 * W       (output) W[i*LDW + j] receives entry j of row LINDXA[i] of A.
 * LDW     (input) distance between two successive packed rows in W
 *         and between two successive entries written in W0.
 * LINDXA  (input) M row indexes into A.
 * LINDXAU (input) M indexes stored (as doubles) into W0.
 *
 * ---------------------------------------------------------------------
 */
   const double               * panel  = A;    /* current column block */
   const double               * src;
   double                     * packed = W;    /* matching block in W  */
   double                     * dst;
   const int                  blockStride = (int)( (unsigned int)(LDA) <<
                                            HPL_LASWP02N_LOG2_DEPTH );
   int                        nBlocked, nTail, row, col;

   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Store the destination index attached to every gathered row
 */
   for( row = 0; row < M; row++ )
      W0[ (size_t)(row) * (size_t)(LDW) ] = (double)(LINDXAU[row]);
/*
 * Largest multiple of the unrolling depth not exceeding N, and remainder
 */
   nBlocked = (int)( (unsigned int)(N) &
                     ~( ( 1U << HPL_LASWP02N_LOG2_DEPTH ) - 1U ) );
   nTail    = N - nBlocked;
/*
 * Full blocks of HPL_LASWP02N_DEPTH columns
 */
   for( col = 0; col < nBlocked; col += HPL_LASWP02N_DEPTH )
   {
      for( row = 0; row < M; row++ )
      {
         src = panel  + (size_t)(LINDXA[row]);
         dst = packed + (size_t)(row) * (size_t)(LDW);

         HPL_SWP02N_X1( 0 )
#if ( HPL_LASWP02N_DEPTH >  1 )
         HPL_SWP02N_X1( 1 )
#endif
#if ( HPL_LASWP02N_DEPTH >  2 )
         HPL_SWP02N_X2( 2 )
#endif
#if ( HPL_LASWP02N_DEPTH >  4 )
         HPL_SWP02N_X4( 4 )
#endif
#if ( HPL_LASWP02N_DEPTH >  8 )
         HPL_SWP02N_X8( 8 )
#endif
#if ( HPL_LASWP02N_DEPTH > 16 )
         HPL_SWP02N_X16( 16 )
#endif
      }
      panel  += blockStride;
      packed += HPL_LASWP02N_DEPTH;
   }
/*
 * Remaining nTail columns, one entry at a time
 */
   if( nTail <= 0 ) return;

   for( row = 0; row < M; row++ )
   {
      src = panel  + (size_t)(LINDXA[row]);
      dst = packed + (size_t)(row) * (size_t)(LDW);
      for( col = 0; col < nTail; col++ ) { dst[col] = *src; src += LDA; }
   }
/*
 * End of HPL_dlaswp02N
 */
}

#undef     HPL_SWP02N_X16
#undef     HPL_SWP02N_X8
#undef     HPL_SWP02N_X4
#undef     HPL_SWP02N_X2
#undef     HPL_SWP02N_X1
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp03N.c @@ -0,0 +1,194 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Default unrolling depth of the scatter loop and its base-2 logarithm.
 * The unrolled body below provides steps for depths up to 32.
 */
#ifndef HPL_LASWP03N_DEPTH
#define    HPL_LASWP03N_DEPTH        32
#define    HPL_LASWP03N_LOG2_DEPTH    5
#endif
/*
 * Unrolling helpers: store src[k], src[k+1], ... into 1, 2, 4, 8 or 16
 * successive entries of the current row of U (walked with stride LDU).
 */
#define    HPL_SWP03N_X1( k )   *dst = src[(k)]; dst += LDU;
#define    HPL_SWP03N_X2( k )   HPL_SWP03N_X1( k ) HPL_SWP03N_X1( (k) + 1 )
#define    HPL_SWP03N_X4( k )   HPL_SWP03N_X2( k ) HPL_SWP03N_X2( (k) + 2 )
#define    HPL_SWP03N_X8( k )   HPL_SWP03N_X4( k ) HPL_SWP03N_X4( (k) + 4 )
#define    HPL_SWP03N_X16( k )  HPL_SWP03N_X8( k ) HPL_SWP03N_X8( (k) + 8 )

#ifdef STDC_HEADERS
void HPL_dlaswp03N
(
   const int                        M,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const double *                   W0,
   const double *                   W,
   const int                        LDW
)
#else
void HPL_dlaswp03N
( M, N, U, LDU, W0, W, LDW )
   const int                        M;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp03N scatters M contiguous columns of W into rows of U. The
 * destination row of column i is read from W0[i*LDW].
 *
 * Arguments
 * =========
 *
 * M       (input) number of columns of W to scatter.
 * N       (input) number of entries in each of those columns.
 *         The routine returns immediately when M <= 0 or N <= 0.
 * U       (input/output) column-major array with leading dimension
 *         LDU; entry j of column i of W is written to
 *         U[(size_t)W0[i*LDW] + j*LDU].
 * LDU     (input) leading dimension of U.
 * W0      (input) W0[i*LDW] holds, as a double, the destination row
 *         index in U of column i of W.
 * W       (input) column i starts at W + i*LDW and holds N entries.
 * LDW     (input) distance between two successive columns of W and
 *         between two successive entries read from W0.
 *
 * ---------------------------------------------------------------------
 */
   const double               * packed = W;    /* current block in W   */
   const double               * src;
   double                     * dst;
   const int                  blockStride = (int)( (unsigned int)(LDU) <<
                                            HPL_LASWP03N_LOG2_DEPTH );
   int                        nBlocked, nTail, col, k;

   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Largest multiple of the unrolling depth not exceeding N, and remainder
 */
   nBlocked = (int)( (unsigned int)(N) &
                     ~( ( 1U << HPL_LASWP03N_LOG2_DEPTH ) - 1U ) );
   nTail    = N - nBlocked;
/*
 * Full blocks of HPL_LASWP03N_DEPTH entries per column of W
 */
   for( k = 0; k < nBlocked; k += HPL_LASWP03N_DEPTH )
   {
      for( col = 0; col < M; col++ )
      {
         dst = U + (size_t)( W0[ (size_t)(col) * (size_t)(LDW) ] );
         src = packed + (size_t)(col) * (size_t)(LDW);

         HPL_SWP03N_X1( 0 )
#if ( HPL_LASWP03N_DEPTH >  1 )
         HPL_SWP03N_X1( 1 )
#endif
#if ( HPL_LASWP03N_DEPTH >  2 )
         HPL_SWP03N_X2( 2 )
#endif
#if ( HPL_LASWP03N_DEPTH >  4 )
         HPL_SWP03N_X4( 4 )
#endif
#if ( HPL_LASWP03N_DEPTH >  8 )
         HPL_SWP03N_X8( 8 )
#endif
#if ( HPL_LASWP03N_DEPTH > 16 )
         HPL_SWP03N_X16( 16 )
#endif
      }
      U      += blockStride;
      packed += HPL_LASWP03N_DEPTH;
   }
/*
 * Remaining nTail entries of every column, one at a time
 */
   if( nTail == 0 ) return;

   for( col = 0; col < M; col++ )
   {
      dst = U + (size_t)( W0[ (size_t)(col) * (size_t)(LDW) ] );
      src = packed + (size_t)(col) * (size_t)(LDW);
      for( k = 0; k < nTail; k++ ) { *dst = src[k]; dst += LDU; }
   }
/*
 * End of HPL_dlaswp03N
 */
}

#undef     HPL_SWP03N_X16
#undef     HPL_SWP03N_X8
#undef     HPL_SWP03N_X4
#undef     HPL_SWP03N_X2
#undef     HPL_SWP03N_X1
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Default unrolling depth of the copy loop and its base-2 logarithm.
 * The unrolled body below provides steps for depths up to 32.
 */
#ifndef HPL_LASWP03T_DEPTH
#define    HPL_LASWP03T_DEPTH        32
#define    HPL_LASWP03T_LOG2_DEPTH    5
#endif
/*
 * Unrolling helpers: copy 1, 2, 4, 8 or 16 contiguous entries starting
 * at position k from the current column of W into the matching
 * positions of the destination column of U.
 */
#define    HPL_SWP03T_X1( k )   dst[(k)] = src[(k)];
#define    HPL_SWP03T_X2( k )   HPL_SWP03T_X1( k ) HPL_SWP03T_X1( (k) + 1 )
#define    HPL_SWP03T_X4( k )   HPL_SWP03T_X2( k ) HPL_SWP03T_X2( (k) + 2 )
#define    HPL_SWP03T_X8( k )   HPL_SWP03T_X4( k ) HPL_SWP03T_X4( (k) + 4 )
#define    HPL_SWP03T_X16( k )  HPL_SWP03T_X8( k ) HPL_SWP03T_X8( (k) + 8 )

#ifdef STDC_HEADERS
void HPL_dlaswp03T
(
   const int                        M,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const double *                   W0,
   const double *                   W,
   const int                        LDW
)
#else
void HPL_dlaswp03T
( M, N, U, LDU, W0, W, LDW )
   const int                        M;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp03T copies M contiguous columns of W into columns of U. The
 * destination column of column i is read from W0[i*LDW].
 *
 * Arguments
 * =========
 *
 * M       (input) number of columns of W to copy.
 * N       (input) number of entries in each of those columns.
 *         The routine returns immediately when M <= 0 or N <= 0.
 * U       (input/output) column-major array with leading dimension
 *         LDU; entry j of column i of W is written to
 *         U[(size_t)W0[i*LDW] * LDU + j].
 * LDU     (input) leading dimension of U.
 * W0      (input) W0[i*LDW] holds, as a double, the destination
 *         column index in U of column i of W.
 * W       (input) column i starts at W + i*LDW and holds N entries.
 * LDW     (input) distance between two successive columns of W and
 *         between two successive entries read from W0.
 *
 * ---------------------------------------------------------------------
 */
   const double               * packed = W;    /* current block in W   */
   const double               * src;
   double                     * dst;
   const int                  blockStride = ( 1 << HPL_LASWP03T_LOG2_DEPTH );
   int                        nBlocked, nTail, col, k;

   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Largest multiple of the unrolling depth not exceeding N, and remainder
 */
   nBlocked = (int)( (unsigned int)(N) &
                     ~( ( 1U << HPL_LASWP03T_LOG2_DEPTH ) - 1U ) );
   nTail    = N - nBlocked;
/*
 * Full blocks of HPL_LASWP03T_DEPTH entries per column
 */
   for( k = 0; k < nBlocked; k += HPL_LASWP03T_DEPTH )
   {
      for( col = 0; col < M; col++ )
      {
         dst = U + (size_t)( W0[ (size_t)(col) * (size_t)(LDW) ] ) *
                   (size_t)(LDU);
         src = packed + (size_t)(col) * (size_t)(LDW);

         HPL_SWP03T_X1( 0 )
#if ( HPL_LASWP03T_DEPTH >  1 )
         HPL_SWP03T_X1( 1 )
#endif
#if ( HPL_LASWP03T_DEPTH >  2 )
         HPL_SWP03T_X2( 2 )
#endif
#if ( HPL_LASWP03T_DEPTH >  4 )
         HPL_SWP03T_X4( 4 )
#endif
#if ( HPL_LASWP03T_DEPTH >  8 )
         HPL_SWP03T_X8( 8 )
#endif
#if ( HPL_LASWP03T_DEPTH > 16 )
         HPL_SWP03T_X16( 16 )
#endif
      }
      U      += blockStride;
      packed += HPL_LASWP03T_DEPTH;
   }
/*
 * Remaining nTail entries of every column, one at a time
 */
   if( nTail <= 0 ) return;

   for( col = 0; col < M; col++ )
   {
      dst = U + (size_t)( W0[ (size_t)(col) * (size_t)(LDW) ] ) *
                (size_t)(LDU);
      src = packed + (size_t)(col) * (size_t)(LDW);
      for( k = 0; k < nTail; k++ ) dst[k] = src[k];
   }
/*
 * End of HPL_dlaswp03T
 */
}

#undef     HPL_SWP03T_X16
#undef     HPL_SWP03T_X8
#undef     HPL_SWP03T_X4
#undef     HPL_SWP03T_X2
#undef     HPL_SWP03T_X1
/dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp04N.c @@ -0,0 +1,285 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Default unrolling depth of the swap loops and its base-2 logarithm.
 * The unrolled bodies below provide steps for depths up to 32.
 */
#ifndef HPL_LASWP04N_DEPTH
#define    HPL_LASWP04N_DEPTH        32
#define    HPL_LASWP04N_LOG2_DEPTH    5
#endif
/*
 * Unrolling helpers, exchange flavour: save the current entry of the
 * row of U into the row of A, overwrite it with src[k], and advance
 * both row pointers by their leading dimension.
 */
#define    HPL_SWP04N_XCH1( k )  \
           *adst = *udst; *udst = src[(k)]; adst += LDA; udst += LDU;
#define    HPL_SWP04N_XCH2( k )  \
           HPL_SWP04N_XCH1( k ) HPL_SWP04N_XCH1( (k) + 1 )
#define    HPL_SWP04N_XCH4( k )  \
           HPL_SWP04N_XCH2( k ) HPL_SWP04N_XCH2( (k) + 2 )
#define    HPL_SWP04N_XCH8( k )  \
           HPL_SWP04N_XCH4( k ) HPL_SWP04N_XCH4( (k) + 4 )
#define    HPL_SWP04N_XCH16( k ) \
           HPL_SWP04N_XCH8( k ) HPL_SWP04N_XCH8( (k) + 8 )
/*
 * Unrolling helpers, store flavour: write src[k] into the current entry
 * of the row of U and advance the row pointer by LDU.
 */
#define    HPL_SWP04N_PUT1( k )  *udst = src[(k)]; udst += LDU;
#define    HPL_SWP04N_PUT2( k )  \
           HPL_SWP04N_PUT1( k ) HPL_SWP04N_PUT1( (k) + 1 )
#define    HPL_SWP04N_PUT4( k )  \
           HPL_SWP04N_PUT2( k ) HPL_SWP04N_PUT2( (k) + 2 )
#define    HPL_SWP04N_PUT8( k )  \
           HPL_SWP04N_PUT4( k ) HPL_SWP04N_PUT4( (k) + 4 )
#define    HPL_SWP04N_PUT16( k ) \
           HPL_SWP04N_PUT8( k ) HPL_SWP04N_PUT8( (k) + 8 )

#ifdef STDC_HEADERS
void HPL_dlaswp04N
(
   const int                        M0,
   const int                        M1,
   const int                        N,
   double *                         U,
   const int                        LDU,
   double *                         A,
   const int                        LDA,
   const double *                   W0,
   const double *                   W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp04N
( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M0;
   const int                        M1;
   const int                        N;
   double *                         U;
   const int                        LDU;
   double *                         A;
   const int                        LDA;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp04N performs two operations on rows of U:
 *
 *  - for i in [0..M0), row LINDXAU[i] of U is saved into row LINDXA[i]
 *    of A and then overwritten by column i of W;
 *  - for i in [M0..M1), column i of W is stored into the row of U whose
 *    index is read from W0[i*LDW].
 *
 * Arguments
 * =========
 *
 * M0      (input) number of rows of U exchanged through A.
 * M1      (input) end (exclusive) of the range of columns of W that
 *         are used; columns [M0..M1) are only stored into U.
 * N       (input) number of entries processed in each row.
 *         The routine returns immediately when N <= 0, or when both
 *         M0 <= 0 and M1 <= 0.
 * U       (input/output) column-major array, leading dimension LDU.
 * LDU     (input) leading dimension of U.
 * A       (output) column-major array, leading dimension LDA.
 * LDA     (input) leading dimension of A.
 * W0      (input) W0[i*LDW], i in [M0..M1), holds as a double the
 *         destination row index in U of column i of W.
 * W       (input) column i starts at W + i*LDW and holds N entries.
 * LDW     (input) distance between two successive columns of W and
 *         between two successive entries read from W0.
 * LINDXA  (input) M0 row indexes into A.
 * LINDXAU (input) M0 row indexes into U.
 *
 * ---------------------------------------------------------------------
 */
   const double               * packed = W;    /* current block in W   */
   const double               * src;
   double                     * adst, * udst;
   const int                  strideA = (int)( (unsigned int)(LDA) <<
                                        HPL_LASWP04N_LOG2_DEPTH ),
                              strideU = (int)( (unsigned int)(LDU) <<
                                        HPL_LASWP04N_LOG2_DEPTH );
   int                        nBlocked, nTail, col, k;

   if( N <= 0 ) return;
   if( ( M0 <= 0 ) && ( M1 <= 0 ) ) return;
/*
 * Largest multiple of the unrolling depth not exceeding N, and remainder
 */
   nBlocked = (int)( (unsigned int)(N) &
                     ~( ( 1U << HPL_LASWP04N_LOG2_DEPTH ) - 1U ) );
   nTail    = N - nBlocked;
/*
 * Full blocks of HPL_LASWP04N_DEPTH entries per row
 */
   for( k = 0; k < nBlocked; k += HPL_LASWP04N_DEPTH )
   {
      for( col = 0; col < M0; col++ )
      {
         adst = A + (size_t)(LINDXA[col]);
         udst = U + (size_t)(LINDXAU[col]);
         src  = packed + (size_t)(col) * (size_t)(LDW);

         HPL_SWP04N_XCH1( 0 )
#if ( HPL_LASWP04N_DEPTH >  1 )
         HPL_SWP04N_XCH1( 1 )
#endif
#if ( HPL_LASWP04N_DEPTH >  2 )
         HPL_SWP04N_XCH2( 2 )
#endif
#if ( HPL_LASWP04N_DEPTH >  4 )
         HPL_SWP04N_XCH4( 4 )
#endif
#if ( HPL_LASWP04N_DEPTH >  8 )
         HPL_SWP04N_XCH8( 8 )
#endif
#if ( HPL_LASWP04N_DEPTH > 16 )
         HPL_SWP04N_XCH16( 16 )
#endif
      }

      for( col = M0; col < M1; col++ )
      {
         udst = U + (size_t)( W0[ (size_t)(col) * (size_t)(LDW) ] );
         src  = packed + (size_t)(col) * (size_t)(LDW);

         HPL_SWP04N_PUT1( 0 )
#if ( HPL_LASWP04N_DEPTH >  1 )
         HPL_SWP04N_PUT1( 1 )
#endif
#if ( HPL_LASWP04N_DEPTH >  2 )
         HPL_SWP04N_PUT2( 2 )
#endif
#if ( HPL_LASWP04N_DEPTH >  4 )
         HPL_SWP04N_PUT4( 4 )
#endif
#if ( HPL_LASWP04N_DEPTH >  8 )
         HPL_SWP04N_PUT8( 8 )
#endif
#if ( HPL_LASWP04N_DEPTH > 16 )
         HPL_SWP04N_PUT16( 16 )
#endif
      }

      A      += strideA;
      U      += strideU;
      packed += HPL_LASWP04N_DEPTH;
   }
/*
 * Remaining nTail entries of every row, one at a time
 */
   if( nTail == 0 ) return;

   for( col = 0; col < M0; col++ )
   {
      adst = A + (size_t)(LINDXA[col]);
      udst = U + (size_t)(LINDXAU[col]);
      src  = packed + (size_t)(col) * (size_t)(LDW);
      for( k = 0; k < nTail; k++ )
      {
         *adst = *udst; *udst = src[k];
         adst += LDA;   udst += LDU;
      }
   }
   for( col = M0; col < M1; col++ )
   {
      udst = U + (size_t)( W0[ (size_t)(col) * (size_t)(LDW) ] );
      src  = packed + (size_t)(col) * (size_t)(LDW);
      for( k = 0; k < nTail; k++ ) { *udst = src[k]; udst += LDU; }
   }
/*
 * End of HPL_dlaswp04N
 */
}

#undef     HPL_SWP04N_PUT16
#undef     HPL_SWP04N_PUT8
#undef     HPL_SWP04N_PUT4
#undef     HPL_SWP04N_PUT2
#undef     HPL_SWP04N_PUT1
#undef     HPL_SWP04N_XCH16
#undef     HPL_SWP04N_XCH8
#undef     HPL_SWP04N_XCH4
#undef     HPL_SWP04N_XCH2
#undef     HPL_SWP04N_XCH1
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP04T_DEPTH
#define    HPL_LASWP04T_DEPTH       32
#define    HPL_LASWP04T_LOG2_DEPTH   5
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp04T
(
   const int                        M0,
   const int                        M1,
   const int                        N,
   double *                         U,
   const int                        LDU,
   double *                         A,
   const int                        LDA,
   const double *                   W0,
   const double *                   W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp04T
( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M0;
   const int                        M1;
   const int                        N;
   double *                         U;
   const int                        LDU;
   double *                         A;
   const int                        LDA;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
 * columns of U with columns of W. In addition, M1 - M0 columns of W are
 * copied into U.
 *
 * Arguments
 * =========
 *
 * M0      (local input)                 const int
 *         On entry, M0 specifies the number of columns of U that are
 *         copied into rows of A and replaced by columns [0..M0) of W.
 *
 * M1      (local input)                 const int
 *         On entry, M1 is the end (exclusive) of the range of columns
 *         of W that are used: columns [M0..M1) of W are copied into U.
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the columns of U that
 *         are copied into rows of A. Nothing is done when N <= 0, or
 *         when both M0 <= 0 and M1 <= 0.
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,*). Column
 *         c of U is addressed as U + c*LDU and holds N entries.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the rows of this array specified by LINDXA are replaced by
 *         the columns of U indicated by LINDXAU.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *
 * W0      (local input)                 const double *
 *         On entry, W0[i*LDW] for i in [M0..M1) contains, stored as a
 *         double, the index of the column of U into which column i of
 *         W is copied.
 *
 * W       (local input)                 const double *
 *         On entry, column i of W starts at W + i*LDW and contains N
 *         entries, for i in [0..M1).
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W,
 *         which is also the stride between the entries read from W0.
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M0 containing the
 *         local row indexes of A into which columns of U are copied.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU is an array of dimension M0 that contains
 *         the local column indexes of U that are copied into A and
 *         replaced by the columns of W.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * w = W, * w0;
   double                     * a0, * u0;
   const int                  incA = (int)( (unsigned int)(LDA) <<
                                            HPL_LASWP04T_LOG2_DEPTH ),
                              incU = ( 1 << HPL_LASWP04T_LOG2_DEPTH );
   int                        nr, nu;
   register int               i, j;
/* ..
 * .. Executable Statements ..
 */
   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;

   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) <<
                          HPL_LASWP04T_LOG2_DEPTH ) );

   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU,
        w += HPL_LASWP04T_DEPTH )
   {
      for( i = 0; i < M0; i++ )
      {
/*
 * Offsets are formed in size_t: an index times a leading dimension can
 * exceed the range of int for large panels.
 */
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);

         *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA;
#if ( HPL_LASWP04T_DEPTH >  1 )
         *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH >  2 )
         *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA;
         *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH >  4 )
         *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA;
         *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA;
         *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA;
         *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH >  8 )
         *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA;
         *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA;
         *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA;
         *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA;
         *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA;
         *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA;
         *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA;
         *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH > 16 )
         *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA;
         *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA;
         *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA;
         *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA;
         *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA;
         *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA;
         *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA;
         *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA;
         *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA;
         *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA;
         *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA;
         *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA;
         *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA;
         *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA;
         *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA;
         *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA;
#endif
      }
      for( i = M0; i < M1; i++ )
      {
/*
 * The destination column index is stored as a double in W0; it is
 * converted and scaled in size_t, as HPL_dlaswp03T does.
 */
         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);

         u0[ 0] = w0[ 0];
#if ( HPL_LASWP04T_DEPTH >  1 )
         u0[ 1] = w0[ 1];
#endif
#if ( HPL_LASWP04T_DEPTH >  2 )
         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
#endif
#if ( HPL_LASWP04T_DEPTH >  4 )
         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
#endif
#if ( HPL_LASWP04T_DEPTH >  8 )
         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
#endif
#if ( HPL_LASWP04T_DEPTH > 16 )
         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
#endif
      }
   }

   if( nr > 0 )
   {
      for( i = 0; i < M0; i++ )
      {
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; }
      }
      for( i = M0; i < M1; i++ )
      {
         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
      }
   }
/*
 * End of HPL_dlaswp04T
 */
}
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05N.c new file mode 100644 index 000000000..3edcf91a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05N.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP05N_DEPTH +#define HPL_LASWP05N_DEPTH 32 +#define HPL_LASWP05N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05N +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05N copies the rows of U selected by LINDXAU into the rows + * of A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of U that should be + * copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of U that should + * be copied into A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A.
+ * LDA must be at least MAX(1,M). + * + * U (local input) const double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows that are to be copied into A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that are overwritten with rows of U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U that should be copied into A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. (nu: columns of A covered by the unrolled loop; nr = N - nu) + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP05N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) << + HPL_LASWP05N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + + *a0 = *u0; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP05N_DEPTH > 1 ) + *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 2 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 4 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 8 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 16 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; } + } + } +/* + * End of HPL_dlaswp05N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05T.c
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05T.c new file mode 100644 index 000000000..0adaa102d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp05T.c @@ -0,0 +1,196 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP05T_DEPTH +#define HPL_LASWP05T_DEPTH 32 +#define HPL_LASWP05T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05T +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05T copies the columns of U selected by LINDXAU into the rows + * of A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that will + * be copied into rows of A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU.
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input) const double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that are overwritten with columns of U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local column indexes of U that should be copied into A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. (nu: columns of A covered by the unrolled loop; nr = N - nu) + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) << + HPL_LASWP05T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + *a0 = u0[ 0]; a0 += LDA; +#if ( HPL_LASWP05T_DEPTH > 1 ) + *a0 = u0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 2 ) + *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 4 ) + *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA; + *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 8 ) + *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA; + *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA; + *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA; + *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 16 ) + *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA; + *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA; + *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA; + *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA; + *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA; + *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA; + *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA; + *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; } + } + } +/* + * End of HPL_dlaswp05T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06N.c new file mode 100644 index 000000000..a74bae75c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06N.c @@ -0,0 +1,206 @@
+/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06N_DEPTH +#define HPL_LASWP06N_DEPTH 32 +#define HPL_LASWP06N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06N +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06N swaps the first M rows of U with the rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with rows of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with rows of U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M).
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows of U that are to be swapped with rows + * of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M; row LINDXA[i] of + * A is swapped with row i of U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. (nu: columns of A covered by the unrolled loop; nr = N - nu) + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP06N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) << + HPL_LASWP06N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP06N_DEPTH > 1 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 2 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 4 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 8 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 =
r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 16 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { r = *a0; *a0 = *u0; *u0 = r; } + } + } +/* + * End of HPL_dlaswp06N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06T.c new file mode 100644 index 000000000..fb53c2a31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp06T.c @@ -0,0 +1,207 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06T_DEPTH +#define HPL_LASWP06T_DEPTH 32 +#define HPL_LASWP06T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06T +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06T swaps the first M columns of U with the rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with columns of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with columns of U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M).
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns of U that are to be swapped with + * rows of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M; row LINDXA[i] of + * A is swapped with column i of U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. (nu: columns of A covered by the unrolled loop; nr = N - nu) + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) << + HPL_LASWP06T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + + r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA; +#if ( HPL_LASWP06T_DEPTH > 1 ) + r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 2 ) + r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA; + r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 4 ) + r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA; + r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA; + r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA; + r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 8 ) + r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA; + r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA; + r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA; + r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA; + r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA; + r = *a0; *a0 = u0[13]; u0[13] = r; a0 +=
LDA; + r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA; + r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 16 ) + r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA; + r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA; + r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA; + r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA; + r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA; + r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA; + r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA; + r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA; + r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA; + r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA; + r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA; + r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA; + r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA; + r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA; + r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA; + r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) + { r = *a0; *a0 = u0[j]; u0[j] = r; } + } + } +/* + * End of HPL_dlaswp06T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp10N.c new file mode 100644 index 000000000..7dbf934f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_dlaswp10N.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP10N_DEPTH +#define HPL_LASWP10N_DEPTH 32 +#define HPL_LASWP10N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp10N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp10N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp10N performs a sequence of local column interchanges on a + * matrix A. One column interchange is initiated for columns 0 through + * N-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * __arg0__ + * + * N (local input) const int + * On entry, M specifies the number of rows of the array A. M + * must be at least zero. + * + * A (local input/output) double * + * On entry, N specifies the number of columns of the array A. N + * must be at least zero. + * + * LDA (local input) const int + * On entry, A points to an array of dimension (LDA,N). This + * array contains the columns onto which the interchanges should + * be applied. On exit, A contains the permuted matrix. + * + * IPIV (local input) const int * + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * a0, * a1; + const int incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH ); + int jp, mr, mu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH ) + << HPL_LASWP10N_LOG2_DEPTH ) ); + + for( j = 0; j < N; j++ ) + { + if( j != ( jp = IPIV[j] ) ) + { + a0 = A + j * LDA; a1 = A + jp * LDA; + + for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA ) + { + r = *a0; *a0 = *a1; *a1 = r; +#if ( HPL_LASWP10N_DEPTH > 1 ) + r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 2 ) + r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r; + r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 4 ) + r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r; + r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r; + r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r; + r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 8 ) + r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r; + r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r; + r = a0[10]; a0[10] = a1[10]; a1[10] = r; + r = a0[11]; a0[11] = a1[11]; a1[11] = r; + r = a0[12]; a0[12] = a1[12]; a1[12] = r; + r = a0[13]; a0[13] = a1[13]; a1[13] = r; + r = a0[14]; a0[14] = a1[14]; a1[14] = r; + r = a0[15]; a0[15] = a1[15]; a1[15] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 16 ) + r = a0[16]; a0[16] = a1[16]; a1[16] = r; + r = a0[17]; a0[17] = a1[17]; a1[17] = r; + r = a0[18]; a0[18] = a1[18]; a1[18] = r; + r = a0[19]; a0[19] = a1[19]; a1[19] = r; + r = a0[20]; a0[20] = a1[20]; a1[20] = r; + r = a0[21]; a0[21] = a1[21]; a1[21] = r; + r = a0[22]; a0[22] = a1[22]; a1[22] = r; + r = a0[23]; a0[23] = a1[23]; a1[23] = r; + r = a0[24]; a0[24] = a1[24]; a1[24] = r; + r = a0[25]; a0[25] = a1[25]; a1[25] = r; + r = a0[26]; a0[26] = a1[26]; a1[26] = r; + r = a0[27]; a0[27] = a1[27]; a1[27] = r; + r = 
a0[28]; a0[28] = a1[28]; a1[28] = r; + r = a0[29]; a0[29] = a1[29]; a1[29] = r; + r = a0[30]; a0[30] = a1[30]; a1[30] = r; + r = a0[31]; a0[31] = a1[31]; a1[31] = r; +#endif + } + + for( i = 0; i < mr; i++ ) + { r = a0[i]; a0[i] = a1[i]; a1[i] = r; } + } + } +/* + * End of HPL_dlaswp10N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2l.c new file mode 100644 index 000000000..e1b5bbfac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2l.c @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2l +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2l +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2l computes the local index of a matrix entry pointed to by + * the global index IG. This local returned index is the same in all + * processes. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. 
+ * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated, in which case this routine returns IG in all + * processes. Otherwise, the value of SRCPROC is ignored. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( IG ); +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. 
+ * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + return( NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ) ); +/* + * End of HPL_indxg2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2lp.c new file mode 100644 index 000000000..74662f9d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2lp.c @@ -0,0 +1,176 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_indxg2lp +( + int * IL, + int * PROC, + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +void HPL_indxg2lp +( IL, PROC, IG, INB, NB, SRCPROC, NPROCS ) + int * IL; + int * PROC; + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2lp computes the local index of a matrix entry pointed to by + * the global index IG as well as the process coordinate which posseses + * this entry. The local returned index is the same in all processes. + * + * Arguments + * ========= + * + * IL (output) int * + * On exit, IL specifies the local index corresponding to IG. IL + * is at least zero. 
+ * + * PROC (output) int * + * On exit, PROC is the coordinate of the process owning the + * entry specified by the global index IG. PROC is at least zero + * and less than NPROCS. + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated, in which case this routine returns IG in all + * processes. Otherwise, the value of SRCPROC is ignored. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + *IL = IG; + *PROC = SRCPROC; + } + else + { +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. 
Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC and take + * the NPROCS modulo (definition of the block-cyclic data distribution). + */ + *PROC = SRCPROC + 1 + i; + *PROC = MPosMod( *PROC, NPROCS ); +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + *IL = NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ); + } +/* + * End of HPL_indxg2lp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2p.c new file mode 100644 index 000000000..d0e75f516 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxg2p.c @@ -0,0 +1,128 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2p +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2p +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2p computes the process coordinate which posseses the entry + * of a matrix specified by a global index IG. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int proc; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( SRCPROC ); +/* + * Otherwise, IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC + * and take the NPROCS modulo (definition of the block-cyclic data dis- + * tribution). + */ + proc = SRCPROC + 1 + ( IG - INB ) / NB; + return( MPosMod( proc, NPROCS ) ); +/* + * End of HPL_indxg2p + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxl2g.c new file mode 100644 index 000000000..7f139425a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_indxl2g.c @@ -0,0 +1,164 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxl2g +( + const int IL, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxl2g +( IL, INB, NB, PROC, SRCPROC, NPROCS ) + const int IL; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxl2g computes the global index of a matrix entry pointed to + * by the local index IL of the process indicated by PROC. 
+ * + * Arguments + * ========= + * + * IL (input) const int + * On entry, IL specifies the local index of the matrix entry. + * IL must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local array row or column is to be determined. PROC must be + * at least zero and strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( IL ); + } + else if( PROC == SRCPROC ) + { +/* + * If I am SRCPROC, my first block is of size INB + */ + if( IL < INB ) +/* + * If IL belongs to the first block, the local and global indexes are + * equal. + */ + return ( IL ); +/* + * The number of entire blocks before the one IL belongs to is + * ( IL - INB ) / NB + 1. In the other NPROCS-1 processes, there are + * thus NB*( ( IL-INB )/NB + 1 ) entries, that are globally before the + * global entry corresponding to IL. 
+ */ + return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL ); + } + else if( PROC < SRCPROC ) + { +/* + * Otherwise, the process of coordinate MOD(SRCPROC+1, NPROCS) owns the + * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro- + * cesses between this process and PROC not included when going from + * left to right on the process line with possible wrap around. These + * IPROC processes have one more NB block than the other processes, who + * own IL / NB blocks of size NB. + */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB ); + } + else + { +/* + * Same reasoning as above with IPROC = PROC - SRCPROC - 1. + */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1 )+IL+INB ); + } +/* + * End of HPL_indxl2g + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_infog2l.c new file mode 100644 index 000000000..2580f2ad4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_infog2l.c @@ -0,0 +1,382 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_infog2l +( + int I, + int J, + const int IMB, + const int MB, + const int INB, + const int NB, + const int RSRC, + const int CSRC, + const int MYROW, + const int MYCOL, + const int NPROW, + const int NPCOL, + int * II, + int * JJ, + int * PROW, + int * PCOL +) +#else +void HPL_infog2l +( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL ) + int I; + int J; + const int IMB; + const int MB; + const int INB; + const int NB; + const int RSRC; + const int CSRC; + const int MYROW; + const int MYCOL; + const int NPROW; + const int NPCOL; + int * II; + int * JJ; + int * PROW; + int * PCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_infog2l computes the starting local index II, JJ corresponding to + * the submatrix starting globally at the entry pointed by I, J. This + * routine returns the coordinates in the grid of the process owning the + * matrix entry of global indexes I, J, namely PROW and PCOL. + * + * Arguments + * ========= + * + * I (global input) int + * On entry, I specifies the global row index of the matrix + * entry. I must be at least zero. + * + * J (global input) int + * On entry, J specifies the global column index of the matrix + * entry. J must be at least zero. + * + * IMB (global input) const int + * On entry, IMB specifies the size of the first row block of + * the global matrix. IMB must be at least one. + * + * MB (global input) const int + * On entry, MB specifies the blocking factor used to partition + * and distribute the rows of the matrix A. MB must be larger + * than one. + * + * INB (global input) const int + * On entry, INB specifies the size of the first column block of + * the global matrix. INB must be at least one. 
+ * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the columns of the matrix A. NB must be larger + * than one. + * + * RSRC (global input) const int + * On entry, RSRC specifies the row coordinate of the process + * that possesses the row I. RSRC must be at least zero and + * strictly less than NPROW. + * + * CSRC (global input) const int + * On entry, CSRC specifies the column coordinate of the process + * that possesses the column J. CSRC must be at least zero and + * strictly less than NPCOL. + * + * MYROW (local input) const int + * On entry, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. + * + * II (local output) int * + * On exit, II specifies the local starting row index of the + * submatrix. On exit, II is at least 0. + * + * JJ (local output) int * + * On exit, JJ specifies the local starting column index of the + * submatrix. On exit, JJ is at least 0. + * + * PROW (global output) int * + * On exit, PROW is the row coordinate of the process owning the + * entry specified by the global index I. PROW is at least zero + * and less than NPROW. + * + * PCOL (global output) int * + * On exit, PCOL is the column coordinate of the process owning + * the entry specified by the global index J. PCOL is at least + * zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc; +/* .. + * .. Executable Statements .. + */ + imb = IMB; + *PROW = RSRC; + + if( ( *PROW == -1 ) || ( NPROW == 1 ) ) + { +/* + * The data is not distributed, or there is just one process row in the + * grid. + */ + *II = I; + } + else if( I < imb ) + { +/* + * I refers to an entry in the first block of rows + */ + *II = ( MYROW == *PROW ? I : 0 ); + } + else + { + mb = MB; + rsrc = *PROW; +/* + * The discussion goes as follows: compute my distance from the source + * process so that within this process coordinate system, the source + * process is the process such that mydist = 0, or equivalently + * MYROW == rsrc. + * + * Find out the global coordinate of the block I belongs to (nblocks), + * as well as the minimum local number of blocks that every process has. + * + * when mydist < nblocks-ilocblk*NPROCS, I own ilocblk + 1 full blocks, + * when mydist > nblocks-ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks-ilocblk*NPROCS, I own ilocblk full blocks + * but not I, or I own ilocblk + 1 blocks and the entry I refers to. + */ + if( MYROW == rsrc ) + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I - imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only + * three possible cases: + * + * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I do not own + * I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this + * case cannot happen when ilocblk is zero, since nblocks is at + * least one. + * + * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in + * which case I and II can respectively be written as IMB + + * (nblocks-1)*NB + IL and IMB + (ilocblk-1) * MB + IL. That is + * II = I + (ilocblk-nblocks)*MB. 
Note that this case cannot happen + * when ilocblk is zero, since nblocks is at least one. + * + * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns + * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. + * Note that when ilocblk is zero, II is just IMB. + */ + if( nblocks < NPROW ) + { + *II = imb; + } + else + { + ilocblk = nblocks / NPROW; + if( ilocblk * NPROW >= nblocks ) + { + *II = ( ( MYROW == *PROW ) ? + I + ( ilocblk - nblocks ) * mb : + imb + ( ilocblk - 1 ) * mb ); + } + else + { + *II = imb + ilocblk * mb; + } + } + } + else + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I -= imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; +/* + * When mydist < nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks + * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB. + * When mydist>=nblocks-ilocblk*NPROW and I do not own I, I own ilocblk + * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk + * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB. + */ + if( nblocks < NPROW ) + { + mydist -= nblocks; + *II = ( ( mydist < 0 ) ? mb : + ( ( MYROW == *PROW ) ? + I + ( 1 - nblocks ) * mb : 0 ) ); + } + else + { + ilocblk = nblocks / NPROW; + mydist -= nblocks - ilocblk * NPROW; + *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : + ( ( MYROW == *PROW ) ? + ( ilocblk - nblocks + 1 ) * mb + I : + ilocblk * mb ) ); + } + } + } +/* + * Idem for the columns + */ + inb = INB; + *PCOL = CSRC; + + if( ( *PCOL == -1 ) || ( NPCOL == 1 ) ) + { + *JJ = J; + } + else if( J < inb ) + { + *JJ = ( MYCOL == *PCOL ? 
J : 0 ); + } + else + { + nb = NB; + csrc = *PCOL; + + if( MYCOL == csrc ) + { + nblocks = ( J - inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( nblocks < NPCOL ) + { + *JJ = inb; + } + else + { + ilocblk = nblocks / NPCOL; + if( ilocblk * NPCOL >= nblocks ) + { + *JJ = ( ( MYCOL == *PCOL ) ? + J + ( ilocblk - nblocks ) * nb : + inb + ( ilocblk - 1 ) * nb ); + } + else + { + *JJ = inb + ilocblk * nb; + } + } + } + else + { + nblocks = ( J -= inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; + + if( nblocks < NPCOL ) + { + mydist -= nblocks; + *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? + J + ( 1 - nblocks )*nb : 0 ) ); + } + else + { + ilocblk = nblocks / NPCOL; + mydist -= nblocks - ilocblk * NPCOL; + *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : + ( ( MYCOL == *PCOL ) ? + ( ilocblk - nblocks + 1 ) * nb + J : + ilocblk * nb ) ); + } + } + } +/* + * End of HPL_infog2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numroc.c new file mode 100644 index 000000000..39cd736d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numroc.c @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numroc +( + const int N, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numroc +( N, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numroc returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index 0; it simply calls HPL_numrocI with I = 0. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) ); +/* + * End of HPL_numroc + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numrocI.c new file mode 100644 index 000000000..70f3497de --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_numrocI.c @@ -0,0 +1,243 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numrocI +( + const int N, + const int I, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numrocI +( N, I, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int I; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numrocI returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index I. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * I (input) const int + * On entry, I specifies the global index of the matrix entry. + * I must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. 
+ * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be in [0,NPROCS), or -1 if the data is not distributed. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, inb, mydist, nblocks, srcproc; +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( N ); +/* + * Compute coordinate of process owning I and corresponding INB + */ + srcproc = SRCPROC; + + if( ( inb = INB - I ) <= 0 ) + { +/* + * I is not in the first block, find out which process has it and update + * the size of first block + */ + srcproc += ( nblocks = (-inb) / NB + 1 ); + srcproc -= ( srcproc / NPROCS ) * NPROCS; + inb += nblocks * NB; + } +/* + * Now everything is just like N, I=0, INB, NB, srcproc, NPROCS. The + * discussion goes as follows: compute my distance from the source pro- + * cess so that within this process coordinate system, the source pro- + * cess is the process such that mydist = 0, or PROC == srcproc. + * + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. 
Then remark that + * + * when mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks, + * when mydist > nblocks - ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks - ilocblk*NPROCS, either the last block is not + * full and I own it, or the last block is full and I am the first pro- + * cess owning only ilocblk full blocks. + */ + if( PROC == srcproc ) + { +/* + * I am the source process, i.e. I own I (mydist=0). When N <= INB, the + * answer is simply N. + */ + if( N <= inb ) return( N ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only + * two possible cases: + * + * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di- + * vides the global number of full blocks, then the source process + * srcproc owns one more block than the other processes; and N can + * be rewritten as N = INB + (nblocks-1) * NB + LNB with LNB >= 0 + * size of the last block. Similarly, the local value Np correspon- + * ding to N can be written as Np = INB + (ilocblk-1) * NB + LNB = + * N + ( ilocblk-1 - (nblocks-1) )*NB. Note that this case cannot + * happen when ilocblk is zero, since nblocks is at least one. + * + * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only + * owns full blocks, and therefore Np = INB + ilocblk * NB. Note + * that when ilocblk is zero, Np is just INB. + */ + if( nblocks < NPROCS ) return( inb ); + + ilocblk = nblocks / NPROCS; + return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB : + N + ( ilocblk - nblocks ) * NB ); + } + else + { +/* + * I am not the source process. When N <= INB, the answer is simply 0. 
+ */ + if( N <= inb ) return( 0 ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS; +/* + * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks + * of size NB since I am not the source process, + * + * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks + * of size NB since I am not the source process, + * + * when mydist = nblocks - ilocblk*NPROCS, + * either the last block is not full and I own it, in which case + * N = INB + (nblocks - 1)*NB + LNB with LNB the size of the last + * block such that NB > LNB > 0; the local value Np corresponding to + * N is given by Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB; + * or the last block is full and I am the first process owning only + * ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and + * Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB. + */ + if( nblocks < NPROCS ) + return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 : + N - inb + NB * ( 1 - nblocks ) ) ); + + ilocblk = nblocks / NPROCS; + mydist -= nblocks - ilocblk * NPROCS; + return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : + ( ( mydist > 0 ) ? 
ilocblk * NB : + N - inb + NB * ( ilocblk - nblocks + 1 ) ) ); + } +/* + * End of HPL_numrocI + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pabort.c new file mode 100644 index 000000000..268975fc1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pabort.c @@ -0,0 +1,137 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pabort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pabort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pabort displays an error message on stderr and halts execution. + * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( stderr, + "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); + + MPI_Abort( MPI_COMM_WORLD, -1 ); + exit( -1 ); +/* + * End of HPL_pabort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlamch.c new file mode 100644 index 000000000..73cf649da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlamch.c @@ -0,0 +1,143 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlamch +( + MPI_Comm COMM, + const HPL_T_MACH CMACH +) +#else +double HPL_pdlamch +( COMM, CMACH ) + MPI_Comm COMM; + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum(sfmin) such that + * 1/sfmin does not overflow, the base of the machine (base), the precision + * (prec), the number of (base) digits in the mantissa (t), whether + * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum + * exponent before (gradual) underflow (emin), the underflow threshold + * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the + * overflow threshold (rmax) - (base**emax)*(1-eps). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * CMACH (global input) const HPL_T_MACH + * Specifies the value to be returned by HPL_pdlamch + * = HPL_MACH_EPS, HPL_pdlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + * = HPL_MACH_BASE, HPL_pdlamch := base + * = HPL_MACH_PREC, HPL_pdlamch := eps*base + * = HPL_MACH_MLEN, HPL_pdlamch := t + * = HPL_MACH_RND, HPL_pdlamch := rnd + * = HPL_MACH_EMIN, HPL_pdlamch := emin + * = HPL_MACH_RMIN, HPL_pdlamch := rmin + * = HPL_MACH_EMAX, HPL_pdlamch := emax + * = HPL_MACH_RMAX, HPL_pdlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double param; +/* .. + * .. Executable Statements .. + */ + param = HPL_dlamch( CMACH ); +/* Agree on one value over COMM: HPL_max or HPL_min, depending on CMACH */ + switch( CMACH ) + { + case HPL_MACH_EPS : + case HPL_MACH_SFMIN : + case HPL_MACH_EMIN : + case HPL_MACH_RMIN : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_max, COMM ); + break; + case HPL_MACH_EMAX : + case HPL_MACH_RMAX : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_min, COMM ); + break; + default : + break; + } + + return( param ); +/* + * End of HPL_pdlamch + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlange.c new file mode 100644 index 000000000..40bdcc36b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlange.c @@ -0,0 +1,242 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlange +( + const HPL_T_grid * GRID, + const HPL_T_NORM NORM, + const int M, + const int N, + const int NB, + const double * A, + const int LDA +) +#else +double HPL_pdlange +( GRID, NORM, M, N, NB, A, LDA ) + const HPL_T_grid * GRID; + const HPL_T_NORM NORM; + const int M; + const int N; + const int NB; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a distributed matrix A: + * + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NORM (global input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero; if M is zero, HPL_rzero is returned. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero; if N is zero, HPL_rzero is returned. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. 
+ * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,LocQ(N)), + * that contains the local pieces of the distributed matrix A. 
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + MPI_Comm Acomm, Ccomm, Rcomm; + int ii, jj, mp, mycol, myrow, npcol, nprow, + nq; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Ccomm = GRID->col_comm; + Acomm = GRID->all_comm; + + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( Mmin( M, N ) == 0 ) { return( v0 ); } + else if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ) + */ + if( ( nq > 0 ) && ( mp > 0 ) ) + { + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - mp; + } + } + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Acomm ); + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ). 
+ */ + if( nq > 0 ) + { + work = (double*)malloc( (size_t)(nq) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( jj = 0; jj < nq; jj++ ) + { + s = HPL_rzero; + for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; } + work[jj] = s; A += LDA - mp; + } +/* + * Find sum of global matrix columns, store on row 0 of process grid + */ + (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum, + 0, Ccomm ); +/* + * Find maximum sum of columns for 1-norm + */ + if( myrow == 0 ) + { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in row 0, store result in process (0,0) + */ + if( myrow == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Rcomm ); + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ) + */ + if( mp > 0 ) + { + work = (double*)malloc( (size_t)(mp) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; } + + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { work[ii] += Mabs( *A ); A++; } + A += LDA - mp; + } +/* + * Find sum of global matrix rows, store on column 0 of process grid + */ + (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum, + 0, Rcomm ); +/* + * Find maximum sum of rows for inf-norm + */ + if( mycol == 0 ) + { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in column 0, store result in process (0,0) + */ + if( mycol == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, + 0, Ccomm ); + } +/* + * Broadcast answer to every process in the grid + */ + (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm ); + + return( v0 ); +/* + * End of HPL_pdlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlaprnt.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlaprnt.c new file mode 100644 index 000000000..20f11129a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pdlaprnt.c @@ -0,0 +1,236 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaprnt +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int IAROW, + const int IACOL, + const char * CMATNM +) +#else +void HPL_pdlaprnt +( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int IAROW; + const int IACOL; + const char * CMATNM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaprnt prints to standard error a distributed matrix A. The + * local pieces of A are sent to the process of coordinates (0,0) in + * the grid and then printed. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the coefficient + * matrix A. M must be at least zero. 
+ * + * N (global input) const int + * On entry, N specifies the number of columns of the + * coefficient matrix A. N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * A (local input) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). + * This array contains the coefficient matrix to be printed. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * IAROW (global input) const int + * On entry, IAROW specifies the row process coordinate owning + * the first row of A. IAROW must be larger than or equal to + * zero and less than NPROW. + * + * IACOL (global input) const int + * On entry, IACOL specifies the column process coordinate + * owning the first column of A. IACOL must be larger than or + * equal to zero and less than NPCOL. + * + * CMATNM (global input) const char * + * On entry, CMATNM is the name of the matrix to be printed. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Acomm; + double * buf = NULL; + int h, i, ib, icurcol=IACOL, icurrow=IAROW, + ii=0, j, jb, jj=0, mycol, myrow, npcol, + nprow, src; +/* .. + * .. Executable Statements .. 
+ */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Acomm = GRID->all_comm; + if( ( myrow == 0 ) && ( mycol == 0 ) ) + buf = (double*)malloc( (size_t)(NB) * sizeof( double ) ); + + for( j = 0; j < N; j += NB ) + { + jb = N-j; jb = Mmin( jb, NB ); + for( h = 0; h < jb; h++ ) + { + (void) HPL_barrier( Acomm ); + + for( i = 0; i < M; i += NB ) + { + ib = M-i; ib = Mmin( ib, NB ); + if( ( icurrow == 0 ) && ( icurcol == 0 ) ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1, + j+h+1, LDA, CMATNM ); + } + else + { + if( ( myrow == icurrow ) && ( mycol == icurcol ) ) + { + (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0, + 9000+(j+h)*M+i, Acomm ); + } + else if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + src = HPL_pnum( GRID, icurrow, icurcol ); + (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i, + Acomm ); + if (buf != NULL) + HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM ); + } + } + if( myrow == icurrow ) ii += ib; + icurrow = MModAdd1( icurrow, nprow ); + (void) HPL_barrier( Acomm ); + } + ii = 0; icurrow = IAROW; + } + if( mycol == icurcol ) jj += jb; + icurcol = MModAdd1( icurcol, npcol ); + (void) HPL_barrier( Acomm ); + } + if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf ); +/* + * End of HPL_pdlaprnt + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pwarn.c new file mode 100644 index 000000000..a9f666f89 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pauxil/HPL_pwarn.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pwarn +( + FILE * STREAM, + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pwarn( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pwarn displays an error message. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + FILE * STREAM; + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. 
+ */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); +/* + * End of HPL_pwarn + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocmax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocmax.c new file mode 100644 index 000000000..644641412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocmax.c @@ -0,0 +1,149 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dlocmax +( + HPL_T_panel * PANEL, + const int N, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocmax +( PANEL, N, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int N; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocmax finds the maximum entry in the current column and packs + * the useful information in WORK[0:3]. 
On exit, WORK[0] contains the + * local maximum absolute value scalar, WORK[1] is the corresponding + * local row index, WORK[2] is the corresponding global row index, and + * WORK[3] is the coordinate of the process owning this max. When N is + * less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set + * to the total number of process rows. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of the column + * of A on which we operate. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 4. On exit, + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A; + int kk, igindx, ilindx, myrow, nb, nprow; +/* .. + * .. Executable Statements .. + */ + if( N > 0 ) + { + A = Mptr( PANEL->A, II, JJ, PANEL->lda ); + myrow = PANEL->grid->myrow; + nprow = PANEL->grid->nprow; + nb = PANEL->nb; + kk = PANEL->ii + II + ( ilindx = HPL_idamax( N, A, 1 ) ); + Mindxl2g( igindx, kk, nb, nb, myrow, 0, nprow ); +/* + * WORK[0] := local maximum absolute value scalar, + * WORK[1] := corresponding local row index, + * WORK[2] := corresponding global row index, + * WORK[3] := coordinate of process owning this max. 
+ */ + WORK[0] = A[ilindx]; WORK[1] = (double)(ilindx); + WORK[2] = (double)(igindx); WORK[3] = (double)(myrow); + } + else + { +/* + * If I do not have any row of A, then set the coordinate of the process + * (WORK[3]) owning this "ghost" row, such that it will never be used, + * even if there are only zeros in the current column of A. + */ + WORK[0] = WORK[1] = WORK[2] = HPL_rzero; + WORK[3] = (double)(PANEL->grid->nprow); + } +/* + * End of HPL_dlocmax + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpN.c new file mode 100644 index 000000000..a3919500a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpN.c @@ -0,0 +1,436 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the
+ * names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factor. Both macros are defined
+ * together under a guard on HPL_LOCSWP_DEPTH only, so a build that
+ * overrides HPL_LOCSWP_DEPTH must also supply the matching
+ * HPL_LOCSWP_LOG2_DEPTH.
+ */
+#ifndef HPL_LOCSWP_DEPTH
+#define HPL_LOCSWP_DEPTH 32
+#define HPL_LOCSWP_LOG2_DEPTH 5
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlocswpN
+(
+   HPL_T_panel * PANEL,
+   const int II,
+   const int JJ,
+   double * WORK
+)
+#else
+void HPL_dlocswpN
+( PANEL, II, JJ, WORK )
+   HPL_T_panel * PANEL;
+   const int II;
+   const int JJ;
+   double * WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlocswpN performs the local swapping operations within a panel.
+ * The lower triangular N0-by-N0 upper block of the panel is stored in
+ * no-transpose form (i.e. just like the input matrix itself).
+ *
+ * Summary of the cases handled below (Wmx = WORK+4 is the max row,
+ * Wr0 = WORK+4+N0 is the current row, L walks L1 with stride N0, A1
+ * and A2 walk A with stride lda):
+ *
+ *   pivot non-zero, I own the current row and the max row:
+ *      local row index != 0 : L1 and A1 receive Wmx, A2 receives Wr0;
+ *      local row index == 0 : A(II,JJ) is set to the pivot, L1
+ *                             receives Wmx;
+ *   pivot non-zero, I own the current row only:
+ *      L1 and A1 receive Wmx;
+ *   pivot non-zero, I do not own the current row:
+ *      L1 receives Wmx, and if I own the max row, A2 receives Wr0;
+ *   pivot zero:
+ *      L1 receives Wr0 and PANEL->DINFO is set if it is still zero.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL (local input/output) HPL_T_panel *
+ * On entry, PANEL points to the data structure containing the
+ * panel information.
+ *
+ * II (local input) const int
+ * On entry, II specifies the row offset where the column to be
+ * operated on starts with respect to the panel.
+ *
+ * JJ (local input) const int
+ * On entry, JJ specifies the column offset where the column to
+ * be operated on starts with respect to the panel.
+ *
+ * WORK (local workspace) double *
+ * On entry, WORK is a workarray of size at least 2 * (4+2*N0).
+ * WORK[0] contains the local maximum absolute value scalar,
+ * WORK[1] contains the corresponding local row index, WORK[2]
+ * contains the corresponding global row index, and WORK[3] is
+ * the coordinate of process owning this max. The N0 length max
+ * row is stored in WORK[4:4+N0-1]; Note that this is also the
+ * JJth row (or column) of L1. The remaining part of this array
+ * is used as workspace.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double gmax;
+   double * A1, * A2, * L, * Wr0, * Wmx;
+   int ilindx, lda, myrow, n0, nr, nu;
+   register int i;
+/* ..
+ * .. Executable Statements ..
+ */
+   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
+
+/*
+ * Wmx points at WORK[4], Wr0 at WORK[4+n0]; the pivot value WORK[0] is
+ * written into Wmx[JJ]. nu is n0 rounded down to a multiple of
+ * 2^HPL_LOCSWP_LOG2_DEPTH (handled by the unrolled loops), nr is the
+ * remainder (handled by the short clean-up loops).
+ */
+   Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
+   nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH )
+               << HPL_LOCSWP_LOG2_DEPTH );
+   nr = n0 - nu;
+/*
+ * Replicated swap and copy of the current (new) row of A into L1
+ */
+   L = Mptr( PANEL->L1, JJ, 0, n0 );
+/*
+ * If the pivot is non-zero ...
+ */
+   if( gmax != HPL_rzero )
+   {
+/*
+ * and if I own the current row of A ...
+ */
+      if( myrow == PANEL->prow )
+      {
+/*
+ * and if I also own the row to be swapped with the current row of A ...
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+/*
+ * and if the current row of A is not to be swapped with itself ...
+ */
+            if( ( ilindx = (int)(WORK[1]) ) != 0 )
+            {
+/*
+ * then copy the max row into L1 and locally swap the 2 rows of A.
+ */
+               A1 = Mptr( PANEL->A, II, 0, lda );
+               A2 = Mptr( A1, ilindx, 0, lda );
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH > 1 )
+                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 2 )
+                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 4 )
+                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 8 )
+                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
+                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
+#endif
+               }
+/*
+ * Clean-up loop for the nr trailing entries; Wmx and Wr0 were advanced
+ * past the unrolled part by the loop above.
+ */
+               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
+               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
+            }
+            else
+            {
+/*
+ * otherwise the current row of A is swapped with itself, so just copy
+ * the current row of A into L1.
+ */
+               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
+
+               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
+               {
+                  *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH > 1 )
+                  *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 2 )
+                  *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 4 )
+                  *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+                  *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 8 )
+                  *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+                  *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+                  *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+                  *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+                  *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+                  *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+                  *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+                  *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+                  *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+                  *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+                  *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+                  *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+               }
+               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+            }
+         }
+         else
+         {
+/*
+ * otherwise, the row to be swapped with the current row of A is in Wmx,
+ * so copy Wmx into L1 and A.
+ */
+            A1 = Mptr( PANEL->A, II, 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wmx += HPL_LOCSWP_DEPTH )
+            {
+               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
+#if ( HPL_LOCSWP_DEPTH > 1 )
+               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 2 )
+               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 4 )
+               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 8 )
+               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
+               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, L += n0, A1 += lda )
+            { *L = *A1 = Wmx[i]; }
+         }
+      }
+      else
+      {
+/*
+ * otherwise I do not own the current row of A, so copy the max row Wmx
+ * into L1.
+ */
+         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+              Wmx += HPL_LOCSWP_DEPTH )
+         {
+            *L = Wmx[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH > 1 )
+            *L = Wmx[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 2 )
+            *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 4 )
+            *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
+            *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 8 )
+            *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
+            *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
+            *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
+            *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+            *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
+            *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
+            *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
+            *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
+            *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
+            *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
+            *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
+            *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
+#endif
+         }
+         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
+/*
+ * and if I own the max row, overwrite it with the current row Wr0.
+ */
+         if( myrow == (int)(WORK[3]) )
+         {
+            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
+
+            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+                 Wr0 += HPL_LOCSWP_DEPTH )
+            {
+               *A2 = Wr0[ 0]; A2+=lda;
+#if ( HPL_LOCSWP_DEPTH > 1 )
+               *A2 = Wr0[ 1]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 2 )
+               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 4 )
+               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
+               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 8 )
+               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
+               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
+               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
+               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
+               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
+               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
+               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
+               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
+               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
+               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
+               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
+#endif
+            }
+
+            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
+         }
+      }
+   }
+   else
+   {
+/*
+ * Otherwise the max element in the current column is zero, simply copy
+ * the current row Wr0 into L1. The matrix is singular.
+ */
+      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
+           Wr0 += HPL_LOCSWP_DEPTH )
+      {
+         *L = Wr0[ 0]; L+=n0;
+#if ( HPL_LOCSWP_DEPTH > 1 )
+         *L = Wr0[ 1]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 2 )
+         *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 4 )
+         *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0;
+         *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 8 )
+         *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0;
+         *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0;
+         *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0;
+         *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0;
+#endif
+#if ( HPL_LOCSWP_DEPTH > 16 )
+         *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0;
+         *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0;
+         *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0;
+         *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0;
+         *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0;
+         *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0;
+         *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0;
+         *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0;
+#endif
+      }
+
+      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
+/*
+ * set INFO: record PANEL->ia + JJ + 1 for this zero pivot, but only if
+ * no earlier value has been recorded (DINFO still equal to zero).
+ */
+      if( *(PANEL->DINFO) == 0.0 )
+         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
+   }
+/*
+ * End of HPL_dlocswpN
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpT.c
new file mode 100644
index 000000000..89b86e35a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_dlocswpT.c
@@ -0,0 +1,406 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpT +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpT +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpT performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). 
+ * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, 0, JJ, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH, + L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda; + L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda; + L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda; + L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda; + L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda; + L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda; + L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda; + L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda; + L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda; + L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda; + L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda; + L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda; + L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda; + L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda; + L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda; + L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda; + L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda; + L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda; + L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda; + L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda; + L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda; + L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda; + L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda; + L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda; + L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda; + L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; 
A2+=lda; + L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda, A2 += lda ) + { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current of A into L1. + */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; + L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[12]=Wmx[12]; + L[ 9]=Wmx[ 9]; L[13]=Wmx[13]; + L[10]=Wmx[10]; L[14]=Wmx[14]; + L[11]=Wmx[11]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[20]=Wmx[20]; + L[17]=Wmx[17]; L[21]=Wmx[21]; + L[18]=Wmx[18]; L[22]=Wmx[22]; + L[19]=Wmx[19]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[28]=Wmx[28]; + L[25]=Wmx[25]; L[29]=Wmx[29]; + L[26]=Wmx[26]; L[30]=Wmx[30]; + L[27]=Wmx[27]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; A1+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda; + L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda; + L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda; + L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda; + L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda; + L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda; + L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda; + L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda; + L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda; + L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda; + L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda; + L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1. 
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11]; + L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19]; + L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27]; + L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. + */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28]; 
A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. + */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wr0[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wr0[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13]; + L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21]; + L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23]; + L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29]; + L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; } +/* + * Set INFO. + */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdfact.c new file mode 100644 index 000000000..1d99c6e14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdfact.c @@ -0,0 +1,141 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdfact +( + HPL_T_panel * PANEL +) +#else +void HPL_pdfact +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdfact recursively factorizes a 1-dimensional panel of columns. + * The RPFACT function pointer specifies the recursive algorithm to be + * used, either Crout, Left- or Right looking. NBMIN allows to vary the + * recursive stopping criterion in terms of the number of columns in the + * panel, and NDIV allows to specify the number of subpanels each panel + * should be divided into. Usually a value of 2 will be chosen. Finally + * PFACT is a function pointer specifying the non-recursive algorithm + * to be used on at most NBMIN columns. One can also choose here between + * Crout, Left- or Right looking. Empirical tests seem to indicate that + * values of 4 or 8 for NBMIN give the best results. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual.
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + void * vptr = NULL; + int align, jb; +/* .. + * .. Executable Statements .. 
+ */ + jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb; + + if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif + align = PANEL->algo->align; + vptr = (void *)malloc( ( (size_t)(align) + + (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) * + sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); } +/* + * Factor the panel - Update the panel pointers + */ + PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr, + ((size_t)(align) * sizeof(double) ) ) ); + if( vptr ) free( vptr ); + + PANEL->A = Mptr( PANEL->A, 0, jb, PANEL->lda ); + PANEL->nq -= jb; PANEL->jj += jb; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif +/* + * End of HPL_pdfact + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdmxswp.c new file mode 100644 index 000000000..b14452197 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdmxswp.c @@ -0,0 +1,311 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmxswp +( + HPL_T_panel * PANEL, + const int M, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_pdmxswp +( PANEL, M, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int M; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmxswp swaps and broadcasts the absolute value max row using + * bi-directional exchange. The buffer is partially set by HPL_dlocmax. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by + * + * log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + * + * where lat and bdwth are the latency and bandwidth of the network for + * double precision real elements. Communication only occurs in one + * process column. Mono-directional links will cause the communication + * cost to double. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of the matrix + * column on which this function operates. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). 
+ * It is assumed that HPL_dlocmax was called prior to this + * routine to initialize the first four entries of this array. + * On exit, the N0 length max row is stored in WORK[4:4+N0-1]; + * Note that this is also the JJth row (or column) of L1. The + * remaining part is used as a temporary array. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax, tmp1; + double * A0, * Wmx, * Wwork; + HPL_T_grid * grid; + MPI_Comm comm; + unsigned int hdim, ip2, ip2_, ipow, k, mask; + int Np2, cnt_, cnt0, i, icurrow, lda, mydist, + mydis_, myrow, n0, nprow, partner, rcnt, + root, scnt, size_; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif + grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow; +/* + * ip2 : the largest power of two less than or equal to nprow; + * hdim : dimension of the hypercube made of those ip2 processes; + * Np2 : logical flag, non-zero when nprow is not a power of 2; + */ + comm = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2); + hdim = (unsigned int)(grid->row_hdim); n0 = PANEL->jb; + icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 ); + mydist = MModSub( myrow, icurrow, nprow ); +/* + * Set up pointers in workspace: WORK and Wwork point to the beginning + * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row + * owning the local (before combine) and global (after combine) absolute + * value max. A0 points to the copy of the current row of the matrix. + */ + cnt0 = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0; + Wwork = WORK + cnt0; +/* + * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is (int)(WORK[1]) (row + * with max in current column). If I am the current process row, pack in + * addition the current row of A in A0[0:N0-1]. If I do not own any row + * of A, then zero out Wmx[0:N0-1].
+ */ + if( M > 0 ) + { + lda = PANEL->lda; + HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda, + Wmx, 1 ); + if( myrow == icurrow ) + { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); } + } + else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; } +/* + * Combine the results (bi-directional exchange): the process coordina- + * tes are relative to icurrow, this allows to reduce the communication + * volume when nprow is not a power of 2. + * + * When nprow is not a power of 2: proc[i-ip2] receives local data from + * proc[i] for all i in [ip2..nprow). In addition, proc[0] (icurrow) + * sends to proc[ip2] the current row of A for later broadcast in procs + * [ip2..nprow). + */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + if( mydist == (int)(ip2) ) + (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + if( mydist == 0 ) + (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); } + } + } + + if( mydist < (int)(ip2) ) + { +/* + * power of 2 part of the processes collection: processes [0..ip2) are + * combining (binary exchange); proc[0] has two rows to send, but one to + * receive. At every step k in [0..hdim) of the algorithm, a process + * pair exchanging 2 rows is such that myrow >> k+1 is 0. Among those + * processes the ones that are sending one more row than what they are + * receiving are such that myrow >> k is equal to 0. 
+ */ + k = 0; ipow = 1; + + while( k < hdim ) + { + if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 ) + { + if( ( (unsigned int)(mydist) >> k ) == 0 ) + { scnt = cnt0; rcnt = cnt_; } + else + { scnt = cnt_; rcnt = cnt0; } + } + else { scnt = rcnt = cnt_; } + + partner = (int)( (unsigned int)(mydist) ^ ipow ); + (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt, + MSGID_BEGIN_PFACT, MModAdd( partner, icurrow, + nprow ), comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { + HPL_dcopy( ( rcnt == cnt0 ? cnt0 : cnt_ ), Wwork, 1, + WORK, 1 ); + } + else if( rcnt == cnt0 ) + { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); } + + ipow <<= 1; k++; + } + } + else if( size_ > 1 ) + { +/* + * proc[ip2] broadcast current row of A to procs [ip2+1..nprow). + */ + k = (unsigned int)(size_) - 1; ip2_ = mask = 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else if( partner < size_ ) + { + (void) HPL_send( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } + ip2_ >>= 1; + } while( ip2_ > 0 ); + } +/* + * If nprow is not a power of 2, for all i in [ip2..nprow), proc[i-ip2] + * sends the pivot row to proc[i] along with the first four entries of + * the WORK array. 
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } +/* + * Save the global pivot index in pivot array + */ + (PANEL->DPIV)[JJ] = WORK[2]; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif +/* + * End of HPL_pdmxswp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrN.c new file mode 100644 index 000000000..4ea170b73 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrN.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in no-transpose form (i.e. just like the input + * matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 when PANEL->grid->myrow equals PANEL->prow, else 0 */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj, jj+1, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, Nm1 ); + Xv1 = vsip_msubview_d( Xv0, jj, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Xv0, jj, jj+1, 1, Nm1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone, + Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj, + ICOFF, n0 ), n0, 
HPL_rone, L1ptr, n0 ); +#endif + 
if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) /* HPL_rone / WORK[0] is evaluated only when WORK[0] differs from HPL_rzero */ + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk+1, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + vsip_mdestroy_d( Yv1 ); + vsip_mdestroy_d( Xv1 ); + vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF, + jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ), + 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + 
HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of 
HPL_pdpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrT.c new file mode 100644 index 000000000..50ed300aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpancrT.c @@ -0,0 +1,267 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 when PANEL->grid->myrow equals PANEL->prow, else 0 */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj+1, jj, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, jj+1, ICOFF, Nm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj, kk, 1 ); + Yv1 = vsip_msubview_d( Xv0, jj+1, jj, Nm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone, + Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF, + jj, n0 ), 1, HPL_rone, L1ptr, 1 ); +#endif + if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one 
pass + * through cache for each 
current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) /* HPL_rone / WORK[0] is evaluated only when WORK[0] differs from HPL_rzero */ + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk+1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF, + n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllN.c new file mode 100644 index 
000000000..fa471198d --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllN.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 when PANEL->grid->myrow equals PANEL->prow, else 0 */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF; /* kk = number of columns from ICOFF through jj, inclusive */ + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, 1 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) /* HPL_rone / WORK[0] is evaluated only when WORK[0] differs from HPL_rzero */ + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, 1, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllT.c new file mode 100644 index 000000000..a6e1b67bd --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanllT.c @@ -0,0 +1,244 
@@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a work array of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 when myrow == prow, else 0 */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF; /* kk = count of columns ICOFF..jj */ + HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) /* guards the division by WORK[0] */ + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; /* step one row down; one fewer row remains */ + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlN.c new file mode 100644 index 000000000..0a3b9a542 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlN.c @@ -0,0
+1,250 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a work array of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 when myrow == prow, else 0 */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) /* guards the division by WORK[0] */ + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); +#ifdef HPL_CALL_VSIPL + if( Nm1 > 1 ) + { +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); + } +#else + if( Nm1 > 1 ) + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda ); +#endif +/* + * Same thing as above but with worse data access on y (A += x * y^T) + * (illustrative only: L1 and n0 are not declared in this routine) + * if( Nm1 > 1 ) + * HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + * Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ), + * lda ); + */ + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* step one row down; one fewer row remains */ + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlT.c
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlT.c new file mode 100644 index 000000000..68c1afc02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdpanrlT.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a work array of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt, * L1; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M, + n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 when myrow == prow, else 0 */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) /* guards the division by WORK[0] */ + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + + if( Nm1 > 1 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ), + lda ); +#endif + } + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* step one row down; one fewer row remains */ + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrN.c new file mode 100644 index 000000000..348d7ebe6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrN.c @@ -0,0 +1,282 @@ +/* + *
-- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrN recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a work array of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) /* N <= nbmin: hand off to algo->pffun and return */ + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N > NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. (NBMIN, NDIV are the locals nbmin, nbdiv.)
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* 1 when myrow == prow, else 0 */ + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrN( PANEL, m, jb, ioff, WORK ); /* recursive call on jb columns at offset ioff */ + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + Av2 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n, + jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0, + Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go to the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrT.c
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrT.c new file mode 100644 index 000000000..a1ecfac2c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpancrT.c @@ -0,0 +1,282 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrT recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. 
On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_TRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the 
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrT( PANEL, m, jb, ioff, WORK ); + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj ); + Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_NTRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb, + jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllN.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllN.c new file mode 100644 index 000000000..4dbc13b44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllN recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, + jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ), + n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( 
vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllN( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllT.c new file mode 100644 index 000000000..887caeb87 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanllT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllT recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr, + jj, 0, n0 ), n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Av2 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllT( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb 
); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlN.c new file mode 100644 index 000000000..22f105cf4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlN recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. 
To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 
0, ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlT.c new file mode 100644 index 000000000..a77301b9b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pfact/HPL_pdrpanrlT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlT recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. 
To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); /* n, not N: same m-by-n extent as the else branch and the HPL_dgemm path */ + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0,
ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_equil.c new file mode 100644 index 000000000..b917a6525 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_equil.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_equil +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_TRANS TRANS, + const int N, + double * U, + const int LDU, + int * IPLEN, + const int * IPMAP, + const int * IPMAPM1, + int * IWORK +) +#else +void HPL_equil +( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_TRANS TRANS; + const int N; + double * U; + const int LDU; + int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_equil equilibrates the local pieces of U, so that on exit to + * this function, pieces of U contained in every process row are of the + * same size. This phase makes the rolling phase optimal. In addition, + * this function probes for the column panel L and forwards it when + * possible. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be equilibrated) information. + * + * TRANS (global input) const enum HPL_TRANS + * On entry, TRANS specifies whether U is stored in transposed + * or non-transposed form. + * + * N (local input) const int + * On entry, N specifies the number of rows or columns of U. N + * must be at least 0. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]) when U is stored in + * non-transposed form, and MAX(1,N) otherwise. + * + * IPLEN (global input) int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension NPROW+1. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, ip, ipU, ipcur, iprow, iptgt, lastrow, + left, npm1, nprow, ll, llU, llcur, lltgt, + right, slen, smax, smin; +/* .. + * .. Executable Statements .. + */ + if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; +/* + * If the current distribution of the pieces of U is already optimal for + * the rolling phase, then return immediately. The optimal distribution + * is such that ip processes have smax items and the remaining processes + * only have smin items. Another way to check this is to verify that all + * differences IPLEN[i+1] - IPLEN[i] are either smin or smax. + */ + smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow; + ip = slen - nprow * ( smin = slen / nprow ); + + iprow = 0; + do + { + ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++; + } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) ); + + if( iprow == nprow ) return; +/* + * Now, we are sure the distribution of the pieces of U is not optimal + * with respect to the rolling phase, thus perform equilibration. Go + * through the list of processes: Processes that have rows that do not + * belong to them with respect to the optimal mapping spread them in a + * logarithmic fashion. To simplify a little bit the implementation, and + * mainly the packing, a source process row spreads its data to its left + * first, and then to its right.
+ */ + IWORK[nprow] = slen; + + for( iprow = 0; iprow < nprow; iprow++ ) + { + llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] ); + if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax; } + else { lltgt = smin; iptgt = iprow * smin + ip; } + + left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU ); +/* + * If I have something to spread to either the left or the right + */ + if( ( llU > 0 ) && ( left || right ) ) + { /* Figure out how much every other process should have */ + + ipcur = ipU; llcur = llU; + + for( i = 0; i < nprow; i++ ) + { + if( i < ip ) { lltgt = smax; iptgt = i * smax; } + else { lltgt = smin; iptgt = i * smin + ip; } + lastrow = iptgt + lltgt - 1; + + if( ( lastrow >= ipcur ) && ( llcur > 0 ) ) + { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; } + else { ll = 0; } + + IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur; + } +/* + * Equilibration phase + */ + if( TRANS == HplNoTrans ) + { + if( left ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + else + { + if( left ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + } + } +/* + * Finally update IPLEN with the indexes corresponding to the new dis- + * tribution of U - IPLEN[nprow] remained unchanged. + */ + for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? 
i*smax : i*smin + ip ); +/* + * End of HPL_equil + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_logsort.c new file mode 100644 index 000000000..0715159bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_logsort.c @@ -0,0 +1,185 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_logsort +( + const int NPROCS, + const int ICURROC, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_logsort +( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 ) + const int NPROCS; + const int ICURROC; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_logsort computes an array IPMAP and its inverse IPMAPM1 that + * contain the logarithmic sorted processes id with respect to the local + * number of rows of U that they own. This is necessary to ensure that + * the logarithmic spreading of U is optimal in terms of number of steps + * and communication volume as well. In other words, the largest pieces + * of U will be sent a minimal number of times. + * + * Arguments + * ========= + * + * NPROCS (global input) const int + * On entry, NPROCS specifies the number of process rows in the + * process grid. NPROCS is at least one. + * + * ICURROC (global input) const int + * On entry, ICURROC is the source process row. + * + * IPLEN (global input/output) int * + * On entry, IPLEN is an array of dimension NPROCS+1, such that + * IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, + * that process i-1 has.
On exit, IPLEN[i] is the number of + * rows of U in the processes before process IPMAP[i] after the + * sort, with the convention that IPLEN[NPROCS] is the total + * number of rows of the panel. In other words, IPLEN[i+1] - + * IPLEN[i] is the number of rows of A that should be moved to + * the process IPMAP[i]. IPLEN is such that the number of rows + * of the source process row is IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROCS. On exit, + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myroc] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROCS. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROCS) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dist, i, ip, iplen_i, iplen_j, itmp, j, k; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the logarithmic distance between process j and process 0, as + * well as the maximum logarithmic distance. IPMAPM1 is workarray here. + */ + for( j = 0, dist = 0; j < NPROCS; j++ ) + { + IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0; + do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); + IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp; + } +/* + * Shift IPLEN[1..NPROCS] of ICURROC places, so that IPLEN[1] is now + * what used to be IPLEN[ICURROC+1]. Initialize IPMAP, so that IPMAP[0] + * is ICURROC. 
+ */ + for( j = 0; j < ICURROC; j++ ) + { + for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i]; + IPLEN[NPROCS] = itmp; + } +/* + * logarithmic sort + */ + for( k = 1; k <= dist; k++ ) + { + for( j = 1; j < NPROCS; j++ ) + { + if( IPMAPM1[j] == k ) + { + for( i = 2; i < NPROCS; i++ ) + { + if( k < IPMAPM1[i] ) + { + iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1]; + + if( iplen_j < iplen_i ) + { + IPLEN[j+1] = iplen_i; IPLEN[i+1] = iplen_j; + itmp = IPMAP[j]; IPMAP[j] = IPMAP[i]; + IPMAP[i] = itmp; + } + } + } + } + } + } +/* + * Compute IPLEN and IPMAPM1 (the inverse of IPMAP) + */ + IPLEN[0] = 0; + + for( i = 0; i < NPROCS; i++ ) + { + IPMAPM1[ IPMAP[i] ] = i; + IPLEN[i+1] += IPLEN[i]; + } +/* + * End of HPL_logsort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv.c new file mode 100644 index 000000000..ced74269e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with or without look-ahead. The lower triangular factor is left + * unpivoted and the pivots are not returned. The right hand side is the + * N+1 column of the coefficient matrix. 
+ * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( A->n <= 0 ) return; + + A->info = 0; + + if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) ) + { + HPL_pdgesv0( GRID, ALGO, A ); + } + else + { + HPL_pdgesvK2( GRID, ALGO, A ); + } +/* + * Solve upper triangular system + */ + if( A->info == 0 ) HPL_pdtrsv( GRID, A ); +/* + * End of HPL_pdgesv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv0.c new file mode 100644 index 000000000..d79b6fa55 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesv0.c @@ -0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv0 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv0 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv0 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. 
The main algorithm is the "right looking" variant + * without look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, j, jb, n, nb, tag=MSGID_BEGIN_FACT, + test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( N = A->n ) <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + + HPL_pdupdate = ALGO->upfun; nb = A->nb; +/* + * Allocate a panel list of length 1 - Allocate panel[0] resources + */ + panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); } + + HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag, + &panel[0] ); +/* + * Loop over the columns of A + */ + for( j = 0; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Release panel resources - re-initialize panel data structure + */ + (void) HPL_pdpanel_free( panel[0] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] ); +/* + * Factor and broadcast current panel - update + */ + HPL_pdfact( panel[0] ); + (void) HPL_binit( panel[0] ); + do + { (void) HPL_bcast( panel[0], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[0] ); + HPL_pdupdate( NULL, NULL, panel[0], -1 ); +/* + * Update message id for next factorization + */ + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Release panel resources and panel list + */ + (void) HPL_pdpanel_disp( &panel[0] ); + + if( panel ) free( panel ); +/* + * End of HPL_pdgesv0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK1.c new file mode 100644 index 000000000..ff1958cfc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK1.c @@ -0,0 +1,222 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK1 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK1 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK1 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. 
The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. 
+ */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel - use long topology for those + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-1-k panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Allocate current panel resources - Finish latest update - Factor and + * broadcast current panel + */ + HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Release latest panel resources - circular of the panel pointers + * Go to the next process row and column - update the message ids for + * broadcast + */ + (void) HPL_pdpanel_disp( &panel[0] ); + for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + + if( panel ) free( panel ); +/* + * End of HPL_pdgesvK1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK2.c new file mode 100644 index 000000000..dec506ab9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdgesvK2.c @@ -0,0 +1,231 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK2 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK2 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK2 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + HPL_T_panel * p, * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Create last depth+1 panel + */ + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart, + jstart, tag, &panel[depth] ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-k-1 panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, 
panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Initialize current panel - Finish latest update, Factor and broadcast + * current panel + */ + (void) HPL_pdpanel_free( panel[depth] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Circular of the panel pointers: + * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp; + * + * Go to next process row and column - update the message ids for broadcast + */ + p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + panel[depth] = p; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + (void) HPL_pdpanel_disp( &panel[depth] ); + + if( panel ) free( panel ); +/* + * End of 
HPL_pdgesvK2 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00N.c new file mode 100644 index 000000000..b4433e1be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00N.c @@ -0,0 +1,432 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU jb +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb) + */ + vptr = (void*)malloc( + ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necesary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise. 
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this later case, and I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcast U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00T.c new file mode 100644 index 000000000..7a9764c09 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp00T.c @@ -0,0 +1,433 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU n +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb) + */ + vptr = (void*)malloc( ( (size_t)(align) + + ((size_t)(jb) * (size_t)(ldW))) * + sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necessary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise.
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this latter case, if I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcast U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01N.c new file mode 100644 index 000000000..31f219840 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01N.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU jb +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). 
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01N was called before: only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k], + 0, LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen, + ipmap, ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * 
Permute U in every process row + */ + HPL_dlaswp00N( jb, n, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01T.c new file mode 100644 index 000000000..0c4de2669 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdlaswp01T.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. 
K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU n +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). 
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01T was called before: only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0, + iplen[k], LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap, + ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * 
Permute U in every process row + */ + HPL_dlaswp10N( n, jb, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdtrsv.c new file mode 100644 index 000000000..d2135130a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdtrsv.c @@ -0,0 +1,296 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtrsv +( + HPL_T_grid * GRID, + HPL_T_pmat * AMAT +) +#else +void HPL_pdtrsv +( GRID, AMAT ) + HPL_T_grid * GRID; + HPL_T_pmat * AMAT; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtrsv solves an upper triangular system of linear equations. + * + * The rhs is the last column of the N by N+1 matrix A. The solve starts + * in the process column owning the Nth column of A, so the rhs b may + * need to be moved one process column to the left at the beginning. The + * routine therefore needs a column vector in every process column but + * the one owning b. The result is replicated in all process rows, and + * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + * + * The algorithm uses decreasing one-ring broadcast in process rows and + * columns implemented in terms of synchronous communication point to + * point primitives. The lookahead of depth 1 is used to minimize the + * critical path. 
This entire operation is essentially ``latency'' bound + * and an estimate of its running time is given by: + * + * (move rhs) lat + N / ( P bdwth ) + + * (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + * gam2 N^2 / ( P Q ), + * + * where gam2 is an estimate of the Level 2 BLAS rate of execution. + * There are N / NB diagonal blocks. One must exchange 2 messages of + * length NB to compute the next NB entries of the vector solution, as + * well as performing a total of N^2 floating point operations. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * AMAT (local input/output) HPL_T_pmat * + * On entry, AMAT points to the data structure containing the + * local array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Ccomm, Rcomm; + double * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL, + * XR=NULL, * Xd=NULL, * Xdprev=NULL, + * W=NULL; + int Alcol, Alrow, Anpprev, Anp, Anq, Bcol, + Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid, + Wfr=0, colprev, kb, kbprev, lda, mycol, + myrow, n, n1, n1p, n1pprev=0, nb, npcol, + nprow, rowprev, tmp1, tmp2; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif + if( ( n = AMAT->n ) <= 0 ) return; + nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X; + + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV; + Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1; + GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 ); +/* + * Move the rhs in the process column owning the last column of A. 
+ */ + Mnumroc( Anp, n, nb, nb, myrow, 0, nprow ); + Mnumroc( Anq, n, nb, nb, mycol, 0, npcol ); + + tmp1 = ( n - 1 ) / nb; + Alrow = tmp1 - ( tmp1 / nprow ) * nprow; + Alcol = tmp1 - ( tmp1 / npcol ) * npcol; + kb = n - tmp1 * nb; + + Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda ); + Mindxg2p( n, nb, nb, Bcol, 0, npcol ); + + if( ( Anp > 0 ) && ( Alcol != Bcol ) ) + { + if( mycol == Bcol ) + { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); } + else if( mycol == Alcol ) + { (void) HPL_recv( XC, Anp, Bcol, Rmsgid, Rcomm ); } + } + Rmsgid = ( Rmsgid + 2 > + MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 ); + if( mycol != Alcol ) + { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; } +/* + * Set up lookahead + */ + n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb ); + if( Anp > 0 ) + { + W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) ); + if( W == NULL ) + { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); } + Wfr = 1; + } + + Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda ); + tmp1 = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); } + if( mycol == Alcol ) + { + Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq ); + if( myrow == Alrow ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, Xd, 1 ); + } + } + + rowprev = Alrow; Alrow = MModSub1( Alrow, nprow ); + colprev = Alcol; Alcol = MModSub1( Alcol, npcol ); + kbprev = kb; n -= kb; + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); +/* + * Start the operations + */ + while( n > 0 ) + { + if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; } + if( myrow == Alrow ) { Anp -= kb; } +/* + * Broadcast (decreasing-ring) of previous solution block in 
previous + * process column, compute partial update of current block and send it + * to current process column. + */ + if( mycol == colprev ) + { +/* + * Send previous solution block in process row above + */ + if( myrow == rowprev ) + { + if( GridIsNot1xQ ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else + { + (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ), + Cmsgid, Ccomm ); + } +/* + * Compute partial update of previous solution block and send it to cur- + * rent column + */ + if( n1pprev > 0 ) + { + tmp1 = Anpprev - n1pprev; + HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev, + -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone, + XC+tmp1, 1 ); + if( GridIsNotPx1 ) + (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm ); + } +/* + * Finish the (decreasing-ring) broadcast of the solution block in pre- + * vious process column + */ + if( ( myrow != rowprev ) && + ( myrow != MModAdd1( rowprev, nprow ) ) ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else if( mycol == Alcol ) + { +/* + * Current column receives and accumulates partial update of previous + * solution block + */ + if( n1pprev > 0 ) + { + (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm ); + HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 ); + } + } +/* + * Solve current diagonal block + */ + if( ( mycol == Alcol ) && ( myrow == Alrow ) ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 ); + } +/* +* Finish previous update +*/ + if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) + HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone, + Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 ); +/* +* Save info of current step and update info for the next step +*/ + if( mycol == Alcol ) { Xdprev = Xd; Aprev = Aptr; } + if( myrow == Alrow ) { Anpprev -= kb; } + rowprev = Alrow; colprev = 
Alcol; + n1pprev = n1p; kbprev = kb; n -= kb; + Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol ); + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV : Rmsgid+2 ); + Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV+1 : Cmsgid+2 ); + } +/* + * Replicate last solution block + */ + if( mycol == colprev ) + (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev, + Ccomm ); + + if( Wfr ) free( W ); +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif +/* + * End of HPL_pdtrsv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNN.c new file mode 100644 index 000000000..7e31ddcd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNN.c @@ -0,0 +1,442 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. 
+ */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, 
Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq 
-= n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNT.c new file mode 100644 index 000000000..faa3ef207 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateNT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNT broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU ); 
+ Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTN.c new file mode 100644 index 000000000..a16aa26a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTN.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTN broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing submatrix + * (using the panel PANEL). + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, when PBCST is not NULL on entry, IFLAG holds the + * final status reported by the broadcast probing. When PBCST is + * NULL on entry, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information.
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position; it is capped at PANEL->nq, and a negative NN selects all PANEL->nq columns. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update: keep probing until the panel broadcast succeeds. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case (nprow == 1): the row interchanges are applied locally + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } /* pivot indexes made relative to PANEL->ii */ +/* + * So far we have not updated
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update at most nb columns at a time: swap rows, triangular solve with L1, then rank-jb update with L2 + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update in one step (remaining n - nq0 columns) + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void)
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm (cached in the static fswap/tswap on first use) - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U (held with leading dimension LDU = jb) and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ?
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update in one step (remaining n - nq0 columns) + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -=
n; PANEL->jj += n; +/* + * Return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine); IFLAG is untouched when PBCST is NULL. + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTT.c new file mode 100644 index 000000000..81e6cc4b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pdupdateTT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTT broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing submatrix + * (using the panel PANEL). + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, when PBCST is not NULL on entry, IFLAG holds the + * final status reported by the broadcast probing. When PBCST is + * NULL on entry, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information.
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position; it is capped at PANEL->nq, and a negative NN selects all PANEL->nq columns. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update: keep probing until the panel broadcast succeeds. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case (nprow == 1): the row interchanges are applied locally + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } /* pivot indexes made relative to PANEL->ii */ +/* + * So far we have not updated
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update at most nb columns at a time: swap rows, triangular solve with L1, then rank-jb update with L2 + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update in one step (remaining n - nq0 columns) + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void)
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm (cached in the static fswap/tswap on first use) - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U (used transposed below: LDU = n, HplRight / HplTrans) and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ?
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update in one step (remaining n - nq0 columns) + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n;
PANEL->jj += n; +/* + * Return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine); IFLAG is untouched when PBCST is NULL. + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_perm.c new file mode 100644 index 000000000..bf7cc4503 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_perm.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_perm +( + const int N, + int * LINDXA, + int * LINDXAU, + int * IWORK +) +#else +void HPL_perm +( N, LINDXA, LINDXAU, IWORK ) + const int N; + int * LINDXA; + int * LINDXAU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_perm combines two index arrays and generates the corresponding + * permutation. First, this function computes the inverse of LINDXA, and + * then combines it with LINDXAU. Second, in order to be able to perform + * the permutation in place, LINDXAU is overwritten by the sequence of + * swaps producing the same result. What we ultimately want to + * achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the + * call to this function, this in place permutation can be performed by + * for i in [0..N) swap U[i] with U[LINDXAU[i]]. + * + * Arguments + * ========= + * + * N (global input) const int + * On entry, N specifies the length of the arrays LINDXA and + * LINDXAU. N should be at least zero.
+ * + * LINDXA (global input/output) int * + * On entry, LINDXA is an array of dimension N containing the + * source indexes (used directly as indexes into IWORK, so each must lie in [0..N)). On exit, LINDXA contains the combined index + * array. + * + * LINDXAU (global input/output) int * + * On entry, LINDXAU is an array of dimension N containing the + * target indexes. On exit, LINDXAU contains the sequence of + * swaps that should be applied in increasing order of i to + * permute the underlying array U in place. + * + * IWORK (workspace) int * + * On entry, IWORK is a work array of dimension N; its contents are overwritten. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j, k, fndd; +/* .. + * .. Executable Statements .. + */ +/* + * Invert LINDXA into IWORK - combine LINDXA and LINDXAU - reset IWORK to the identity + */ + for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; } + for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; } + + for( i = 0; i < N; i++ ) + { + /* search LINDXA such that LINDXA[j] == i (linear scan with no bound check: relies on i being present in LINDXA) */ + j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--; + /* search IWORK such that IWORK[k] == j (linear scan with no bound check) */ + k = 0; do { fndd = ( IWORK[k] == j ); k++; } while( !fndd ); k--; + /* swap IWORK[i] and IWORK[k]; LINDXAU[i] = k */ + j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j; + LINDXAU[i] = k; + } +/* + * End of HPL_perm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pipid.c new file mode 100644 index 000000000..ab5ef949f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_pipid.c @@ -0,0 +1,187 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pipid +( + HPL_T_panel * PANEL, + int * K, + int * IPID +) +#else +void HPL_pipid +( PANEL, K, IPID ) + HPL_T_panel * PANEL; + int * K; + int * IPID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pipid computes an array IPID that contains the source and final + * destination of matrix rows resulting from the application of N + * interchanges as computed by the LU factorization with row partial + * pivoting. The array IPID is such that the row of global index IPID(i) + * should be mapped onto the row of global index IPID(i+1). Note that we + * cannot really know the length of IPID a priori. However, we know that + * this array is at least 2*N long, since there are N rows to swap and + * broadcast. The length of this array must be smaller than or equal to + * 4*N, since every row is swapped with at most a single distinct remote + * row. The algorithm constructing IPID goes as follows: Let IA be the + * global index of the first row to be swapped. + * + * For every row src IA + i with i in [0..N) to be swapped with row dst + * such that dst is given by DPIV[i]: + * + * Is row src the destination of a previous row of the current block, + * that is, is there k odd such that IPID(k) is equal to src ? + * Yes: update this destination with dst.
For example, if the + * pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, + * we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it + * was thought so far ... + * No : add the pair (src,dst) at the end of IPID; row src has not + * been moved yet. + * + * Is row dst different from src the destination of a previous row of + * the current block, i.e., is there k odd such that IPID(k) is equal to + * dst ? + * Yes: update IPID(k) with src. For example, if the pivot array + * is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in + * fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought + * so far ... + * No : add the pair (dst,src) at the end of IPID; row dst has not + * been moved yet. + * + * Note that when src is equal to dst, the pair (dst,src) should not be + * added to IPID in order to avoid duplicated entries in this array. + * During the construction of the array IPID, we make sure that the + * first N (src,dst) pairs are such that IPID(k) with k odd is equal to IA+k/2. + * For k in [0..K/2), the row of global index IPID(2*k) should be + * mapped onto the row of global index IPID(2*k+1). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global output) int * + * On exit, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global output) int * + * On entry, IPID is an array of length 4*N. On exit, the first + * K entries of that array contain the src and final destination + * resulting from the application of the N interchanges as + * specified by DPIV. The pairs (src,dst) are contiguously + * stored and sorted so that IPID(2*i+1) is equal to IA+i with i + * in [0..N) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables ..
+ */ + int dst, fndd, fnds, ia, i, j, jb, lst, off, + src; + double * dpiv; +/* .. + * .. Executable Statements .. + */ + dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia; + dst = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2; + if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; } + + for( i = 1; i < jb; i++ ) + { + fnds = 0; j = 1; + + if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) ) + { + do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } } + while( !( fnds ) && ( j < *K ) ); + if( !fnds ) { lst = *K; off = 2; IPID[lst] = src; } + else { lst = fnds-1; off = 0; } + IPID[lst+1] = dst; + } + else + { + fndd = 0; + do + { + if ( src == IPID[j] ) { fnds = j; } + else if( dst == IPID[j] ) { fndd = j; } + j += 2; + } + while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) ); + if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off = 2; } + else { IPID[fnds] = dst; off = 0; } + if( !fndd ) { lst = *K+off; IPID[lst ] = dst; off += 2; } + else { lst = fndd-1; } + IPID[lst+1] = src; + } +/* + * Enforce IPID(1,i) equal to src = ia + i, i.e. IPID[2*i+1] == ia + i: + * the pair stored at lst is exchanged with the pair stored at 2*i + * (src and dst are reused as swap temporaries below). + */ + if( lst != ( j = ( i << 1 ) ) ) + { + src = IPID[j ]; IPID[j ] = IPID[lst ]; IPID[lst ] = src; + dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst; + } + *K += off; + } +/* + * End of HPL_pipid + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx0.c new file mode 100644 index 000000000..be12639d0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx0.c @@ -0,0 +1,281 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx0 +( + HPL_T_panel * PANEL, + const int K, + int * IPID, + int * LINDXA, + int * LINDXAU, + int * LLEN +) +#else +void HPL_plindx0 +( PANEL, K, IPID, LINDXA, LINDXAU, LLEN ) + HPL_T_panel * PANEL; + const int K; + int * IPID; + int * LINDXA; + int * LINDXAU; + int * LLEN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. + * + * On entry, the array IPID of length K is such that the row of global + * index IPID(i) should be mapped onto row of global index IPID(i+1). + * Let IA be the global index of the first row to be swapped. For k in + * [0..K/2), the row of global index IPID(2*k) should be mapped onto the + * row of global index IPID(2*k+1). The question then, is to determine + * which rows should ultimately be part of U. + * + * First, some rows of the process ICURROW may be swapped locally. One + * of these rows belongs to U, the other one belongs to my local piece of + * A. The other rows of the current block are swapped with remote rows + * and are thus not part of U. These rows however should be sent along, + * and grabbed by the other processes as we progress in the exchange + * phase.
+ * + * So, assume that I am ICURROW and consider a row of index IPID(2*i) + * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less + * than N, this row is locally swapped and should be copied into U at + * the position IPID(2*i+1) - IA. No row will be exchanged for this one. + * If IPID(2*i+1)-IA is greater than or equal to N, then the row IPID(2*i) should be + * locally copied into my local piece of A at the position corresponding + * to the row of global index IPID(2*i+1). + * + * If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) + * is to be swapped away and strictly speaking does not belong to U, but + * to A remotely. Since this process will however send this array U, + * this row is copied into U, exactly where the row IPID(2*i+1) should + * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to + * IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position + * IPID(2*k1+1)-IA. + * + * It is thus important to put the rows that go into U, i.e., such that + * IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By + * doing so, U is formed, and the local copy is performed in just one + * sweep. + * + * Two lists LINDXA and LINDXAU are built. LINDXA contains the local + * index of the rows I have that should be copied. LINDXAU contains the + * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A + * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). In the process + * ICURROW, the initial packing algorithm proceeds as follows.
+ * + * for all entries in IPID, + * if IPID(2*i) is in ICURROW, + * if IPID(2*i+1) is in ICURROW, + * if( IPID(2*i+1) - IA < N ) + * save corresponding local position + * of this row (LINDXA); + * save local position (LINDXAU) in U + * where this row goes; + * [copy row IPID(2*i) in U at position + * IPID(2*i+1)-IA; ]; + * else + * save corresponding local position of + * this row (LINDXA); + * save local position (-LINDXAU) in A + * where this row goes; + * [copy row IPID(2*i) in my piece of A + * at IPID(2*i+1);] + * end if + * else + * find k1 such that IPID(2*k1) = IPID(2*i+1); + * copy row IPID(2*i) in U at position + * IPID(2*k1+1)-IA; + * save corresponding local position of this + * row (LINDXA); + * save local position (LINDXAU) in U where + * this row goes; + * end if + * end if + * end for + * + * Second, if I am not the current row process ICURROW, all source rows + * in IPID that I own are part of U. Indeed, they are swapped with one + * row of the current block of rows, and the main factorization + * algorithm proceeds one row after each other. The processes different + * from ICURROW, should exchange and accumulate those rows until they + * receive some data previously owned by the process ICURROW. + * + * In processes different from ICURROW, the initial packing algorithm + * proceeds as follows. Consider a row of global index IPID(2*i) that I + * own. When I will be receiving data previously owned by ICURROW, i.e., + * U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, + * and this particular row of U should be first copied into my piece of + * A, at A(il,:), where il is the local row index corresponding to + * IPID(2*i). Now, initially, this row will be packed into workspace, say + * as the kth row of that work array. The following algorithm sets + * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row + * should be copied. LINDXA(k) stores the local index in A where this + * row of U should be copied, i.e il.
+ * + * for all entries in IPID, + * if IPID(2*i) is not in ICURROW, + * copy row IPID(2*i) in work array; + * save corresponding local position + * of this row (LINDXA); + * save position (LINDXAU) in U where + * this row should be copied; + * end if + * end for + * + * Since we are at it, we also globally figure out how many rows every + * process has. That is necessary, because it would rather be cumbersome + * to figure it on the fly during the bi-directional exchange phase. + * This information is kept in the array LLEN of size NPROW. Also note + * that the arrays LINDXA and LINDXAU are of max length equal to 2*N. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * LINDXA (local output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (local output) int * + * On entry, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * LLEN (global output) int * + * On entry, LLEN is an array of length NPROW. On exit, it + * contains how many rows every process has. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables ..
+ */ + int dst, dstrow, fndd, i, ia, icurrow, il, + ip=0, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + icurrow = PANEL->prow; jb = PANEL->jb; + nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; + + for( i = 0; i < nprow; i++ ) LLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; + Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++; + + if( myrow == srcrow ) + { + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; dst = IPID[i+1]; + + if( myrow == icurrow ) + { + Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( dstrow == icurrow ) + { + if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; } + else + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + } + else + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + LINDXAU[ip] = IPID[j-1] - ia; + } + } + else { LINDXAU[ip] = dst - ia; } + + ip++; + } + } +/* + * End of HPL_plindx0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx1.c new file mode 100644 index 000000000..a24fd4c56 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx1.c @@ -0,0 +1,275 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx1 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPA, + int * LINDXA, + int * LINDXAU, + int * IPLEN, + int * IPMAP, + int * IPMAPM1, + int * PERMU, + int * IWORK +) +#else +void HPL_plindx1 +( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPA; + int * LINDXA; + int * LINDXAU; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; + int * PERMU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. In addition, this function computes + * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic + * mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K.
The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * IPA (global output) int * + * On exit, IPA specifies the number of rows that the current + * process row has that either belong to U or should be swapped + * with remote rows of A. + * + * LINDXA (global output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (global output) int * + * On entry, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IPMAP[i] after the sort + * with the convention that IPLEN[nprow] is the total number of + * rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the + * local number of rows of A that should be moved to the process + * IPMAP[i]. IPLEN is such that the number of rows of the source + * process row can be computed as IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW.
On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROW) + * + * PERMU (global output) int * + * On entry, PERMU is an array of dimension JB. On exit, PERMU + * contains a sequence of permutations, that should be applied + * in increasing order to permute in place the row panel U. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension 2*JB. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int * iwork; + int dst, dstrow, fndd, i, ia, icurrow, il, + ip, ipU, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + */ + HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ); +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA + * elsewhere and PERMU in every process.
+ */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; icurrow = PANEL->prow; + + iwork = IWORK + jb; + + if( myrow == icurrow ) + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; + + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + + PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + ip++; + } + } + *IPA = ip; + } + else + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); +/* + * LINDXA[i] is the local index of the row of A that belongs into U + */ + if( myrow == dstrow ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; ip++; + } +/* + * iwork[i] is the local (current) position index in U + * PERMU[i] is the local (final) destination index in U + */ + if( srcrow == icurrow ) + { + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + } + } + 
*IPA = 0; + } +/* + * Simplify iwork and PERMU, return in PERMU the sequence of permutations + * that need to be applied to U after it has been broadcast. + */ + HPL_perm( jb, iwork, PERMU, IWORK ); +/* + * Reset IPLEN to its correct value + */ + for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1]; + IPLEN[0] = 0; +/* + * End of HPL_plindx1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx10.c new file mode 100644 index 000000000..fa460fd35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_plindx10.c @@ -0,0 +1,155 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx10 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_plindx10 +( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx10 computes three arrays IPLEN, IPMAP and IPMAPM1 that + * contain the logarithmic mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N.
+ * + * IPID (global input) const int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IPMAP[i] after the sort, with + * the convention that IPLEN[nprow] is the total number of rows. + * In other words, IPLEN[i+1] - IPLEN[i] is the local number of + * rows of A that should be moved for each process. IPLEN is + * such that the number of rows of the source process row can be + * computed as IPLEN[1] - IPLEN[0], and the remaining entries of + * this array are sorted so that the quantities IPLEN[i+1] - + * IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROW) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dst, dstrow, i, ia, icurrow, jb, nb, + nprow, src, srcrow; +/* .. + * .. Executable Statements .. + */ + nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; + ia = PANEL->ia; icurrow = PANEL->prow; +/* + * Compute redundantly the local number of rows that each process has + * and that belong to U in IPLEN[1 ..
nprow+1] + */ + for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++; + } + } +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + * (the inverse of IPMAP) + */ + HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 ); +/* + * End of HPL_plindx10 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollN.c new file mode 100644 index 000000000..e68590a01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollN.c @@ -0,0 +1,225 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollN +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollN rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the number of columns of U. N must be + * at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[NPROW]). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type[2]; + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. 
Executable Statements .. + */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE, + &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); + } + + if( lengthS > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE, + &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); } +/* + * End of HPL_rollN + */ +} diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollT.c new file mode 100644 index 000000000..0160c9412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_rollT.c @@ -0,0 +1,259 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollT +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollT rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type[2]; +#endif + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE, + &type[I_RECV] ); + else + ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE, + &type[I_RECV] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. 
+ */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU, + MPI_DOUBLE, partner, Cmsgid, comm, &request ); +#endif + } + + if( lengthS > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE, + &type[I_SEND] ); + else + ierr = MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE, + &type[I_SEND] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU, + MPI_DOUBLE, partner, Cmsgid, comm ); +#endif + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#if 0 + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); +#endif + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); } +/* + * End of HPL_rollT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadN.c new file mode 100644 index 000000000..202611e7f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadN.c @@ -0,0 +1,303 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadN +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadN spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of rows of U, that + * should be spread on any given process row. This function also probes + * for the presence of the column panel PBCST. In case of success, this + * panel will be forwarded. If PBCST is NULL on input, this probing + * mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of columns of U. N + * must be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. 
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type; + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U to the left + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) 
HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U to the right - offset the IPLEN, and IPMAP arrays + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); } +/* + * End of HPL_spreadN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadT.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadT.c new file mode 100644 index 000000000..1adf93507 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/src/pgesv/HPL_spreadT.c @@ -0,0 +1,372 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadT +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadT spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of columns of U, + * that should be spread on any given process row. This function also + * probes for the presence of the column panel PBCST. If available, + * this panel will be forwarded. If PBCST is NULL on input, this + * probing mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. 
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type; +#endif + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; +/* + * Spread to the right - offset the IPLEN and IPMAP arrays + */ + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? 
lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); } +/* + * End of HPL_spreadT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_dmatgen.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_dmatgen.c new file mode 100644 index 000000000..c14ef0fd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_dmatgen.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dmatgen +( + const int M, + const int N, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_dmatgen +( M, N, A, LDA, ISEED ) + const int M; + const int N; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dmatgen generates (or regenerates) a random matrix A. + * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * M (input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * ISEED (input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero.
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd[2], ia1[2], ic1[2], iran1[2], + jseed[2], mult[2]; + int i, incA = LDA - M, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; +/* + * Initialize the random sequence (jump of one from the seed ISEED) + */ + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; + + HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Generate an M by N matrix; A skips incA = LDA - M entries per column + */ + for( j = 0; j < N; A += incA, j++ ) + for( i = 0; i < M; A++, i++ ) *A = HPL_rand(); +/* + * End of HPL_dmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_jumpit.c new file mode 100644 index 000000000..4d4dc4db5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_jumpit.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_jumpit +( + int * MULT, + int * IADD, + int * IRANN, + int * IRANM +) +#else +void HPL_jumpit +( MULT, IADD, IRANN, IRANM ) + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_jumpit jumps in the random sequence from the number X(n) encoded + * in IRANN to the number X(m) encoded in IRANM using the constants A + * and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A + * and C obviously depend on m and n, see the function HPL_xjumpm in + * order to initialize them.
+ * + * Arguments + * ========= + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 32-lower and 32-higher bits of the constant A. + * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 32-lower and 32-higher bits of the constant C. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2, that contains + * the 32-lower and 32-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 32-lower and 32-higher bits + * of the encoding of X(m). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + HPL_lmul( IRANN, MULT, j ); /* j = IRANN * MULT; */ + HPL_ladd( j, IADD, IRANM ); /* IRANM = j + IADD; */ + HPL_setran( 0, IRANM ); /* irand = IRANM */ +/* + * End of HPL_jumpit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_ladd.c new file mode 100644 index 000000000..0d4e4c08c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_ladd.c @@ -0,0 +1,126 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_ladd +( + int * J, + int * K, + int * I +) +#else +void HPL_ladd +( J, K, I ) + int * J; + int * K; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ladd adds without carry (the carry out of bit 63 is dropped) two + * long positive integers K and J and puts the result into I.
The long integers I, J, K are encoded on 64 + * bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second + * entry. + * + * Arguments + * ========= + * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables (operands split into 16-bit limbs) .. + */ + unsigned int itmp0, itmp1; + unsigned int ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16; + unsigned int ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16; + unsigned int jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16; + unsigned int jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16; + +/* .. + * .. Executable Statements ..
+ */ +/* + * K[1] K[0] K I[0] = (K[0]+J[0]) % 2^32 + * XXXX XXXX carry = (K[0]+J[0]) / 2^32 + * + * + J[1] J[0] J I[1] = K[1] + J[1] + carry + * XXXX XXXX I[1] = I[1] % 2^32 + * ------------- + * I[1] I[0] + * 0XXX XXXX I + */ + itmp0 = ktmp0 + jtmp0; + itmp1 = itmp0 >> 16; I[0] = itmp0 - (itmp1 << 16 ); + itmp1 += ktmp1 + jtmp1; I[0] |= (itmp1 & 65535) << 16; + itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2; + I[1] = itmp0 - ((itmp0 >> 16 ) << 16); + itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3; + I[1] |= (itmp1 & 65535) << 16; +/* + * End of HPL_ladd + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_lmul.c new file mode 100644 index 000000000..254b192f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_lmul.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_lmul +( + int * K, + int * J, + int * I +) +#else +void HPL_lmul +( K, J, I ) + int * K; + int * J; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_lmul multiplies without carry two long positive integers K and J + * and puts the result into I. The long integers I, J, K are encoded on + * 64 bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second entry + * of each array. For efficiency purposes, the intrinsic modulo function + * is inlined. + * + * Arguments + * ========= + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K.
+ * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int r, c; + unsigned int kk[4], jj[4], res[5]; +/* .. + * .. Executable Statements .. + */ +/* + * The product is formed 16 bits at a time. Multiplying two 16-bit + * integers yields a 32-bit result. The lower 16-bits of the result + * are kept in I, and the higher 16-bits are carried over to the + * next multiplication. + */ + for (c = 0; c < 2; ++c) { + kk[2*c] = K[c] & 65535; + kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535; + jj[2*c] = J[c] & 65535; + jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535; + } + + res[0] = 0; + for (c = 0; c < 4; ++c) { + res[c+1] = (res[c] >> 16) & 65535; + res[c] &= 65535; + for (r = 0; r < c+1; ++r) { + res[c] = kk[r] * jj[c-r] + (res[c] & 65535); + res[c+1] += (res[c] >> 16) & 65535; + } + } + + for (c = 0; c < 2; ++c) + I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535)); +/* + * End of HPL_lmul + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_rand.c new file mode 100644 index 000000000..fe4e12f5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_rand.c @@ -0,0 +1,94 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_rand( void ) +#else +double HPL_rand() +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rand generates the next number in the random sequence. This + * function ensures that this number lies in the interval (-0.5, 0.5]. + * + * The static array irand contains the information (2 integers) required + * to generate the next number in the sequence X(n). This number is + * computed as X(n) = 0.5 - (2^32 * irand[1] + irand[0]) / d, where the + * constant d is the largest 64 bit positive unsigned integer. The array + * irand is then updated for the generation of the next number X(n+1) + * in the random sequence as follows X(n+1) = a * X(n) + c. The + * constants a and c should have been preliminarily stored in the arrays + * ias and ics as 2 pairs of integers. The initialization of ias, ics + * and irand is performed by the function HPL_setran. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements ..
+ */ + HPL_setran( 3, j ); /* j = irand; irand = irand * ias + ics */ +/* + * return number between -0.5 and 0.5: HPL_HALF minus the scaled j[1]:j[0] + */ + return( HPL_HALF - + (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF + + (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF ); +/* + * End of HPL_rand + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_setran.c new file mode 100644 index 000000000..1a3ca73aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_setran.c @@ -0,0 +1,115 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int ias[2], ics[2], irand[2]; + +#ifdef STDC_HEADERS +void HPL_setran +( + const int OPTION, + int * IRAN +) +#else +void HPL_setran +( OPTION, IRAN ) + const int OPTION; + int * IRAN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_setran initializes the random generator with the encoding of the + * first number X(0) in the sequence, and the constants a and c used to + * compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), + * a and c are stored in the static variables irand, ias and ics. When + * OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the + * values of the input array IRAN. When OPTION is 3, IRAN is set to the + * current value of irand, and irand is then set to irand*ias + ics.
+ * + * Arguments + * ========= + * + * OPTION (local input) const int + * On entry, OPTION is an integer that specifies the operations + * to be performed on the random generator as specified above. + * + * IRAN (local input/output) int * + * On entry, IRAN is an array of dimension 2, that contains the + * 32-lower and 32-higher bits of a random number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + if( OPTION == 3 ) + { /* return current value, then advance */ + IRAN[0] = irand[0]; IRAN[1] = irand[1]; + HPL_lmul( irand, ias, j ); /* j = irand * ias; */ + HPL_ladd( j, ics, irand ); /* irand = j + ics; */ + } + else if( OPTION == 0 ) { irand[0] = IRAN[0]; irand[1] = IRAN[1]; } + else if( OPTION == 1 ) { ias [0] = IRAN[0]; ias [1] = IRAN[1]; } + else if( OPTION == 2 ) { ics [0] = IRAN[0]; ics [1] = IRAN[1]; } +/* + * End of HPL_setran + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_xjumpm.c new file mode 100644 index 000000000..ae70bbc16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/matgen/HPL_xjumpm.c @@ -0,0 +1,158 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_xjumpm +( + const int JUMPM, + int * MULT, + int * IADD, + int * IRANN, + int * IRANM, + int * IAM, + int * ICM +) +#else +void HPL_xjumpm +( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) + const int JUMPM; + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; + int * IAM; + int * ICM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_xjumpm computes the constants A and C to jump JUMPM numbers in + * the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in + * MULT and IADD specify how to jump from one entry in the sequence to + * the next. + * + * Arguments + * ========= + * + * JUMPM (local input) const int + * On entry, JUMPM specifies the number of entries in the + * sequence to jump over. When JUMPM is less than or equal to zero, + * A and C are not computed, IRANM is set to IRANN corresponding + * to a jump of size zero. + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant a to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant c to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(n+JUMPM). + * + * IAM (local output) int * + * On entry, IAM is an array of dimension 2. 
On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant A to jump from X(n) to X(n+JUMPM) in the random + * sequence. IAM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant A. When JUMPM is less than or + * equal to zero, this array is not referenced. + * + * ICM (local output) int * + * On entry, ICM is an array of dimension 2. On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant C to jump from X(n) to X(n+JUMPM) in the random + * sequence. ICM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant C. When JUMPM is less than or + * equal to zero, this array is not referenced. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2], k; +/* .. + * .. Executable Statements .. + */ + if( JUMPM > 0 ) + { + IAM[0] = MULT[0]; IAM[1] = MULT[1]; /* IAM = MULT; */ + ICM[0] = IADD[0]; ICM[1] = IADD[1]; /* ICM = IADD; */ + for( k = 1; k <= JUMPM-1; k++ ) /* compose the one-step map JUMPM-1 more times; the work grows linearly with JUMPM */ + { + HPL_lmul( IAM, MULT, j ); /* j = IAM * MULT; */ + IAM[0] = j[0]; IAM[1] = j[1]; /* IAM = j; */ + HPL_lmul( ICM, MULT, j ); /* j = ICM * MULT; */ + HPL_ladd( IADD, j, ICM ); /* ICM = IADD + j; */ + } + HPL_lmul( IRANN, IAM, j ); /* j = IRANN * IAM; */ + HPL_ladd( j, ICM, IRANM ); /* IRANM = j + ICM; */ + } + else + { /* IRANM = IRANN */ + IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1]; + } +/* + * End of HPL_xjumpm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/pmatgen/HPL_pdmatgen.c new file mode 100644 index 000000000..2d129c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/pmatgen/HPL_pdmatgen.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmatgen +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_pdmatgen +( GRID, M, N, NB, A, LDA, ISEED ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmatgen generates (or regenerates) a parallel random matrix A. + * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)).
+ * On exit, this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * ISEED (global input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd [2], ia1 [2], ia2 [2], ia3 [2], + ia4 [2], ia5 [2], ib1 [2], ib2 [2], + ib3 [2], ic1 [2], ic2 [2], ic3 [2], + ic4 [2], ic5 [2], iran1[2], iran2[2], + iran3[2], iran4[2], itmp1[2], itmp2[2], + itmp3[2], jseed[2], mult [2]; + int ib, iblk, ik, jb, jblk, jk, jump1, jump2, + jump3, jump4, jump5, jump6, jump7, lmb, + lnb, mblks, mp, mycol, myrow, nblks, + npcol, nprow, nq; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; +/* + * Generate an M by N matrix starting in process (0,0) + */ + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( ( mp <= 0 ) || ( nq <= 0 ) ) return; /* nothing is written to A when mp or nq is non-positive (presumably the local row/column counts from Mnumroc - confirm) */ +/* + * Local number of blocks and size of the last one + */ + mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB; + nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB; +/* + * Compute multiplier/adder for various jumps in random sequence + */ + jump1 = 1; jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB; + jump5 = NB; jump6 = mycol; jump7 = myrow * NB; + + HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2, ic2 ); + HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3, ic3 ); + HPL_xjumpm( jump4, ia3, ic3, iran1, itmp1, ia4, ic4 ); /* jump4 steps of (ia3,ic3), each of which is already a jump of jump3 = M entries */ + HPL_xjumpm( jump5, ia3, ic3, iran1, itmp1, ia5, ic5 ); /* likewise jump5 = NB steps of (ia3,ic3) */ +
HPL_xjumpm( jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2 ); /* jump6 = mycol steps of (ia5,ic5); only the resulting state itmp3 is used */ + HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); /* then jump7 = myrow*NB single steps from itmp3; iran1 is the state handed to HPL_setran( 0, ... ) */ + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Save value of first number in sequence + */ + ib1[0] = iran1[0]; ib1[1] = iran1[1]; + ib2[0] = iran1[0]; ib2[1] = iran1[1]; + ib3[0] = iran1[0]; ib3[1] = iran1[1]; + + for( jblk = 0; jblk < nblks; jblk++ ) + { + jb = ( jblk == nblks - 1 ? lnb : NB ); + for( jk = 0; jk < jb; jk++ ) + { + for( iblk = 0; iblk < mblks; iblk++ ) + { + ib = ( iblk == mblks - 1 ? lmb : NB ); + for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand(); + HPL_jumpit( ia2, ic2, ib1, iran2 ); /* move the row-block start ib1 on with the jump2 = nprow*NB constants (HPL_jumpit is defined elsewhere; presumably it also repositions the generator read by HPL_rand - confirm) */ + ib1[0] = iran2[0]; ib1[1] = iran2[1]; + } + A += LDA - mp; /* the ib values of one column add up to mp, so this moves A to the start of the next column (stride LDA) */ + HPL_jumpit( ia3, ic3, ib2, iran3 ); /* move the column start ib2 on with the jump3 = M constants */ + ib1[0] = iran3[0]; ib1[1] = iran3[1]; + ib2[0] = iran3[0]; ib2[1] = iran3[1]; + } + HPL_jumpit( ia4, ic4, ib3, iran4 ); /* move the column-block start ib3 on with the (ia4,ic4) constants, i.e. jump4 = npcol*NB steps of M entries */ + ib1[0] = iran4[0]; ib1[1] = iran4[1]; + ib2[0] = iran4[0]; ib2[1] = iran4[1]; + ib3[0] = iran4[0]; ib3[1] = iran4[1]; + } +/* + * End of HPL_pdmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pddriver.c new file mode 100644 index 000000000..5e4050f48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pddriver.c @@ -0,0 +1,293 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int main +( + int ARGC, + char * * ARGV +) +#else +int main( ARGC, ARGV ) +/* + * .. Scalar Arguments .. + */ + int ARGC; +/* + * .. Array Arguments ..
+ */ + char * * ARGV; +#endif +{ +/* + * Purpose + * ======= + * + * main is the main driver program for testing the HPL routines. + * This program is driven by a short data file named "HPL.dat". + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int nval [HPL_MAX_PARAM], + nbval [HPL_MAX_PARAM], + pval [HPL_MAX_PARAM], + qval [HPL_MAX_PARAM], + nbmval[HPL_MAX_PARAM], + ndvval[HPL_MAX_PARAM], + ndhval[HPL_MAX_PARAM]; + + HPL_T_FACT pfaval[HPL_MAX_PARAM], + rfaval[HPL_MAX_PARAM]; + + HPL_T_TOP topval[HPL_MAX_PARAM]; + + HPL_T_grid grid; + HPL_T_palg algo; + HPL_T_test test; + int L1notran, Unotran, align, equil, in, inb, + inbm, indh, indv, ipfa, ipq, irfa, itop, + mycol, myrow, ns, nbs, nbms, ndhs, ndvs, + npcol, npfs, npqs, nprow, nrfs, ntps, + rank, size, tswap; + HPL_T_ORDER pmapping; + HPL_T_FACT rpfa; + HPL_T_SWAP fswap; +/* .. + * .. Executable Statements .. + */ + MPI_Init( &ARGC, &ARGV ); +#ifdef HPL_CALL_VSIPL + vsip_init((void*)0); +#endif + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); /* size is queried here but not read again in this function */ +/* + * Read and check validity of test parameters from input file + * + * HPL Version 1.0, Linpack benchmark input file + * Your message here + * HPL.out output file name (if any) + * 6 device out (6=stdout,7=stderr,file) + * 4 # of problems sizes (N) + * 29 30 34 35 Ns + * 4 # of NBs + * 1 2 3 4 NBs + * 0 PMAP process mapping (0=Row-,1=Column-major) + * 3 # of process grids (P x Q) + * 2 1 4 Ps + * 2 4 1 Qs + * 16.0 threshold + * 3 # of panel fact + * 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + * 2 # of recursive stopping criterium + * 2 4 NBMINs (>= 1) + * 1 # of panels in recursion + * 2 NDIVs + * 3 # of recursive panel fact.
+ * 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + * 1 # of broadcast + * 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + * 1 # of lookahead depth + * 0 DEPTHs (>=0) + * 2 SWAP (0=bin-exch,1=long,2=mix) + * 4 swapping threshold + * 0 L1 in (0=transposed,1=no-transposed) form + * 0 U in (0=transposed,1=no-transposed) form + * 1 Equilibration (0=no,1=yes) + * 8 memory alignment in double (> 0) + */ + HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval, + &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval, + &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran, + &Unotran, &equil, &align ); +/* + * Loop over different process grids - Define process grid. Go to bottom + * of process grid loop if this case does not use my process. + */ + for( ipq = 0; ipq < npqs; ipq++ ) + { + (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq], + &grid ); + (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol ); + + if( ( myrow < 0 ) || ( myrow >= nprow ) || + ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs; /* coordinates outside the grid: skip every test and also the HPL_grid_exit call below */ + + for( in = 0; in < ns; in++ ) + { /* Loop over various problem sizes */ + for( inb = 0; inb < nbs; inb++ ) + { /* Loop over various blocking factors */ + for( indh = 0; indh < ndhs; indh++ ) + { /* Loop over various lookahead depths */ + for( itop = 0; itop < ntps; itop++ ) + { /* Loop over various broadcast topologies */ + for( irfa = 0; irfa < nrfs; irfa++ ) + { /* Loop over various recursive factorizations */ + for( ipfa = 0; ipfa < npfs; ipfa++ ) + { /* Loop over various panel factorizations */ + for( inbm = 0; inbm < nbms; inbm++ ) + { /* Loop over various recursive stopping criteria */ + for( indv = 0; indv < ndvs; indv++ ) + { /* Loop over various # of panels in recursion */ +/* + * Set up the algorithm parameters + */ + algo.btopo = topval[itop]; algo.depth = ndhval[indh]; + algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv]; + + algo.pfact = rpfa = pfaval[ipfa]; /* rpfa is a scratch copy; it is overwritten with rfaval[irfa] further down */ + + if( L1notran != 0 ) + { + if( rpfa ==
HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrN; + else algo.pffun = HPL_pdpanrlN; + + algo.rfact = rpfa = rfaval[irfa]; + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrN; + else algo.rffun = HPL_pdrpanrlN; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN; + else algo.upfun = HPL_pdupdateNT; + } + else + { + if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrT; + else algo.pffun = HPL_pdpanrlT; + + algo.rfact = rpfa = rfaval[irfa]; + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrT; + else algo.rffun = HPL_pdrpanrlT; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN; + else algo.upfun = HPL_pdupdateTT; + } + + algo.fswap = fswap; algo.fsthr = tswap; + algo.equil = equil; algo.align = align; + + HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] ); + + } + } + } + } + } + } + } + } + (void) HPL_grid_exit( &grid ); +label_end_of_npqs: ; + } +/* + * Print ending messages, close output file, exit.
+ */ + if( rank == 0 ) + { + test.ktest = test.kpass + test.kfail + test.kskip; +#ifndef HPL_DETAILED_TIMING + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#else + if( test.thrsh > HPL_rzero ) + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#endif + + HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest, + "tests with the following results:" ); + if( test.thrsh > HPL_rzero ) + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed and passed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kfail, + "tests completed and failed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." ); + } + else + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed without checking," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values."
); + } + + HPL_fprintf( test.outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( test.outfp, "\nEnd of Tests.\n" ); + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); + + if( ( test.outfp != stdout ) && ( test.outfp != stderr ) ) + (void) fclose( test.outfp ); + } +#ifdef HPL_CALL_VSIPL + vsip_finalize((void*)0); +#endif + MPI_Finalize(); + exit( 0 ); /* the process ends here, so the return statement below is never reached */ + + return( 0 ); +/* + * End of main + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdinfo.c new file mode 100644 index 000000000..4ede45be6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdinfo.c @@ -0,0 +1,1182 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdinfo +( + HPL_T_test * TEST, + int * NS, + int * N, + int * NBS, + int * NB, + HPL_T_ORDER * PMAPPIN, + int * NPQS, + int * P, + int * Q, + int * NPFS, + HPL_T_FACT * PF, + int * NBMS, + int * NBM, + int * NDVS, + int * NDV, + int * NRFS, + HPL_T_FACT * RF, + int * NTPS, + HPL_T_TOP * TP, + int * NDHS, + int * DH, + HPL_T_SWAP * FSWAP, + int * TSWAP, + int * L1NOTRAN, + int * UNOTRAN, + int * EQUIL, + int * ALIGN +) +#else +void HPL_pdinfo +( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN ) + HPL_T_test * TEST; + int * NS; + int * N; + int * NBS; + int * NB; + HPL_T_ORDER * PMAPPIN; + int * NPQS; + int * P; + int * Q; + int * NPFS; + HPL_T_FACT * PF; + int * NBMS; + int * NBM; + int * NDVS; + int * NDV; + int * NRFS; + HPL_T_FACT * RF; + int * NTPS; + HPL_T_TOP * TP; + int * NDHS; + int * DH; + HPL_T_SWAP * FSWAP; + int * TSWAP; + int * L1NOTRAN; + int * UNOTRAN; + int * EQUIL; + int * ALIGN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdinfo reads the startup information for the various tests and + * transmits it to all processes. + * + * Arguments + * ========= + * + * TEST (global output) HPL_T_test * + * On entry, TEST points to a testing data structure. On exit, + * the fields of this data structure are initialized as follows: + * TEST->outfp specifies the output file where the results will + * be printed. It is only defined and used by the process 0 of + * the grid. TEST->thrsh specifies the threshhold value for the + * test ratio. TEST->epsil is the relative machine precision of + * the distributed computer. Finally the test counters, kfail, + * kpass, kskip, ktest are initialized to zero. 
+ * + * NS (global output) int * + * On exit, NS specifies the number of different problem sizes + * to be tested. NS is less than or equal to HPL_MAX_PARAM. + * + * N (global output) int * + * On entry, N is an array of dimension HPL_MAX_PARAM. On exit, + * the first NS entries of this array contain the problem sizes + * to run the code with. + * + * NBS (global output) int * + * On exit, NBS specifies the number of different distribution + * blocking factors to be tested. NBS must be less than or equal + * to HPL_MAX_PARAM. + * + * NB (global output) int * + * On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, + * the first NBS entries of this array contain the values of the + * various distribution blocking factors, to run the code with. + * + * PMAPPIN (global output) HPL_T_ORDER * + * On exit, PMAPPIN specifies the process mapping onto the no- + * des of the MPI machine configuration. PMAPPIN defaults to + * row-major ordering. + * + * NPQS (global output) int * + * On exit, NPQS specifies the number of different values that + * can be used for P and Q, i.e., the number of process grids to + * run the code with. NPQS must be less than or equal to + * HPL_MAX_PARAM. + * + * P (global output) int * + * On entry, P is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of P, + * the number of process rows of the NPQS grids to run the code + * with. + * + * Q (global output) int * + * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of Q, + * the number of process columns of the NPQS grids to run the + * code with. + * + * NPFS (global output) int * + * On exit, NPFS specifies the number of different values that + * can be used for PF : the panel factorization algorithm to run + * the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ * + * PF (global output) HPL_T_FACT * + * On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPFS entries of this array contain the various + * panel factorization algorithms to run the code with. + * + * NBMS (global output) int * + * On exit, NBMS specifies the number of various recursive + * stopping criteria to be tested. NBMS must be less than or + * equal to HPL_MAX_PARAM. + * + * NBM (global output) int * + * On entry, NBM is an array of dimension HPL_MAX_PARAM. On + * exit, the first NBMS entries of this array contain the values + * of the various recursive stopping criteria to be tested. + * + * NDVS (global output) int * + * On exit, NDVS specifies the number of various numbers of + * panels in recursion to be tested. NDVS is less than or equal + * to HPL_MAX_PARAM. + * + * NDV (global output) int * + * On entry, NDV is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDVS entries of this array contain the values + * of the various numbers of panels in recursion to be tested. + * + * NRFS (global output) int * + * On exit, NRFS specifies the number of different values that + * can be used for RF : the recursive factorization algorithm to + * be tested. NRFS is less than or equal to HPL_MAX_PARAM. + * + * RF (global output) HPL_T_FACT * + * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NRFS entries of this array contain the various + * recursive factorization algorithms to run the code with. + * + * NTPS (global output) int * + * On exit, NTPS specifies the number of different values that + * can be used for the broadcast topologies to be tested. NTPS + * is less than or equal to HPL_MAX_PARAM. + * + * TP (global output) HPL_T_TOP * + * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, + * the first NTPS entries of this array contain the various + * broadcast (along rows) topologies to run the code with. 
+ * + * NDHS (global output) int * + * On exit, NDHS specifies the number of different values that + * can be used for the lookahead depths to be tested. NDHS is + * less than or equal to HPL_MAX_PARAM. + * + * DH (global output) int * + * On entry, DH is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDHS entries of this array contain the values + * of lookahead depths to run the code with. Such a value is at + * least 0 (no-lookahead) or greater than zero. + * + * FSWAP (global output) HPL_T_SWAP * + * On exit, FSWAP specifies the swapping algorithm to be used in + * all tests. + * + * TSWAP (global output) int * + * On exit, TSWAP specifies the swapping threshold as a number + * of columns when the mixed swapping algorithm was chosen. + * + * L1NOTRA (global output) int * + * On exit, L1NOTRAN specifies whether the upper triangle of the + * panels of columns should be stored in no-transposed form + * (L1NOTRAN=1) or in transposed form (L1NOTRAN=0). + * + * UNOTRAN (global output) int * + * On exit, UNOTRAN specifies whether the panels of rows should + * be stored in no-transposed form (UNOTRAN=1) or transposed + * form (UNOTRAN=0) during their broadcast. + * + * EQUIL (global output) int * + * On exit, EQUIL specifies whether equilibration during the + * swap-broadcast of the panel of rows should be performed + * (EQUIL=1) or not (EQUIL=0). + * + * ALIGN (global output) int * + * On exit, ALIGN specifies the alignment of the dynamically + * allocated buffers in double precision words. ALIGN is greater + * than zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + char file[HPL_LINE_MAX], line[HPL_LINE_MAX], + auth[HPL_LINE_MAX], num [HPL_LINE_MAX]; + FILE * infp; + int * iwork = NULL; + char * lineptr; + int error=0, fid, i, j, lwork, maxp, nprocs, + rank, size; +/* .. + * .. Executable Statements .. 
+ */ + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Initialize the TEST data structure with default values + */ + TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0; + TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0; +/* + * Process 0 reads the input data, broadcasts to other processes and + * writes needed information to TEST->outfp. + */ + if( rank == 0 ) + { +/* + * Open file and skip data file header + */ + if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "cannot open file HPL.dat" ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) fgets( auth, HPL_LINE_MAX - 2, infp ); +/* + * Read name and unit number for summary output file + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", file ); + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + fid = atoi( num ); + if ( fid == 6 ) TEST->outfp = stdout; + else if( fid == 7 ) TEST->outfp = stderr; + else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.", + file ); + error = 1; goto label_error; + } +/* + * Read and check the parameter values for the tests. 
+ * + * Problem size (>=0) (N) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NS = atoi( num ); + if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d", + "Number of values of N is less than 1 or greater than", + HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( N[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of N less than 0" ); + error = 1; goto label_error; + } + } +/* + * Block size (>=1) (NB) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBS = atoi( num ); + if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NB is less than 1 or", + "greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NB[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NB less than 1" ); + error = 1; goto label_error; + } + } +/* + * Process grids, mapping, (>=1) (P, Q) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + *PMAPPIN = ( atoi( num ) == 1 ? 
HPL_COLUMN_MAJOR : HPL_ROW_MAJOR ); + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPQS = atoi( num ); + if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of grids is less", + "than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( P[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of P less than 1" ); + error = 1; goto label_error; + } + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( Q[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of Q less than 1" ); + error = 1; goto label_error; + } + } +/* + * Check for enough processes in machine configuration + */ + maxp = 0; + for( i = 0; i < *NPQS; i++ ) + { nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); } + if( maxp > size ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Need at least %d processes for these tests", maxp ); + error = 1; goto label_error; + } +/* + * Checking threshold value (TEST->thrsh) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num ); +/* + * Panel factorization algorithm (PF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPFS = atoi( num ); + if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "number of values of PFACT", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPFS; i++ ) + { + 
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) PF[ i ] = HPL_CROUT; + else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING; + else PF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Recursive stopping criterium (>=1) (NBM) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBMS = atoi( num ); + if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NBMIN", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBMS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NBM[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NBMIN less than 1" ); + error = 1; goto label_error; + } + } +/* + * Number of panels in recursion (>=2) (NDV) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDVS = atoi( num ); + if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NDIV", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDVS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NDV[ i ] = atoi( num ) ) < 2 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NDIV less than 2" ); + error = 1; goto label_error; + } + } +/* + * Recursive panel factorization (RF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NRFS = atoi( num ); + if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of RFACT", + "is 
less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NRFS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) RF[ i ] = HPL_CROUT; + else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING; + else RF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Broadcast topology (TP) (0=rg, 1=2rg, 2=rgM, 3=2rgM, 4=L) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NTPS = atoi( num ); + if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of BCAST", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NTPS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) TP[ i ] = HPL_1RING; + else if( j == 1 ) TP[ i ] = HPL_1RING_M; + else if( j == 2 ) TP[ i ] = HPL_2RING; + else if( j == 3 ) TP[ i ] = HPL_2RING_M; + else if( j == 4 ) TP[ i ] = HPL_BLONG; + else if( j == 5 ) TP[ i ] = HPL_BLONG_M; + else TP[ i ] = HPL_1RING_M; + } +/* + * Lookahead depth (>=0) (NDH) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDHS = atoi( num ); + if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of DEPTH", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDHS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); + lineptr += strlen( num ) + 1; + if( ( DH[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of DEPTH less than 0" ); + error = 1; 
goto label_error; + } + } +/* + * Swapping algorithm (0,1 or 2) (FSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); j = atoi( num ); + if( j == 0 ) *FSWAP = HPL_SWAP00; + else if( j == 1 ) *FSWAP = HPL_SWAP01; + else if( j == 2 ) *FSWAP = HPL_SW_MIX; + else *FSWAP = HPL_SWAP01; +/* + * Swapping threshold (>=0) (TSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *TSWAP = atoi( num ); + if( *TSWAP <= 0 ) *TSWAP = 0; +/* + * L1 in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num ); + if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; +/* + * U in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num ); + if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0; +/* + * Equilibration (0=no, 1=yes) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *EQUIL = atoi( num ); + if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1; +/* + * Memory alignment in bytes (> 0) (ALIGN) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *ALIGN = atoi( num ); + if( *ALIGN <= 0 ) *ALIGN = 4; +/* + * Close input file + */ +label_error: + if (infp != NULL) + (void) fclose( infp ); + } + else { TEST->outfp = NULL; } +/* + * Check for error on reading input file + */ + (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max, + MPI_COMM_WORLD ); + if( error ) + { + if( rank == 0 ) + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Illegal input in file HPL.dat. Exiting ..." 
); + MPI_Finalize(); +#ifdef HPL_CALL_VSIPL + (void) vsip_finalize( NULL ); +#endif + exit( 1 ); + } +/* + * Compute and broadcast machine epsilon + */ + TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS ); +/* + * Pack information arrays and broadcast + */ + (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0, + MPI_COMM_WORLD ); +/* + * Broadcast array sizes + */ + iwork = (int *)malloc( (size_t)(15) * sizeof( int ) ); + if( rank == 0 ) + { + iwork[ 0] = *NS; iwork[ 1] = *NBS; + iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 ); + iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS; + iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS; + iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN; + iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN; + } + (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD ); + if( rank != 0 ) + { + *NS = iwork[ 0]; *NBS = iwork[ 1]; + *PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR ); + *NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5]; + *NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8]; + *NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11]; + *UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14]; + } + if( iwork ) free( iwork ); +/* + * Pack information arrays and broadcast + */ + lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + + (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1; + + if (lwork < 0) + exit(EXIT_FAILURE); + + + iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + if( rank == 0 ) + { + j = 0; + for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; } + for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; } + for( i = 0; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( PF[i] == HPL_CROUT ) iwork[j] = 1; + else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + 
} + for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; } + for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( RF[i] == HPL_CROUT ) iwork[j] = 1; + else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) iwork[j] = 0; + else if( TP[i] == HPL_1RING_M ) iwork[j] = 1; + else if( TP[i] == HPL_2RING ) iwork[j] = 2; + else if( TP[i] == HPL_2RING_M ) iwork[j] = 3; + else if( TP[i] == HPL_BLONG ) iwork[j] = 4; + else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5; + j++; + } + for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; } + + if( *FSWAP == HPL_SWAP00 ) iwork[j] = 0; + else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1; + else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2; + j++; + } + (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0, + MPI_COMM_WORLD ); + if ((rank != 0) && (iwork != NULL)) + { + j = 0; + for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; } + for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; } + + for( i = 0; i < *NPFS; i++ ) + { + if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) PF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; } + for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) RF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( iwork[j] == 0 ) TP[i] = HPL_1RING; + else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M; + else if( iwork[j] == 2 ) TP[i] = HPL_2RING; + else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M; + else if( iwork[j] == 4 ) TP[i] = HPL_BLONG; + else if( iwork[j] == 
5 ) TP[i] = HPL_BLONG_M; + j++; + } + for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; } + + if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00; + else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01; + else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX; + j++; + + if( iwork ) free( iwork ); + } +/* + * regurgitate input + */ + if( rank == 0 ) + { + + if (TEST->outfp != NULL){ + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "HPLinpack 2.3 -- High-Performance Linpack benchmark -- ", + " December 2, 2018" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Written by A. Petitet and R. Clint Whaley, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Piotr Luszczek, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Julien Langou, ", + "University of Colorado Denver"); + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + + HPL_fprintf( TEST->outfp, "\n%s\n", + "An explanation of the input/output parameters follows:" ); + HPL_fprintf( TEST->outfp, "%s\n", + "T/V : Wall time / encoded variant." ); + HPL_fprintf( TEST->outfp, "%s\n", + "N : The order of the coefficient matrix A." ); + HPL_fprintf( TEST->outfp, "%s\n", + "NB : The partitioning blocking factor." ); + HPL_fprintf( TEST->outfp, "%s\n", + "P : The number of process rows." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Q : The number of process columns." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Time : Time in seconds to solve the linear system." ); + HPL_fprintf( TEST->outfp, "%s\n\n", + "Gflops : Rate of execution for solving the linear system." 
); + HPL_fprintf( TEST->outfp, "%s\n", + "The following parameter values will be used:" ); +/* + * Problem size + */ + HPL_fprintf( TEST->outfp, "\nN :" ); + for( i = 0; i < Mmin( 8, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + } + } +/* + * Distribution blocking factor + */ + HPL_fprintf( TEST->outfp, "\nNB :" ); + for( i = 0; i < Mmin( 8, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + } + } +/* + * Process mapping + */ + HPL_fprintf( TEST->outfp, "\nPMAP :" ); + if( *PMAPPIN == HPL_ROW_MAJOR ) + HPL_fprintf( TEST->outfp, " Row-major process mapping" ); + else if( *PMAPPIN == HPL_COLUMN_MAJOR ) + HPL_fprintf( TEST->outfp, " Column-major process mapping" ); +/* + * Process grid + */ + HPL_fprintf( TEST->outfp, "\nP :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + } + } + HPL_fprintf( TEST->outfp, "\nQ :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( 
TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + } + } +/* + * Panel Factorization + */ + HPL_fprintf( TEST->outfp, "\nPFACT :" ); + for( i = 0; i < Mmin( 8, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Recursive stopping criterium + */ + HPL_fprintf( TEST->outfp, "\nNBMIN :" ); + for( i = 0; i < Mmin( 8, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBMS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + } + } +/* + * Number of panels in recursion + */ + HPL_fprintf( TEST->outfp, "\nNDIV :" ); + for( i = 0; i < Mmin( 8, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i 
< *NDVS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + } + } +/* + * Recursive Factorization + */ + HPL_fprintf( TEST->outfp, "\nRFACT :" ); + for( i = 0; i < Mmin( 8, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Broadcast topology + */ + HPL_fprintf( TEST->outfp, "\nBCAST :" ); + for( i = 0; i < Mmin( 8, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else 
if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + } + } +/* + * Lookahead depths + */ + HPL_fprintf( TEST->outfp, "\nDEPTH :" ); + for( i = 0; i < Mmin( 8, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NDHS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + } + } +/* + * Swapping algorithm + */ + HPL_fprintf( TEST->outfp, "\nSWAP :" ); + if( *FSWAP == HPL_SWAP00 ) + HPL_fprintf( TEST->outfp, " Binary-exchange" ); + else if( *FSWAP == HPL_SWAP01 ) + HPL_fprintf( TEST->outfp, " Spread-roll (long)" ); + else if( *FSWAP == HPL_SW_MIX ) + HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP ); +/* + * L1 storage form + */ + HPL_fprintf( TEST->outfp, "\nL1 :" ); + if( *L1NOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * U storage form + */ + HPL_fprintf( TEST->outfp, "\nU :" ); + if( *UNOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * Equilibration + */ + 
HPL_fprintf( TEST->outfp, "\nEQUIL :" ); + if( *EQUIL != 0 ) + HPL_fprintf( TEST->outfp, " yes" ); + else + HPL_fprintf( TEST->outfp, " no" ); +/* + * Alignment + */ + HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words", + *ALIGN ); + + HPL_fprintf( TEST->outfp, "\n\n" ); +/* + * For testing only + */ + if( TEST->thrsh > HPL_rzero ) + { + HPL_fprintf( TEST->outfp, "%s%s\n\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The matrix A is randomly generated for each test." ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The following scaled residual check will be computed:" ); + HPL_fprintf( TEST->outfp, "%s\n", + " ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" ); + HPL_fprintf( TEST->outfp, "%s %21.6e\n", + "- The relative machine precision (eps) is taken to be ", + TEST->epsil ); + HPL_fprintf( TEST->outfp, "%s %11.1f\n\n", + "- Computational tests pass if scaled residuals are less than ", + TEST->thrsh ); + } + } + } +/* + * End of HPL_pdinfo + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdtest.c new file mode 100644 index 000000000..73a62a7ff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptest/HPL_pdtest.c @@ -0,0 +1,438 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtest +( + HPL_T_test * TEST, + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int N, + const int NB +) +#else +void HPL_pdtest +( TEST, GRID, ALGO, N, NB ) + HPL_T_test * TEST; + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int N; + const int NB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtest performs one test given a set of parameters such as the + * process grid, the problem size, the distribution blocking factor ... + * This function generates the data, calls and times the linear system + * solver, checks the accuracy of the obtained vector solution and + * writes this information to the file pointed to by TEST->outfp. + * + * Arguments + * ========= + * + * TEST (global input) HPL_T_test * + * On entry, TEST points to a testing data structure: outfp + * specifies the output file where the results will be printed. + * It is only defined and used by the process 0 of the grid. + * thrsh specifies the threshhold value for the test ratio. + * Concretely, a test is declared "PASSED" if and only if the + * following inequality is satisfied: + * ||Ax-b||_oo / ( epsil * + * ( || x ||_oo * || A ||_oo + || b ||_oo ) * + * N ) < thrsh. + * epsil is the relative machine precision of the distributed + * computer. Finally the test counters, kfail, kpass, kskip and + * ktest are updated as follows: if the test passes, kpass is + * incremented by one; if the test fails, kfail is incremented + * by one; if the test is skipped, kskip is incremented by one. + * ktest is left unchanged. + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters to be used for this test. 
+ * + * N (global input) const int + * On entry, N specifies the order of the coefficient matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_DETAILED_TIMING + double HPL_w[HPL_TIMING_N]; +#endif + HPL_T_pmat mat; + double wtime[1]; + int info[3]; + double Anorm1, AnormI, Gflops, Xnorm1, XnormI, + BnormI, resid0, resid1; + double * Bptr; + void * vptr = NULL; + static int first=1; + int ii, ip2, mycol, myrow, npcol, nprow, nq; + char ctop, cpfact, crfact; + time_t current_time_start, current_time_end; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mat.n = N; mat.nb = NB; mat.info = 0; + mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow ); + nq = HPL_numroc( N, NB, NB, mycol, 0, npcol ); + mat.nq = nq + 1; +/* + * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is + * N by N+1. One column is added in every process column for the solve. + * The result however is stored in a 1 x N vector replicated in every + * process row. In every process, A is lda * (nq+1), x is 1 * nq and the + * workspace is mp. 
+ * + * Ensure that lda is a multiple of ALIGN and not a power of 2 + */ + mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align; + do + { + ii = ( mat.ld += ALGO->align ); ip2 = 1; + while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } + } + while( mat.ld == ip2 ); +/* + * Allocate dynamic memory + */ + vptr = (void*)malloc( ( (size_t)(ALGO->align) + + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * + sizeof(double) ); + info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol; + (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max, + GRID->all_comm ); + if( info[0] != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", + "[%d,%d] %s", info[1], info[2], + "Memory allocation failed for A, x and b. Skip." ); + (TEST->kskip)++; + /* some processes might have succeeded with allocation */ + if (vptr) free(vptr); + return; + } +/* + * generate matrix and right-hand-side, [ A | b ] which is N by N+1. + */ + mat.A = (double *)HPL_PTR( vptr, + ((size_t)(ALGO->align) * sizeof(double) ) ); + mat.X = Mptr( mat.A, 0, mat.nq, mat.ld ); + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); +#ifdef HPL_CALL_VSIPL + mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A), + (vsip_length)(mat.ld * mat.nq), + VSIP_MEM_NONE ); +#endif +/* + * Solve linear system + */ + HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm ); + time( &current_time_start ); + HPL_ptimer( 0 ); + HPL_pdgesv( GRID, ALGO, &mat ); + HPL_ptimer( 0 ); + time( &current_time_end ); +#ifdef HPL_CALL_VSIPL + (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); + vsip_blockdestroy_d( mat.block ); +#endif +/* + * Gather max of all CPU and WALL clock timings and print timing results + */ + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + 1, 0, wtime ); + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + if( first ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + 
HPL_fprintf( TEST->outfp, "%s%s\n", + "T/V N NB P Q", + " Time Gflops" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + if( TEST->thrsh <= HPL_rzero ) first = 0; + } +/* + * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve. + * Print WALL time + */ + Gflops = ( ( (double)(N) / 1.0e+9 ) * + ( (double)(N) / wtime[0] ) ) * + ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) ); + + cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + + if( ALGO->btopo == HPL_1RING ) ctop = '0'; + else if( ALGO->btopo == HPL_1RING_M ) ctop = '1'; + else if( ALGO->btopo == HPL_2RING ) ctop = '2'; + else if( ALGO->btopo == HPL_2RING_M ) ctop = '3'; + else if( ALGO->btopo == HPL_BLONG ) ctop = '4'; + else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5'; + + if( wtime[0] > HPL_rzero ) { + HPL_fprintf( TEST->outfp, + "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f %19.4e\n", + ( GRID->order == HPL_ROW_MAJOR ? 
'R' : 'C' ), + ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin, + N, NB, nprow, npcol, wtime[0], Gflops ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() start time %s\n", ctime( ¤t_time_start ) ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() end time %s\n", ctime( ¤t_time_end ) ); + } + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + HPL_TIMING_N, HPL_TIMING_BEG, HPL_w ); + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V", + "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" ); +/* + * Recursive panel factorization + */ + if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time rfact . . . : %18.2f\n", + HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization + */ + if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time pfact . . : %18.2f\n", + HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization (swap) + */ + if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time mxswp . . : %18.2f\n", + HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] ); +/* + * Update + */ + if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time update . . : %18.2f\n", + HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] ); +/* + * Update (swap) + */ + if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time laswp . . : %18.2f\n", + HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] ); +/* + * Upper triangular system solve + */ + if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time up tr sv . 
: %18.2f\n", + HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] ); + + if( TEST->thrsh <= HPL_rzero ) + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + } +#endif +/* + * Quick return, if I am not interested in checking the computations + */ + if( TEST->thrsh <= HPL_rzero ) + { (TEST->kpass)++; if( vptr ) free( vptr ); return; } +/* + * Check info returned by solve + */ + if( mat.info != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", + "Error code returned by solve is", mat.info, "skip" ); + (TEST->kskip)++; + if( vptr ) free( vptr ); return; + } +/* + * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x, + * and norm inf of b - A x. Display residual checks. + */ + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); + Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld ); + AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld ); +/* + * Because x is distributed in process rows, switch the norms + */ + XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 ); + Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 ); +/* + * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to + * find the max (in the col). Then (3) broadcast along the rows so that every + * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5] + * for the entries of B, it is very likely that BnormI (<=,~) 0.5. 
+ */ + Bptr = Mptr( mat.A, 0, nq, mat.ld ); + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){ + if( mat.mp > 0 ) + { + BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI ); + } + else + { + BnormI = HPL_rzero; + } + (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max, + GRID->col_comm ); + } + (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE, + HPL_indxg2p( N, NB, NB, 0, npcol ), + GRID->row_comm ); +/* + * If I own b, compute ( b - A x ) and ( - A x ) otherwise + */ + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 ); + } + else if( nq > 0 ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 ); + } + else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } +/* + * Reduce the distributed residual in process column 0 + */ + if( mat.mp > 0 ) + (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0, + GRID->row_comm ); +/* + * Compute || b - A x ||_oo + */ + resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld ); +/* + * Computes and displays norms, residuals ... + */ + if( N <= 0 ) + { + resid1 = HPL_rzero; + } + else + { + resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) ); + } + + if( resid1 < TEST->thrsh ) (TEST->kpass)++; + else (TEST->kfail)++; + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n", + "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1, + " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) ); + + if(resid1 >= TEST->thrsh ) + { + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||Ax-b||_oo . . . . . . . . . . . . . . . . . = ", resid0 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_oo . . . . . . . . . . . 
. . . . . . . . = ", AnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_1 . . . . . . . . . . . . . . . . . . . = ", Anorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_1 . . . . . . . . . . . . . . . . . . . = ", Xnorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI ); + } + } + if( vptr ) free( vptr ); +/* + * End of HPL_pdtest + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer.c new file mode 100644 index 000000000..202416079 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer.c @@ -0,0 +1,358 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the
+ * names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int HPL_ptimer_disabled;
+static double HPL_ptimer_cpusec [HPL_NPTIMER],
+              HPL_ptimer_cpustart [HPL_NPTIMER],
+              HPL_ptimer_wallsec [HPL_NPTIMER],
+              HPL_ptimer_wallstart[HPL_NPTIMER];
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_ptimer_boot( void )
+#else
+void HPL_ptimer_boot()
+#endif
+{
+/*
+ * HPL_ptimer_boot (re)sets all timers to 0, and enables HPL_ptimer.
+ */
+/*
+ * .. Local Variables ..
+ */
+   int i;
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+
+   for( i = 0; i < HPL_NPTIMER; i++ )
+   {
+      HPL_ptimer_cpusec [i] = HPL_ptimer_wallsec [i] = HPL_rzero;
+      HPL_ptimer_cpustart[i] = HPL_ptimer_wallstart[i] = HPL_PTIMER_STARTFLAG;
+   }
+/*
+ * End of HPL_ptimer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer( const int I )
+#else
+void HPL_ptimer( I )
+   const int I;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer provides a "stopwatch" functionality cpu/wall timer in
+ * seconds. Up to HPL_NPTIMER separate timers can be functioning at
+ * once. The first call starts the timer, and the second stops it.
+ * This routine can be disabled by calling HPL_ptimer_disable(), so
+ * that calls to the timer are ignored. This feature can be used to
+ * make sure certain sections of code do not affect timings, even if
+ * they call routines which have HPL_ptimer calls in them. Calling
+ * HPL_ptimer_enable() re-enables the timer functionality. One can
+ * retrieve the accumulated value of a timer by calling
+ *
+ * t0 = HPL_ptimer_inquire( HPL_WALL_PTIME | HPL_CPU_PTIME, I )
+ *
+ * where I is the timer index in [0..HPL_NPTIMER). To initialize the
+ * timer functionality, one must have called HPL_ptimer_boot() prior to
+ * any of the functions mentioned above.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to stop/start.
+ *
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_ptimer_disabled ) return;
+/*
+ * If timer has not been started, start it. Otherwise, stop it and add
+ * interval to count
+ */
+   if( HPL_ptimer_wallstart[I] == HPL_PTIMER_STARTFLAG )
+   {
+      HPL_ptimer_wallstart[I] = HPL_ptimer_walltime();
+      HPL_ptimer_cpustart [I] = HPL_ptimer_cputime ();
+   }
+   else
+   {
+      HPL_ptimer_cpusec [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I];
+      HPL_ptimer_wallsec [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I];
+      HPL_ptimer_wallstart[I] = HPL_PTIMER_STARTFLAG;
+   }
+/*
+ * End of HPL_ptimer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_enable( void )
+#else
+void HPL_ptimer_enable()
+#endif
+{
+/*
+ * HPL_ptimer_enable sets it so calls to HPL_ptimer are not ignored.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 0;
+   return;
+/*
+ * End of HPL_ptimer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_disable( void )
+#else
+void HPL_ptimer_disable()
+#endif
+{
+/*
+ * HPL_ptimer_disable sets it so calls to HPL_ptimer are ignored.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_ptimer_disabled = 1;
+   return;
+/*
+ * End of HPL_ptimer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_inquire
+(
+   const HPL_T_PTIME TMTYPE,
+   const int I
+)
+#else
+double HPL_ptimer_inquire( TMTYPE, I )
+   const int I;
+   const HPL_T_PTIME TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_inquire returns wall- or cpu- time that has accumulated in
+ * timer I.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)                const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME : CPU time is returned (default).
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to return.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double time;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * If wall- or cpu-time are not available on this machine, return
+ * HPL_PTIMER_ERROR
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR )
+         time = HPL_PTIMER_ERROR;
+      else
+         time = HPL_ptimer_wallsec[I];
+   }
+   else
+   {
+      if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR )
+         time = HPL_PTIMER_ERROR;
+      else
+         time = HPL_ptimer_cpusec [I];
+   }
+   return( time );
+/*
+ * End of HPL_ptimer_inquire
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_ptimer_combine
+(
+   MPI_Comm COMM,
+   const HPL_T_PTIME_OP OPE,
+   const HPL_T_PTIME TMTYPE,
+   const int N,
+   const int IBEG,
+   double * TIMES
+)
+#else
+void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES )
+   const int IBEG, N;
+   const HPL_T_PTIME_OP OPE;
+   const HPL_T_PTIME TMTYPE;
+   MPI_Comm COMM;
+   double * TIMES;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_combine combines the timing information stored on a scope
+ * of processes into the user TIMES array.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection on
+ *         which the timings are taken.
+ *
+ * OPE     (global input)                const HPL_T_PTIME_OP
+ *         On entry, OPE specifies what combine operation should be done
+ *         as follows:
+ *            = HPL_AMAX_PTIME get max. time on any process (default),
+ *            = HPL_AMIN_PTIME get min. time on any process,
+ *            = HPL_SUM_PTIME get sum of times across processes.
+ *
+ * TMTYPE  (global input)                const HPL_T_PTIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_PTIME : wall clock time is returned,
+ *            = HPL_CPU_PTIME : CPU time is returned (default).
+ *
+ * N       (global input)                const int
+ *         On entry, N specifies the number of timers to combine.
+ *
+ * IBEG    (global input)                const int
+ *         On entry, IBEG specifies the first timer to be combined.
+ *
+ * TIMES   (global output)               double *
+ *         On entry, TIMES is an array of dimension at least N. On exit,
+ *         this array contains the requested timing information.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int i, tmpdis;
+/* ..
+ * .. Executable Statements ..
+ */
+   tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1;
+/*
+ * Timer is disabled during the combine - copy timings into user array.
+ * If wall- or cpu-time is unavailable, fill TIMES with HPL_PTIMER_ERROR
+ * and skip the reduction, but still restore HPL_ptimer_disabled on exit.
+ */
+   if( TMTYPE == HPL_WALL_PTIME )
+   {
+      if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR )
+      { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; goto restore; }
+      else
+      { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; }
+   }
+   else
+   {
+      if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR )
+      { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; goto restore; }
+      else
+      { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i]; }
+   }
+/*
+ * Combine all nodes information, restore HPL_ptimer_disabled, and return
+ */
+   for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] );
+
+   if( OPE == HPL_AMAX_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+   else if( OPE == HPL_AMIN_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM );
+   else if( OPE == HPL_SUM_PTIME )
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM );
+   else
+      (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM );
+restore:
+   HPL_ptimer_disabled = tmpdis;
+/*
+ * End of HPL_ptimer_combine
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_cputime.c
new file mode 100644
index 000000000..711ef185d
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_cputime.c
@@ -0,0 +1,146
@@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ *
+ * ---------------------------------------------------------------------
+ */
+
+#if defined( HPL_USE_CLOCK )
+
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   static double              cps = CLOCKS_PER_SEC;
+   double                     d;
+   clock_t                    t1;
+   static clock_t             t0 = 0;  /* first clock() sample, kept across calls */
+
+   if( t0 == 0 ) t0 = clock();
+   t1 = clock() - t0;                  /* ticks elapsed since that first sample */
+   d  = (double)(t1) / cps;            /* ticks -> seconds */
+   return( d );
+}
+
+#elif defined( HPL_USE_TIMES )
+
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero; /* ticks per second, cached */
+
+   if( ClockTick == HPL_rzero )        /* query sysconf() on the first call only */
+      ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );     /* user time (tms_utime) only */
+}
+
+/* #elif defined( HPL_USE_GETRUSAGE ) */
+#else
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   struct rusage              ruse;
+
+   (void) getrusage( RUSAGE_SELF, &ruse );  /* return status is ignored */
+   return( (double)( ruse.ru_utime.tv_sec  ) +        /* user time: seconds ...   */
+           ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); /* ... + microseconds */
+}
+
+/*
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_cputime( void )
+#else
+double HPL_ptimer_cputime()
+#endif
+{
+   return( HPL_PTIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_ptimer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_walltime.c
new file mode 100644
index 000000000..96cbd300f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/ptimer/HPL_ptimer_walltime.c
@@ -0,0 +1,103 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_ptimer_walltime returns the elapsed (wall-clock) time.
+ * With HPL_USE_GETTIMEOFDAY: seconds since the first call (which itself
+ * returns HPL_rzero); otherwise the value of MPI_Wtime() is returned.
+ * ---------------------------------------------------------------------
+ */
+
+#if defined( HPL_USE_GETTIMEOFDAY )
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   struct timeval             tp;
+   static long                start=0, startu; /* time of first call: sec / usec */
+
+   if( !start )                        /* first call: record the origin only */
+   {
+      (void) gettimeofday( &tp, NULL );
+      start  = tp.tv_sec;
+      startu = tp.tv_usec;
+      return( HPL_rzero );
+   }
+   (void) gettimeofday( &tp, NULL );
+
+   return( (double)( tp.tv_sec - start ) +            /* whole seconds ...          */
+           ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); /* ... + usec difference */
+}
+
+#else
+
+#ifdef STDC_HEADERS
+double HPL_ptimer_walltime( void )
+#else
+double HPL_ptimer_walltime()
+#endif
+{
+   return( MPI_Wtime() );
+}
+
+#endif
+/*
+ * End of HPL_ptimer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer.c
new file mode 100644
index 000000000..3be9665f7
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer.c
@@ -0,0 +1,253 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * ---------------------------------------------------------------------
+ * Static variables
+ * ---------------------------------------------------------------------
+ */
+static int                    HPL_timer_disabled; /* nonzero: HPL_timer() returns at once */
+static double                 HPL_timer_cpusec   [HPL_NTIMER], /* accumulated cpu time   */
+                              HPL_timer_cpustart [HPL_NTIMER], /* cpu time at last start */
+                              HPL_timer_wallsec  [HPL_NTIMER], /* accumulated wall time  */
+                              HPL_timer_wallstart[HPL_NTIMER]; /* wall time at last start, or HPL_TIMER_STARTFLAG while stopped */
+/*
+ * ---------------------------------------------------------------------
+ * User callable functions
+ * ---------------------------------------------------------------------
+ */
+#ifdef STDC_HEADERS
+void HPL_timer_boot( void )
+#else
+void HPL_timer_boot()
+#endif
+{
+/*
+ * HPL_timer_boot (re)sets all timers to 0, and enables HPL_timer.
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        i;
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 0;
+
+   for( i = 0; i < HPL_NTIMER; i++ )
+   {
+      HPL_timer_cpusec  [i] = HPL_timer_wallsec  [i] = HPL_rzero;           /* nothing accumulated yet */
+      HPL_timer_cpustart[i] = HPL_timer_wallstart[i] = HPL_TIMER_STARTFLAG; /* mark timer i as stopped */
+   }
+/*
+ * End of HPL_timer_boot
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer( const int I )
+#else
+void HPL_timer( I )
+   const int                  I;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer provides a "stopwatch" functionality cpu/wall timer in
+ * seconds. Up to 64 separate timers can be functioning at once. The
+ * first call starts the timer, and the second stops it. This routine
+ * can be disabled by calling HPL_timer_disable(), so that calls to
+ * the timer are ignored. This feature can be used to make sure certain
+ * sections of code do not affect timings, even if they call routines
+ * which have HPL_timer calls in them. HPL_timer_enable() will re-enable
+ * the timer functionality. One can retrieve the current value of a
+ * timer by calling
+ *
+ * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I )
+ *
+ * where I is the timer index in [0..64). To initialize the timer
+ * functionality, one must have called HPL_timer_boot() prior to any of
+ * the functions mentioned above.
+ *
+ * Arguments
+ * =========
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to stop/start.
+ *         I indexes the timer arrays directly; it is not range-checked here.
+ * ---------------------------------------------------------------------
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   if( HPL_timer_disabled ) return;
+/*
+ * If timer has not been started, start it. Otherwise, stop it and add
+ * interval to count
+ */
+   if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG ) /* wall start slot doubles as the stopped marker */
+   {
+      HPL_timer_wallstart[I] = HPL_timer_walltime();
+      HPL_timer_cpustart [I] = HPL_timer_cputime ();
+   }
+   else
+   {
+      HPL_timer_cpusec   [I] += HPL_timer_cputime () - HPL_timer_cpustart [I];
+      HPL_timer_wallsec  [I] += HPL_timer_walltime() - HPL_timer_wallstart[I];
+      HPL_timer_wallstart[I]  = HPL_TIMER_STARTFLAG; /* only the wall slot is reset; cpustart keeps its old value */
+   }
+/*
+ * End of HPL_timer
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_enable( void )
+#else
+void HPL_timer_enable()
+#endif
+{
+/*
+ * HPL_timer_enable sets it so calls to HPL_timer are not ignored.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 0;
+   return;
+/*
+ * End of HPL_timer_enable
+ */
+}
+
+#ifdef STDC_HEADERS
+void HPL_timer_disable( void )
+#else
+void HPL_timer_disable()
+#endif
+{
+/*
+ * HPL_timer_disable sets it so calls to HPL_timer are ignored.
+ */
+/* ..
+ * .. Executable Statements ..
+ */
+   HPL_timer_disabled = 1;
+   return;
+/*
+ * End of HPL_timer_disable
+ */
+}
+
+#ifdef STDC_HEADERS
+double HPL_timer_inquire
+(
+   const HPL_T_TIME           TMTYPE,
+   const int                  I
+)
+#else
+double HPL_timer_inquire( TMTYPE, I )
+   const int                  I;
+   const HPL_T_TIME           TMTYPE;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer_inquire returns wall- or cpu- time that has accumulated in
+ * timer I.
+ *
+ * Arguments
+ * =========
+ *
+ * TMTYPE  (global input)                const HPL_T_TIME
+ *         On entry, TMTYPE specifies what time will be returned as fol-
+ *         lows
+ *            = HPL_WALL_TIME : wall clock time is returned,
+ *            = HPL_CPU_TIME  : CPU time is returned (default).
+ *
+ * I       (global input)                const int
+ *         On entry, I specifies the timer to return.
+ *         I indexes the timer arrays directly; it is not range-checked here.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double                     time;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * If wall- or cpu-time are not available on this machine, return
+ * HPL_TIMER_ERROR
+ */
+   if( TMTYPE == HPL_WALL_TIME )
+   {
+      if( HPL_timer_walltime() == HPL_TIMER_ERROR ) /* probe call: result is only compared with the error value */
+         time = HPL_TIMER_ERROR;
+      else
+         time = HPL_timer_wallsec[I];               /* total of completed start/stop intervals */
+   }
+   else                                             /* any other TMTYPE is treated as cpu time */
+   {
+      if( HPL_timer_cputime()  == HPL_TIMER_ERROR )
+         time = HPL_TIMER_ERROR;
+      else
+         time = HPL_timer_cpusec [I];               /* total of completed start/stop intervals */
+   }
+   return( time );
+/*
+ * End of HPL_timer_inquire
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_cputime.c
new file mode 100644
index 000000000..4a7f9dfef
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_cputime.c
@@ -0,0 +1,145 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ *
+ * ---------------------------------------------------------------------
+ */
+
+#if defined( HPL_USE_CLOCK )
+
+#include <time.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   static double              cps = CLOCKS_PER_SEC;
+   double                     d;
+   clock_t                    t1;
+   static clock_t             t0 = 0;  /* first clock() sample, kept across calls */
+
+   if( t0 == 0 ) t0 = clock();
+   t1 = clock() - t0;                  /* ticks elapsed since that first sample */
+   d  = (double)(t1) / cps;            /* ticks -> seconds */
+   return( d );
+}
+
+#elif defined( HPL_USE_TIMES )
+
+#include <sys/times.h>
+#include <unistd.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   struct tms                 ts;
+   static double              ClockTick = HPL_rzero; /* ticks per second, cached */
+
+   if( ClockTick == HPL_rzero )        /* query sysconf() on the first call only */
+      ClockTick = (double)(sysconf(_SC_CLK_TCK));
+   (void) times( &ts );
+   return( (double)(ts.tms_utime) / ClockTick );     /* user time (tms_utime) only */
+}
+
+/* #elif defined( HPL_USE_GETRUSAGE ) */
+#else
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   struct rusage              ruse;
+   (void) getrusage( RUSAGE_SELF, &ruse );  /* return status is ignored */
+   return( (double)( ruse.ru_utime.tv_sec  ) +        /* user time: seconds ...   */
+           ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); /* ... + microseconds */
+}
+
+/*
+#else
+
+#ifdef STDC_HEADERS
+double HPL_timer_cputime( void )
+#else
+double HPL_timer_cputime()
+#endif
+{
+   return( HPL_TIMER_ERROR );
+}
+*/
+
+#endif
+/*
+ * End of HPL_timer_cputime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_walltime.c
new file mode 100644
index 000000000..f4f44f202
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/out/testing/timer/HPL_timer_walltime.c
@@ -0,0 +1,88 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_timer_walltime returns the elapsed (wall-clock) time.
+ * The value is in seconds, measured from the first call; that first
+ * call only records the start time and returns HPL_rzero.
+ * ---------------------------------------------------------------------
+ */
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifdef STDC_HEADERS
+double HPL_timer_walltime( void )
+#else
+double HPL_timer_walltime()
+#endif
+{
+   struct timeval             tp;
+   static long                start=0, startu; /* time of first call: sec / usec */
+
+   if( !start )                        /* first call: record the origin only */
+   {
+      (void) gettimeofday( &tp, NULL );
+      start  = tp.tv_sec;
+      startu = tp.tv_usec;
+      return( HPL_rzero );
+   }
+   (void) gettimeofday( &tp, NULL );
+
+   return( (double)( tp.tv_sec - start ) +            /* whole seconds ...          */
+           ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); /* ... + usec difference */
+}
+/*
+ * End of HPL_timer_walltime
+ */
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
new file mode 100644
index 000000000..056fd81ba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS
@@ -0,0 +1,183 @@
+#
+# -- High Performance Computing Linpack Benchmark (HPL)
+# HPL - 2.3 - December 2, 2018
+# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#
+# ----------------------------------------------------------------------
+# - shell --------------------------------------------------------------
+# ----------------------------------------------------------------------
+#
+SHELL        = /bin/sh
+#
+CD           = cd
+CP           = cp
+LN_S         = ln -s
+MKDIR        = mkdir
+RM           = /bin/rm -f
+TOUCH        = touch
+#
+# ----------------------------------------------------------------------
+# - Platform identifier ------------------------------------------------
+# ----------------------------------------------------------------------
+# ARCH names the per-platform subdirectories used by BINdir and LIBdir.
+ARCH         = FreeBSD_PIV_CBLAS
+#
+# ----------------------------------------------------------------------
+# - HPL Directory Structure / HPL library ------------------------------
+# ----------------------------------------------------------------------
+# INCdir, BINdir, LIBdir and HPLlib are all derived from TOPdir.
+TOPdir       = $(HOME)/hpl
+INCdir       = $(TOPdir)/include
+BINdir       = $(TOPdir)/bin/$(ARCH)
+LIBdir       = $(TOPdir)/lib/$(ARCH)
+#
+HPLlib       = $(LIBdir)/libhpl.a
+#
+# ----------------------------------------------------------------------
+# - Message Passing library (MPI) --------------------------------------
+# ----------------------------------------------------------------------
+# MPinc tells the C compiler where to find the Message Passing library
+# header files, MPlib is defined to be the name of the library to be
+# used. The variable MPdir is only used for defining MPinc and MPlib.
+#
+MPdir        = /usr/local/mpich
+MPinc        = -I$(MPdir)/include
+MPlib        = $(MPdir)/lib/libmpich.a $(MPdir)/lib/libpmpich.a
+#
+# ----------------------------------------------------------------------
+# - Linear Algebra library (BLAS or VSIPL) -----------------------------
+# ----------------------------------------------------------------------
+# LAinc tells the C compiler where to find the Linear Algebra library
+# header files, LAlib is defined to be the name of the library to be
+# used. The variable LAdir is only used for defining LAinc and LAlib.
+# LAinc is left empty here, so no extra header search path is added.
+LAdir        = $(HOME)/share/ATLAS/lib/FreeBSD_P5SSE2
+LAinc        =
+LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
+#
+# ----------------------------------------------------------------------
+# - F77 / C interface --------------------------------------------------
+# ----------------------------------------------------------------------
+# You can skip this section if and only if you are not planning to use
+# a BLAS library featuring a Fortran 77 interface. Otherwise, it is
+# necessary to fill out the F2CDEFS variable with the appropriate
+# options. **One and only one** option should be chosen in **each** of
+# the 3 following categories:
+#
+# 1) name space (How C calls a Fortran 77 routine)
+#
+# -DAdd_ : all lower case and a suffixed underscore (Suns,
+# Intel, ...), [default]
+# -DNoChange : all lower case (IBM RS6000),
+# -DUpCase : all upper case (Cray),
+# -DAdd__ : the FORTRAN compiler in use is f2c.
+#
+# 2) C and Fortran 77 integer mapping
+#
+# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default]
+# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long,
+# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+#
+# 3) Fortran 77 string handling
+#
+# -DStringSunStyle : The string address is passed at the string loca-
+# tion on the stack, and the string length is then
+# passed as an F77_INTEGER after all explicit
+# stack arguments, [default]
+# -DStringStructPtr : The address of a structure is passed by a
+# Fortran 77 string, and the structure is of the
+# form: struct {char *cp; F77_INTEGER len;},
+# -DStringStructVal : A structure is passed by value for each Fortran
+# 77 string, and the structure is of the form:
+# struct {char *cp; F77_INTEGER len;},
+# -DStringCrayStyle : Special option for Cray machines, which uses
+# Cray fcd (fortran character descriptor) for
+# interoperation.
+# Left empty here: HPL_OPTS below selects the cblas interface instead.
+F2CDEFS      =
+#
+# ----------------------------------------------------------------------
+# - HPL includes / libraries / specifics -------------------------------
+# ----------------------------------------------------------------------
+#
+HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
+HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+#
+# - Compile time options -----------------------------------------------
+#
+# -DHPL_COPY_L force the copy of the panel L before bcast;
+# -DHPL_CALL_CBLAS call the cblas interface;
+# -DHPL_CALL_VSIPL call the vsip library;
+# -DHPL_DETAILED_TIMING enable detailed timers;
+#
+# By default HPL will:
+# *) not copy L before broadcast,
+# *) call the BLAS Fortran 77 interface,
+# *) not display detailed timing information.
+#
+HPL_OPTS     = -DHPL_CALL_CBLAS
+#
+# ----------------------------------------------------------------------
+#
+HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+#
+# ----------------------------------------------------------------------
+# - Compilers / linkers - Optimization flags ---------------------------
+# ----------------------------------------------------------------------
+# CCNOOPT carries the same defines as CCFLAGS but no optimization flags.
+CC           = /usr/bin/gcc
+CCNOOPT      = $(HPL_DEFS)
+CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
+#
+# On some platforms, it is necessary to use the Fortran linker to find
+# the Fortran internals used in the BLAS library.
+#
+LINKER       = /usr/bin/f77
+LINKFLAGS    = $(CCFLAGS)
+#
+ARCHIVER     = ar
+ARFLAGS      = r
+RANLIB       = /usr/bin/ranlib
+#
+# ----------------------------------------------------------------------
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.HPUX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.HPUX_FBLAS
new file mode 100644
index 000000000..af3f5da5f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.HPUX_FBLAS
@@ -0,0 +1,179 @@
+#
+# -- High Performance Computing Linpack Benchmark (HPL)
+# HPL - 2.3 - December 2, 2018
+# Antoine P. Petitet
+# University of Tennessee, Knoxville
+# Innovative Computing Laboratory
+# (C) Copyright 2000-2008 All Rights Reserved
+#
+# -- Copyright notice and Licensing terms:
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions, and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. All advertising materials mentioning features or use of this
+# software must display the following acknowledgement:
+# This product includes software developed at the University of
+# Tennessee, Knoxville, Innovative Computing Laboratory.
+#
+# 4. The name of the University, the name of the Laboratory, or the
+# names of its contributors may not be used to endorse or promote
+# products derived from this software without specific written
+# permission.
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = HPUX_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# 
---------------------------------------------------------------------- +# MPIinc tells the C compiler where to find the MPI header files, MPIlib +# is defined to be the name of the MPI library to be used. The variables +# MPIdir and MPIplat are only used for defining MPIinc and MPIlib. +# +MPIdir = $(HOME)/local/mpi +MPIplat = $(MPIdir)/hpux/ch_p4 +# +MPIinc = -I$(MPIdir)/include -I$(MPIplat)/include +MPIlib = $(MPIplat)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - BLAS library ------------------------------------------------------- +# ---------------------------------------------------------------------- +# +BLASlib = /usr/lib/pa1.1/libblas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc) +HPL_LIBS = $(HPLlib) $(BLASlib) $(MPIlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS F77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(HPL_INCLUDES) $(F2CDEFS) $(HPL_OPTS) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -D_INCLUDE_POSIX_SOURCE -DUseTimes -Aa +O4 +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = cc +LINKFLAGS = -Aa +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.I860_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.I860_FBLAS new file mode 100644 index 000000000..984236be2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.I860_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = I860_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = -lmpi +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lkmath +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) -nx +CCFLAGS = $(HPL_DEFS) -O4 -nx +# +LINKER = f77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.IRIX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.IRIX_FBLAS new file mode 100644 index 000000000..d78bcf09f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.IRIX_FBLAS @@ -0,0 +1,181 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = IRIX_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/IRIX64/ch_p4/include +MPlib = $(MPdir)/IRIX64/ch_p4/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lblas +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DStringSunStyle -DF77_INTEGER=int +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) -64 +CCFLAGS = $(HPL_DEFS) -O3 -64 -OPT:Olimit=15000 -TARG:platform=IP30 \ + -LNO:blocking=OFF -LOPT:alias=typed +# +LINKER = cc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS new file mode 100644 index 000000000..624306902 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the Fortran 77 BLAS interface +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/gcc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS new file mode 100644 index 000000000..07985f781 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL new file mode 100644 index 000000000..ddf3fb4b6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the Fortran 77 BLAS interface +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/gcc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_Intel64 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_Intel64 new file mode 100644 index 000000000..47661c25d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_Intel64 @@ -0,0 +1,193 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_Intel64 +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +# MPdir = /opt/intel/mpi/4.1.0 +# MPinc = -I$(MPdir)/include64 +# MPlib = $(MPdir)/lib64/libmpi.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(MKLROOT) +ifndef LAinc +LAinc = $(LAdir)/mkl/include +endif +ifndef LAlib +LAlib = -L$(LAdir)/mkl/lib/intel64 \ + -Wl,--start-group \ + $(LAdir)/lib/intel64/libmkl_intel_lp64.a \ + $(LAdir)/lib/intel64/libmkl_intel_thread.a \ + $(LAdir)/lib/intel64/libmkl_core.a \ + -Wl,--end-group -lpthread -ldl +endif +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpiicc +CCNOOPT = $(HPL_DEFS) +OMP_DEFS = -openmp +CCFLAGS = $(HPL_DEFS) -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) $(OMP_DEFS) -mt_mpi +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS new file mode 100644 index 000000000..535a0e214 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm new file mode 100644 index 000000000..31fc9ea74 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_CBLAS_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS new file mode 100644 index 000000000..5ed9aac12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm new file mode 100644 index 000000000..a2416396c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_FBLAS_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL new file mode 100644 index 000000000..0f690a1b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm new file mode 100644 index 000000000..fee265e46 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_VSIPL_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.MacOSX_Accelerate b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.MacOSX_Accelerate new file mode 100644 index 000000000..d1ce69b64 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.MacOSX_Accelerate @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = MacOSX_Accelerate +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +# MPdir = /opt/intel/mpi/4.1.0 +# MPinc = -I$(MPdir)/include64 +# MPlib = $(MPdir)/lib64/libmpi.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -framework Accelerate +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# LAinc/MPinc are appended so any header paths set above take effect. +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc-openmpi-mp +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# Here the C compiler driver doubles as the linker. +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = cr +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR2_FBLAS new file mode 100644 index 000000000..628f2c152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR2_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWR2_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lesslp2 +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpcc_r +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1 +# +LINKER = mpxlf_r +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR3_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR3_FBLAS new file mode 100644 index 000000000..bba468803 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWR3_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWR3_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lessl +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# NOTE(review): plain xlc/xlf and MPinc/MPlib empty above; confirm MPI. +CC = /usr/vac/bin/xlc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -qtune=pwr3 -qarch=pwr3 -O3 -qmaxmem=-1 -qfloat=hsflt +# +LINKER = /usr/bin/xlf +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# RANLIB = echo presumably makes the ranlib step a no-op -- confirm. +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWRPC_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWRPC_FBLAS new file mode 100644 index 000000000..2a0fb2ec6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.PWRPC_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWRPC_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lessl +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpcc_r +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 -qarch=ppc -qtune=604 -qmaxmem=-1 +# +LINKER = mpxlf_r +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS new file mode 100644 index 000000000..1ade2d8aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2-g_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -xlic_lib=sunperf +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -g +# +LINKER = purify -best-effort f77 +LINKFLAGS = +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL new file mode 100644 index 000000000..1cbb371fd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2-g_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/local/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -g +# +LINKER = purify -best-effort cc +LINKFLAGS = +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2_FBLAS new file mode 100644 index 000000000..a1d5d6315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.SUN4SOL2_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -xlic_lib=sunperf +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -dalign -fsingle -xO5 -native -xarch=v8plusa +# +LINKER = f77 +LINKFLAGS = -dalign -native -xarch=v8plusa -xO5 +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.T3E_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.T3E_FBLAS new file mode 100644 index 000000000..fe12cae9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.T3E_FBLAS @@ -0,0 +1,187 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = T3E_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DUpCase -DF77_INTEGER=long -DStringCrayStyle \ + -DCRAY_BLAS -DHPL_USE_TIMES +# +# When UpCase is defined, CRAY_BLAS redefines the BLAS routines used in +# HPL to be prefixed with an S. 
In the Cray programming environment, the +# default INTEGER and REAL size is 64 bits. This is reflected in the +# Cray Scientific Library as well, so SGEMM is the 64-bit matrix multi- +# ply. +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 +# +LINKER = f77 +LINKFLAGS = -O3,unroll2,pipeline2 +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS new file mode 100644 index 000000000..3d8062061 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 
2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Tru64_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/alpha/ch_p4/include +MPlib = $(MPdir)/alpha/ch_p4/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lcxml +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 +# +LINKER = f77 +LINKFLAGS = -nofor_main -O5 -arch host -tune host +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS_elan b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS_elan new file mode 100644 index 000000000..f9550412c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.Tru64_FBLAS_elan @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Tru64_FBLAS_elan +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = -lmpi -lelan +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lcxml +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 +# +LINKER = f77 +LINKFLAGS = -nofor_main -O5 -arch host -tune host +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.UNKNOWN.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.UNKNOWN.in new file mode 100644 index 000000000..8cbbd8242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/Make.UNKNOWN.in @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = @SHELL@ +# +CD = @CD@ +CP = @CP@ +LN_S = @LN_S@ +MKDIR = @MKDIR@ +RM = @RM@ +TOUCH = @TOUCH@ +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = @ARCH@ +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = @MPDIR@ +MPinc = @MPINC@ +MPlib = @MPLIB@ +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = @LADIR@ +LAinc = @LAINC@ +LAlib = @LALIB@ +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = @F2CDEFS@ +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = @CC@ +CCNOOPT = $(HPL_DEFS) @CCNOOPT@ +CCFLAGS = $(HPL_DEFS) @CCFLAGS@ +# +LINKER = @LINKER@ +LINKFLAGS = @LINKFLAGS@ +# +ARCHIVER = @ARCHIVER@ +ARFLAGS = @ARFLAGS@ +RANLIB = @RANLIB@ +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/make_generic b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/make_generic new file mode 100644 index 000000000..68cf74a3a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/setup/make_generic @@ -0,0 +1,83 @@ +#!/bin/sh +# +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# +# Configure script to create Make.UNKNOWN from Make.UNKNOWN.in for the +# HPL distribution, so users without a real Unix system can have a gene- +# ric Make.UNKNOWN to edit for their needs. This script substitutes +# pathless version of all the system programs, and commonly used options +# values into Make.UNKNOWN.in. +# +######################################################################## +# +sed -e 's%@SHELL@%/bin/sh%' \ + -e 's%@CD@%cd%' \ + -e 's%@CP@%cp%' \ + -e 's%@LN_S@%ln -s%' \ + -e 's%@MKDIR@%mkdir%' \ + -e 's%@RM@%/bin/rm -f%' \ + -e 's%@TOUCH@%touch%' \ + -e 's%@ARCH@%UNKNOWN%' \ + -e 's%@CC@%mpicc%' \ + -e 's%@CCNOOPT@%%' \ + -e 's%@CCFLAGS@%%' \ + -e 's%@LINKER@%mpif77%' \ + -e 's%@LINKFLAGS@%%' \ + -e 's%@ARCHIVER@%ar%' \ + -e 's%@ARFLAGS@%r%' \ + -e 's%@RANLIB@%echo%' \ + -e 's%@MPDIR@%%' \ + -e 's%@MPINC@%%' \ + -e 's%@MPLIB@%%' \ + -e 's%@F2CDEFS@%-DAdd_ -DF77_INTEGER=int -DStringSunStyle%' \ + -e 's%@LADIR@%%' \ + -e 's%@LAINC@%%' \ + -e 's%@LALIB@%-lblas%' \ + Make.UNKNOWN.in > Make.UNKNOWN +# +######################################################################## diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.am new file mode 100644 index 000000000..2e6d3d454 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.am @@ -0,0 +1,42 @@ +AM_CPPFLAGS = -I$(top_srcdir)/../include + +lib_LIBRARIES = libhpl.a + +libhpl_a_SOURCES = \ +auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \ +blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \ +blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \ +blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \ +comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \ +comm/HPL_binit.c comm/HPL_bwait.c 
comm/HPL_blong.c comm/HPL_1ring.c \ +comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \ +grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \ +grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \ +grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \ +panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \ +pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \ +pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \ +pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \ +pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \ +pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \ +pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \ +pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \ +pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \ +pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \ +pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \ +pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \ +pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \ +pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \ +pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \ 
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \ +pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \ +pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.in new file mode 100644 index 000000000..139ecbad0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/Makefile.in @@ -0,0 +1,1355 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am 
$(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LIBRARIES = $(lib_LIBRARIES) +AR = ar +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +libhpl_a_AR = $(AR) $(ARFLAGS) +libhpl_a_LIBADD = +am__dirstamp = $(am__leading_dot)dirstamp +am_libhpl_a_OBJECTS = auxil/HPL_dlatcpy.$(OBJEXT) \ + auxil/HPL_fprintf.$(OBJEXT) auxil/HPL_dlacpy.$(OBJEXT) \ + auxil/HPL_dlamch.$(OBJEXT) blas/HPL_dscal.$(OBJEXT) \ + blas/HPL_dtrsm.$(OBJEXT) blas/HPL_dtrsv.$(OBJEXT) \ + blas/HPL_idamax.$(OBJEXT) blas/HPL_dgemv.$(OBJEXT) \ + blas/HPL_dscal.$(OBJEXT) blas/HPL_daxpy.$(OBJEXT) \ + blas/HPL_dcopy.$(OBJEXT) blas/HPL_dgemm.$(OBJEXT) \ + blas/HPL_dgemv.$(OBJEXT) blas/HPL_dger.$(OBJEXT) \ + comm/HPL_sdrv.$(OBJEXT) comm/HPL_send.$(OBJEXT) \ + comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \ + comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \ + comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \ + comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \ + comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \ + comm/HPL_packL.$(OBJEXT) grid/HPL_reduce.$(OBJEXT) \ + grid/HPL_sum.$(OBJEXT) grid/HPL_grid_info.$(OBJEXT) \ + grid/HPL_grid_init.$(OBJEXT) grid/HPL_all_reduce.$(OBJEXT) \ + grid/HPL_broadcast.$(OBJEXT) grid/HPL_grid_exit.$(OBJEXT) \ + grid/HPL_max.$(OBJEXT) grid/HPL_min.$(OBJEXT) \ + grid/HPL_all_reduce.$(OBJEXT) grid/HPL_barrier.$(OBJEXT) \ + panel/HPL_pdpanel_disp.$(OBJEXT) \ + panel/HPL_pdpanel_free.$(OBJEXT) \ + panel/HPL_pdpanel_init.$(OBJEXT) \ + panel/HPL_pdpanel_new.$(OBJEXT) pauxil/HPL_pdlamch.$(OBJEXT) \ + pauxil/HPL_pdlange.$(OBJEXT) pauxil/HPL_indxg2p.$(OBJEXT) \ + pauxil/HPL_numroc.$(OBJEXT) pauxil/HPL_numrocI.$(OBJEXT) \ + pauxil/HPL_numrocI.$(OBJEXT) pauxil/HPL_dlaswp00N.$(OBJEXT) \ + pauxil/HPL_dlaswp01N.$(OBJEXT) pauxil/HPL_dlaswp01T.$(OBJEXT) \ + pauxil/HPL_dlaswp02N.$(OBJEXT) 
pauxil/HPL_dlaswp03N.$(OBJEXT) \ + pauxil/HPL_dlaswp03T.$(OBJEXT) pauxil/HPL_dlaswp04N.$(OBJEXT) \ + pauxil/HPL_dlaswp04T.$(OBJEXT) pauxil/HPL_dlaswp05N.$(OBJEXT) \ + pauxil/HPL_dlaswp05T.$(OBJEXT) pauxil/HPL_dlaswp06N.$(OBJEXT) \ + pauxil/HPL_dlaswp06T.$(OBJEXT) pauxil/HPL_infog2l.$(OBJEXT) \ + pauxil/HPL_dlaswp10N.$(OBJEXT) pauxil/HPL_pwarn.$(OBJEXT) \ + pfact/HPL_pdpanllN.$(OBJEXT) pfact/HPL_pdpanllT.$(OBJEXT) \ + pfact/HPL_pdpanrlN.$(OBJEXT) pfact/HPL_pdpanrlT.$(OBJEXT) \ + pfact/HPL_pdrpancrN.$(OBJEXT) pfact/HPL_pdrpancrT.$(OBJEXT) \ + pfact/HPL_pdrpanllN.$(OBJEXT) pfact/HPL_pdrpanllT.$(OBJEXT) \ + pfact/HPL_pdrpanrlN.$(OBJEXT) pfact/HPL_pdrpanrlT.$(OBJEXT) \ + pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdfact.$(OBJEXT) \ + pfact/HPL_dlocmax.$(OBJEXT) pfact/HPL_pdpancrT.$(OBJEXT) \ + pfact/HPL_pdpancrN.$(OBJEXT) pfact/HPL_dlocmax.$(OBJEXT) \ + pfact/HPL_dlocswpN.$(OBJEXT) pfact/HPL_dlocswpT.$(OBJEXT) \ + pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdpanllN.$(OBJEXT) \ + pfact/HPL_pdpanllT.$(OBJEXT) pfact/HPL_pdpanrlN.$(OBJEXT) \ + pfact/HPL_pdpanrlT.$(OBJEXT) pfact/HPL_pdrpancrN.$(OBJEXT) \ + pfact/HPL_pdrpancrT.$(OBJEXT) pfact/HPL_pdrpanllN.$(OBJEXT) \ + pfact/HPL_pdrpanllT.$(OBJEXT) pfact/HPL_pdrpanrlN.$(OBJEXT) \ + pfact/HPL_pdrpanrlT.$(OBJEXT) pauxil/HPL_pabort.$(OBJEXT) \ + pauxil/HPL_pdlamch.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdupdateNN.$(OBJEXT) \ + pgesv/HPL_pdupdateNT.$(OBJEXT) pgesv/HPL_pdupdateTN.$(OBJEXT) \ + pgesv/HPL_pdupdateTT.$(OBJEXT) pgesv/HPL_equil.$(OBJEXT) \ + pgesv/HPL_pipid.$(OBJEXT) pgesv/HPL_plindx0.$(OBJEXT) \ + pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_plindx1.$(OBJEXT) \ + pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_rollN.$(OBJEXT) \ + pgesv/HPL_rollT.$(OBJEXT) pgesv/HPL_spreadN.$(OBJEXT) \ + pgesv/HPL_spreadT.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) 
pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdlaswp00N.$(OBJEXT) \ + pgesv/HPL_pdlaswp00T.$(OBJEXT) pgesv/HPL_pdlaswp01N.$(OBJEXT) \ + pgesv/HPL_pdlaswp01T.$(OBJEXT) pgesv/HPL_pdtrsv.$(OBJEXT) \ + pgesv/HPL_pdupdateNN.$(OBJEXT) pgesv/HPL_pdupdateNT.$(OBJEXT) \ + pgesv/HPL_pdupdateTN.$(OBJEXT) pgesv/HPL_pdupdateTT.$(OBJEXT) \ + pgesv/HPL_logsort.$(OBJEXT) pgesv/HPL_perm.$(OBJEXT) +libhpl_a_OBJECTS = $(am_libhpl_a_OBJECTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = auxil/$(DEPDIR)/HPL_dlacpy.Po \ + auxil/$(DEPDIR)/HPL_dlamch.Po auxil/$(DEPDIR)/HPL_dlatcpy.Po \ + auxil/$(DEPDIR)/HPL_fprintf.Po blas/$(DEPDIR)/HPL_daxpy.Po \ + blas/$(DEPDIR)/HPL_dcopy.Po blas/$(DEPDIR)/HPL_dgemm.Po \ + blas/$(DEPDIR)/HPL_dgemv.Po blas/$(DEPDIR)/HPL_dger.Po \ + blas/$(DEPDIR)/HPL_dscal.Po blas/$(DEPDIR)/HPL_dtrsm.Po \ + blas/$(DEPDIR)/HPL_dtrsv.Po blas/$(DEPDIR)/HPL_idamax.Po \ + comm/$(DEPDIR)/HPL_1rinM.Po comm/$(DEPDIR)/HPL_1ring.Po \ + comm/$(DEPDIR)/HPL_2rinM.Po comm/$(DEPDIR)/HPL_2ring.Po \ + comm/$(DEPDIR)/HPL_bcast.Po comm/$(DEPDIR)/HPL_binit.Po \ + comm/$(DEPDIR)/HPL_blonM.Po comm/$(DEPDIR)/HPL_blong.Po \ + comm/$(DEPDIR)/HPL_bwait.Po comm/$(DEPDIR)/HPL_packL.Po \ + comm/$(DEPDIR)/HPL_recv.Po comm/$(DEPDIR)/HPL_sdrv.Po \ + comm/$(DEPDIR)/HPL_send.Po grid/$(DEPDIR)/HPL_all_reduce.Po \ + grid/$(DEPDIR)/HPL_barrier.Po grid/$(DEPDIR)/HPL_broadcast.Po \ + grid/$(DEPDIR)/HPL_grid_exit.Po \ + grid/$(DEPDIR)/HPL_grid_info.Po \ + grid/$(DEPDIR)/HPL_grid_init.Po grid/$(DEPDIR)/HPL_max.Po \ + grid/$(DEPDIR)/HPL_min.Po 
grid/$(DEPDIR)/HPL_reduce.Po \ + grid/$(DEPDIR)/HPL_sum.Po panel/$(DEPDIR)/HPL_pdpanel_disp.Po \ + panel/$(DEPDIR)/HPL_pdpanel_free.Po \ + panel/$(DEPDIR)/HPL_pdpanel_init.Po \ + panel/$(DEPDIR)/HPL_pdpanel_new.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp00N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp01N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp01T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp02N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp03N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp03T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp04N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp04T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp05N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp05T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp06N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp06T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp10N.Po \ + pauxil/$(DEPDIR)/HPL_indxg2p.Po \ + pauxil/$(DEPDIR)/HPL_infog2l.Po pauxil/$(DEPDIR)/HPL_numroc.Po \ + pauxil/$(DEPDIR)/HPL_numrocI.Po pauxil/$(DEPDIR)/HPL_pabort.Po \ + pauxil/$(DEPDIR)/HPL_pdlamch.Po \ + pauxil/$(DEPDIR)/HPL_pdlange.Po pauxil/$(DEPDIR)/HPL_pwarn.Po \ + pfact/$(DEPDIR)/HPL_dlocmax.Po pfact/$(DEPDIR)/HPL_dlocswpN.Po \ + pfact/$(DEPDIR)/HPL_dlocswpT.Po pfact/$(DEPDIR)/HPL_pdfact.Po \ + pfact/$(DEPDIR)/HPL_pdmxswp.Po pfact/$(DEPDIR)/HPL_pdpancrN.Po \ + pfact/$(DEPDIR)/HPL_pdpancrT.Po \ + pfact/$(DEPDIR)/HPL_pdpanllN.Po \ + pfact/$(DEPDIR)/HPL_pdpanllT.Po \ + pfact/$(DEPDIR)/HPL_pdpanrlN.Po \ + pfact/$(DEPDIR)/HPL_pdpanrlT.Po \ + pfact/$(DEPDIR)/HPL_pdrpancrN.Po \ + pfact/$(DEPDIR)/HPL_pdrpancrT.Po \ + pfact/$(DEPDIR)/HPL_pdrpanllN.Po \ + pfact/$(DEPDIR)/HPL_pdrpanllT.Po \ + pfact/$(DEPDIR)/HPL_pdrpanrlN.Po \ + pfact/$(DEPDIR)/HPL_pdrpanrlT.Po pgesv/$(DEPDIR)/HPL_equil.Po \ + pgesv/$(DEPDIR)/HPL_logsort.Po pgesv/$(DEPDIR)/HPL_pdgesv.Po \ + pgesv/$(DEPDIR)/HPL_pdgesv0.Po pgesv/$(DEPDIR)/HPL_pdgesvK1.Po \ + pgesv/$(DEPDIR)/HPL_pdgesvK2.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po \ + pgesv/$(DEPDIR)/HPL_pdtrsv.Po \ + 
pgesv/$(DEPDIR)/HPL_pdupdateNN.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateNT.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateTN.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateTT.Po pgesv/$(DEPDIR)/HPL_perm.Po \ + pgesv/$(DEPDIR)/HPL_pipid.Po pgesv/$(DEPDIR)/HPL_plindx0.Po \ + pgesv/$(DEPDIR)/HPL_plindx1.Po pgesv/$(DEPDIR)/HPL_plindx10.Po \ + pgesv/$(DEPDIR)/HPL_rollN.Po pgesv/$(DEPDIR)/HPL_rollT.Po \ + pgesv/$(DEPDIR)/HPL_spreadN.Po pgesv/$(DEPDIR)/HPL_spreadT.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libhpl_a_SOURCES) +DIST_SOURCES = $(libhpl_a_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ 
+host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/../include +lib_LIBRARIES = libhpl.a +libhpl_a_SOURCES = \ +auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \ +blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \ +blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \ +blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \ +comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \ +comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \ +comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \ +grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \ +grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \ +grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \ +panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \ +pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \ +pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \ +pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \ +pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \ +pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \ +pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \ +pauxil/HPL_infog2l.c 
pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \ +pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \ +pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \ +pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \ +pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \ +pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \ +pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \ +pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \ +pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \ +pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +Makefile: $(srcdir)/Makefile.in 
$(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLIBRARIES: $(lib_LIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; } + @$(POST_INSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + if test -f $$p; then \ + $(am__strip_dir) \ + echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \ + ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \ + else :; fi; \ + done + +uninstall-libLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(libdir)'; $(am__uninstall_files_from_dir) + +clean-libLIBRARIES: + -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES) +auxil/$(am__dirstamp): + @$(MKDIR_P) auxil + @: > auxil/$(am__dirstamp) +auxil/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) auxil/$(DEPDIR) + @: > auxil/$(DEPDIR)/$(am__dirstamp) 
+auxil/HPL_dlatcpy.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_fprintf.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_dlacpy.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_dlamch.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +blas/$(am__dirstamp): + @$(MKDIR_P) blas + @: > blas/$(am__dirstamp) +blas/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) blas/$(DEPDIR) + @: > blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dscal.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dtrsm.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dtrsv.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_idamax.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dgemv.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_daxpy.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dcopy.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dgemm.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dger.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +comm/$(am__dirstamp): + @$(MKDIR_P) comm + @: > comm/$(am__dirstamp) +comm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) comm/$(DEPDIR) + @: > comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_sdrv.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_send.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_recv.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_bcast.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_binit.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_bwait.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_blong.$(OBJEXT): comm/$(am__dirstamp) \ + 
comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_1ring.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_1rinM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_2rinM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_2ring.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_blonM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_packL.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +grid/$(am__dirstamp): + @$(MKDIR_P) grid + @: > grid/$(am__dirstamp) +grid/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) grid/$(DEPDIR) + @: > grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_reduce.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_sum.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_info.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_init.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_all_reduce.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_broadcast.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_exit.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_max.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_min.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_barrier.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +panel/$(am__dirstamp): + @$(MKDIR_P) panel + @: > panel/$(am__dirstamp) +panel/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) panel/$(DEPDIR) + @: > panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_disp.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_free.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_init.$(OBJEXT): panel/$(am__dirstamp) \ + 
panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_new.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +pauxil/$(am__dirstamp): + @$(MKDIR_P) pauxil + @: > pauxil/$(am__dirstamp) +pauxil/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pauxil/$(DEPDIR) + @: > pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pdlamch.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pdlange.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_indxg2p.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_numroc.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_numrocI.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp00N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp01N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp01T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp02N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp03N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp03T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp04N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp04T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp05N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp05T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp06N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp06T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_infog2l.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp10N.$(OBJEXT): 
pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pwarn.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pfact/$(am__dirstamp): + @$(MKDIR_P) pfact + @: > pfact/$(am__dirstamp) +pfact/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pfact/$(DEPDIR) + @: > pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanllN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanllT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpancrN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpancrT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanllN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanllT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdmxswp.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdfact.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocmax.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpancrT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpancrN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocswpN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocswpT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pabort.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pgesv/$(am__dirstamp): + @$(MKDIR_P) pgesv + @: > 
pgesv/$(am__dirstamp) +pgesv/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pgesv/$(DEPDIR) + @: > pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesv0.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesv.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesvK1.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesvK2.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateNN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateNT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateTN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateTT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_equil.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pipid.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx0.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx10.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx1.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_rollN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_rollT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_spreadN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_spreadT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp00N.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp00T.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp01N.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp01T.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) 
+pgesv/HPL_pdtrsv.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_logsort.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_perm.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) + +libhpl.a: $(libhpl_a_OBJECTS) $(libhpl_a_DEPENDENCIES) $(EXTRA_libhpl_a_DEPENDENCIES) + $(AM_V_at)-rm -f libhpl.a + $(AM_V_AR)$(libhpl_a_AR) libhpl.a $(libhpl_a_OBJECTS) $(libhpl_a_LIBADD) + $(AM_V_at)$(RANLIB) libhpl.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f auxil/*.$(OBJEXT) + -rm -f blas/*.$(OBJEXT) + -rm -f comm/*.$(OBJEXT) + -rm -f grid/*.$(OBJEXT) + -rm -f panel/*.$(OBJEXT) + -rm -f pauxil/*.$(OBJEXT) + -rm -f pfact/*.$(OBJEXT) + -rm -f pgesv/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlacpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlamch.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlatcpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_fprintf.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_daxpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dcopy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dger.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dscal.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@blas/$(DEPDIR)/HPL_idamax.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1rinM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1ring.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2rinM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2ring.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_binit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blonM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blong.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bwait.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_packL.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_recv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_sdrv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_send.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_all_reduce.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_barrier.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_broadcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_exit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@grid/$(DEPDIR)/HPL_max.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_min.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_reduce.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_sum.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_disp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_free.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_new.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp00N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp02N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp10N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_indxg2p.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_infog2l.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numroc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numrocI.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pabort.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlamch.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlange.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pwarn.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocmax.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdfact.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdmxswp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlN.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_equil.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_logsort.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv0.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK1.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdtrsv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_perm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pipid.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx0.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx1.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx10.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadT.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ 
`$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + 
sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f auxil/$(DEPDIR)/$(am__dirstamp) + -rm -f auxil/$(am__dirstamp) + -rm -f blas/$(DEPDIR)/$(am__dirstamp) + -rm -f blas/$(am__dirstamp) + -rm -f comm/$(DEPDIR)/$(am__dirstamp) + -rm -f comm/$(am__dirstamp) + -rm -f grid/$(DEPDIR)/$(am__dirstamp) + -rm -f grid/$(am__dirstamp) + -rm -f panel/$(DEPDIR)/$(am__dirstamp) + -rm -f panel/$(am__dirstamp) + -rm -f pauxil/$(DEPDIR)/$(am__dirstamp) + -rm -f pauxil/$(am__dirstamp) + -rm -f pfact/$(DEPDIR)/$(am__dirstamp) + -rm -f pfact/$(am__dirstamp) + -rm -f pgesv/$(DEPDIR)/$(am__dirstamp) + -rm -f pgesv/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po + -rm -f auxil/$(DEPDIR)/HPL_dlamch.Po + -rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po + -rm -f auxil/$(DEPDIR)/HPL_fprintf.Po + -rm -f blas/$(DEPDIR)/HPL_daxpy.Po + -rm -f blas/$(DEPDIR)/HPL_dcopy.Po + -rm -f blas/$(DEPDIR)/HPL_dgemm.Po + -rm -f blas/$(DEPDIR)/HPL_dgemv.Po + -rm -f blas/$(DEPDIR)/HPL_dger.Po + -rm -f blas/$(DEPDIR)/HPL_dscal.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsm.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsv.Po + -rm -f blas/$(DEPDIR)/HPL_idamax.Po + -rm -f comm/$(DEPDIR)/HPL_1rinM.Po + -rm -f comm/$(DEPDIR)/HPL_1ring.Po + -rm -f comm/$(DEPDIR)/HPL_2rinM.Po + -rm -f comm/$(DEPDIR)/HPL_2ring.Po + -rm -f comm/$(DEPDIR)/HPL_bcast.Po + -rm -f comm/$(DEPDIR)/HPL_binit.Po + -rm -f comm/$(DEPDIR)/HPL_blonM.Po + -rm -f comm/$(DEPDIR)/HPL_blong.Po + -rm -f comm/$(DEPDIR)/HPL_bwait.Po + -rm -f comm/$(DEPDIR)/HPL_packL.Po + -rm -f comm/$(DEPDIR)/HPL_recv.Po + -rm -f comm/$(DEPDIR)/HPL_sdrv.Po + -rm -f comm/$(DEPDIR)/HPL_send.Po + -rm -f grid/$(DEPDIR)/HPL_all_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_barrier.Po + -rm -f 
grid/$(DEPDIR)/HPL_broadcast.Po + -rm -f grid/$(DEPDIR)/HPL_grid_exit.Po + -rm -f grid/$(DEPDIR)/HPL_grid_info.Po + -rm -f grid/$(DEPDIR)/HPL_grid_init.Po + -rm -f grid/$(DEPDIR)/HPL_max.Po + -rm -f grid/$(DEPDIR)/HPL_min.Po + -rm -f grid/$(DEPDIR)/HPL_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_sum.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po + -rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po + -rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po + -rm -f pauxil/$(DEPDIR)/HPL_numroc.Po + -rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po + -rm -f pauxil/$(DEPDIR)/HPL_pabort.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po + -rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdfact.Po + -rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po + -rm -f 
pfact/$(DEPDIR)/HPL_pdrpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po + -rm -f pgesv/$(DEPDIR)/HPL_equil.Po + -rm -f pgesv/$(DEPDIR)/HPL_logsort.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po + -rm -f pgesv/$(DEPDIR)/HPL_perm.Po + -rm -f pgesv/$(DEPDIR)/HPL_pipid.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollN.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollT.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po + -rm -f auxil/$(DEPDIR)/HPL_dlamch.Po + -rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po + -rm -f auxil/$(DEPDIR)/HPL_fprintf.Po + -rm -f blas/$(DEPDIR)/HPL_daxpy.Po + -rm -f blas/$(DEPDIR)/HPL_dcopy.Po + -rm -f blas/$(DEPDIR)/HPL_dgemm.Po + -rm -f blas/$(DEPDIR)/HPL_dgemv.Po + -rm -f blas/$(DEPDIR)/HPL_dger.Po + -rm -f blas/$(DEPDIR)/HPL_dscal.Po 
+ -rm -f blas/$(DEPDIR)/HPL_dtrsm.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsv.Po + -rm -f blas/$(DEPDIR)/HPL_idamax.Po + -rm -f comm/$(DEPDIR)/HPL_1rinM.Po + -rm -f comm/$(DEPDIR)/HPL_1ring.Po + -rm -f comm/$(DEPDIR)/HPL_2rinM.Po + -rm -f comm/$(DEPDIR)/HPL_2ring.Po + -rm -f comm/$(DEPDIR)/HPL_bcast.Po + -rm -f comm/$(DEPDIR)/HPL_binit.Po + -rm -f comm/$(DEPDIR)/HPL_blonM.Po + -rm -f comm/$(DEPDIR)/HPL_blong.Po + -rm -f comm/$(DEPDIR)/HPL_bwait.Po + -rm -f comm/$(DEPDIR)/HPL_packL.Po + -rm -f comm/$(DEPDIR)/HPL_recv.Po + -rm -f comm/$(DEPDIR)/HPL_sdrv.Po + -rm -f comm/$(DEPDIR)/HPL_send.Po + -rm -f grid/$(DEPDIR)/HPL_all_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_barrier.Po + -rm -f grid/$(DEPDIR)/HPL_broadcast.Po + -rm -f grid/$(DEPDIR)/HPL_grid_exit.Po + -rm -f grid/$(DEPDIR)/HPL_grid_info.Po + -rm -f grid/$(DEPDIR)/HPL_grid_init.Po + -rm -f grid/$(DEPDIR)/HPL_max.Po + -rm -f grid/$(DEPDIR)/HPL_min.Po + -rm -f grid/$(DEPDIR)/HPL_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_sum.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po + -rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po + -rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po + -rm -f pauxil/$(DEPDIR)/HPL_numroc.Po + -rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po + -rm -f pauxil/$(DEPDIR)/HPL_pabort.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po + -rm -f 
pauxil/$(DEPDIR)/HPL_pdlange.Po + -rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdfact.Po + -rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po + -rm -f pgesv/$(DEPDIR)/HPL_equil.Po + -rm -f pgesv/$(DEPDIR)/HPL_logsort.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po + -rm -f pgesv/$(DEPDIR)/HPL_perm.Po + -rm -f pgesv/$(DEPDIR)/HPL_pipid.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollN.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollT.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: 
uninstall-libLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLIBRARIES cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic distclean-tags \ + distdir dvi dvi-am html html-am info info-am install \ + install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am \ + install-libLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-libLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c new file mode 100644 index 000000000..bf0c5e727 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_abort.c @@ -0,0 +1,129 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_abort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_abort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_abort displays an error message on stderr and halts execution. 
+ * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR in function", SRNAME, cline ); + else + HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); + exit( 0 ); +/* + * End of HPL_abort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c new file mode 100644 index 000000000..ec71180eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlacpy.c @@ -0,0 +1,343 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Define default value for unrolling factors
 * #ifndef HPL_LACPY_M_DEPTH
 * #define HPL_LACPY_M_DEPTH      32
 * #define HPL_LACPY_LOG2_M_DEPTH  5
 * #endif
 * #ifndef HPL_LACPY_N_DEPTH
 * #define HPL_LACPY_N_DEPTH       4
 * #define HPL_LACPY_LOG2_N_DEPTH  2
 * #endif
 *
 * The row unrolling below is written for HPL_LACPY_M_DEPTH equal to
 * 1, 2, 4, 8, 16 or 32 and the column unrolling for HPL_LACPY_N_DEPTH
 * equal to 1, 2 or 4.  The matching HPL_LACPY_LOG2_* macro must be the
 * base-2 logarithm of the depth, since it is used as a shift count.
 */
#ifndef HPL_LACPY_M_DEPTH
#define    HPL_LACPY_M_DEPTH       4
#define    HPL_LACPY_LOG2_M_DEPTH  2
#endif
#ifndef HPL_LACPY_N_DEPTH
#define    HPL_LACPY_N_DEPTH       2
#define    HPL_LACPY_LOG2_N_DEPTH  1
#endif

#ifdef STDC_HEADERS
void HPL_dlacpy
(
   const int                        M,
   const int                        N,
   const double *                   A,
   const int                        LDA,
   double *                         B,
   const int                        LDB
)
#else
void HPL_dlacpy
( M, N, A, LDA, B, LDB )
   const int                        M;
   const int                        N;
   const double *                   A;
   const int                        LDA;
   double *                         B;
   const int                        LDB;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlacpy copies an array A into an array B.  Both arrays are
 * addressed column by column:  column j of A starts at A + j * LDA and
 * column j of B starts at B + j * LDB.
 *
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the number of rows of the arrays A and
 *         B. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry,  N specifies  the number of columns of the arrays A
 *         and B. N must be at least zero.
 *
 * A       (local input)                 const double *
 *         On entry, A points to an array of dimension (LDA,N).
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * B       (local output)                double *
 *         On entry, B points to an array of dimension (LDB,N). On exit,
 *         B is overwritten with A.
 *
 * LDB     (local input)                 const int
 *         On entry, LDB specifies the leading dimension of the array B.
 *         LDB must be at least MAX(1,M).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef HPL_LACPY_USE_COPY
/*
 * Column cursors used by the HPL_dcopy loop below.  They were missing
 * from this branch, which made the HPL_LACPY_USE_COPY build fail.
 */
   const double               * A0 = A;
   double                     * B0 = B;
   register int               j;
#else
#if   ( HPL_LACPY_N_DEPTH ==  1 )
   const double               * A0 = A;
   double                     * B0 = B;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
   const double               * A0 = A,              * A1 = A +     LDA;
   double                     * B0 = B,              * B1 = B +     LDB;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
   const double               * A0 = A,              * A1 = A +     LDA,
                              * A2 = A + (LDA << 1), * A3 = A + 3 * LDA;
   double                     * B0 = B,              * B1 = B +     LDB,
                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
#else
#error "HPL_LACPY_N_DEPTH must be 1, 2 or 4"
#endif
/*
 * incA / incB  : step from the end of one group of HPL_LACPY_N_DEPTH
 *                columns to the start of the next group;
 * incA0 / incB0: step from the end of one column to the start of the
 *                next one, used for the leftover columns.
 */
   const int                  incA = ( (unsigned int)(LDA) <<
                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
                              incB = ( (unsigned int)(LDB) <<
                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
                              incA0 = (unsigned int)(LDA) - M,
                              incB0 = (unsigned int)(LDB) - M;
   int                        mu, nu;
   register int               i, j;
#endif
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;

#ifdef HPL_LACPY_USE_COPY
   for( j = 0; j < N; j++, A0 += LDA, B0 += LDB )
      HPL_dcopy( M, A0, 1, B0, 1 );
#else
/*
 * mu and nu are M and N rounded down to a multiple of the row and
 * column unrolling depths.
 */
   mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) <<
                                      HPL_LACPY_LOG2_M_DEPTH );
   nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) <<
                                      HPL_LACPY_LOG2_N_DEPTH );
/*
 * Full groups of HPL_LACPY_N_DEPTH columns
 */
   for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH )
   {
      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH )
      {
#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 0] = A0[ 0];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0];
#endif

#if ( HPL_LACPY_M_DEPTH >  1 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 1] = A0[ 1];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1];
#endif

#endif
#if ( HPL_LACPY_M_DEPTH >  2 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2];
         B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3];
#endif

#endif
#if ( HPL_LACPY_M_DEPTH >  4 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5];
         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4];
         B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5];
         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6];
         B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7];
#endif

#endif
#if ( HPL_LACPY_M_DEPTH >  8 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9];
         B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11];
         B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13];
         B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8];
         B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9];
         B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10];
         B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11];
         B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12];
         B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13];
         B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14];
         B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15];
#endif

#endif
#if ( HPL_LACPY_M_DEPTH > 16 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17];
         B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19];
         B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21];
         B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23];
         B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25];
         B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27];
         B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29];
         B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16];
         B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17];
         B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18];
         B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19];
         B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20];
         B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21];
         B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22];
         B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23];
         B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24];
         B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25];
         B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26];
         B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27];
         B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28];
         B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29];
         B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30];
         B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31];
#endif

#endif

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
         A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH;
         A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH;
#endif
      }
/*
 * Leftover rows of the current column group
 */
      for( i = mu; i < M; i++ )
      {
#if   ( HPL_LACPY_N_DEPTH ==  1 )
         *B0 = *A0; B0++; A0++;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
         *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++;
#endif
      }
/*
 * Move every cursor to the top of the next column group
 */
#if   ( HPL_LACPY_N_DEPTH ==  1 )
      A0 += incA; B0 += incB;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
      A2 += incA; B2 += incB; A3 += incA; B3 += incB;
#endif
   }
/*
 * Leftover columns, copied one at a time through A0 / B0
 */
   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
   {
      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH,
           B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH )
      {
         B0[ 0] = A0[ 0];
#if ( HPL_LACPY_M_DEPTH >  1 )
         B0[ 1] = A0[ 1];
#endif
#if ( HPL_LACPY_M_DEPTH >  2 )
         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
#endif
#if ( HPL_LACPY_M_DEPTH >  4 )
         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
#endif
#if ( HPL_LACPY_M_DEPTH >  8 )
         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
#endif
#if ( HPL_LACPY_M_DEPTH > 16 )
         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
#endif
      }
      for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; }
   }
#endif
/*
 * End of HPL_dlacpy
 */
}
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static function prototypes + * --------------------------------------------------------------------- + */ +static void HPL_dlamc1 +STDC_ARGS( +( int *, int *, int *, int * ) ); +static void HPL_dlamc2 +STDC_ARGS( +( int *, int *, int *, double *, + int *, double *, int *, double * ) ); +static double HPL_dlamc3 +STDC_ARGS( +( const double, const double ) ); +static void HPL_dlamc4 +STDC_ARGS( +( int *, const double, const int ) ); +static void HPL_dlamc5 +STDC_ARGS( +( const int, const int, const int, const int, + int *, double * ) ); +static double HPL_dipow +STDC_ARGS( +( const double, const int ) ); + +#ifdef STDC_HEADERS +double HPL_dlamch +( + const HPL_T_MACH CMACH +) +#else +double HPL_dlamch +( CMACH ) + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum (sfmin) such + * that 1 / sfmin does not overflow, the base of the machine (base), the + * precision (prec), the number of (base) digits in the mantissa (t), + * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the + * minimum exponent before (gradual) underflow (emin), the underflow + * threshold (rmin) base**(emin-1), 
the largest exponent before overflow + * (emax), the overflow threshold (rmax) (base**emax)*(1-eps). + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamch.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * CMACH (local input) const HPL_T_MACH + * Specifies the value to be returned by HPL_dlamch + * = HPL_MACH_EPS, HPL_dlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_dlamch := sfmin + * = HPL_MACH_BASE, HPL_dlamch := base + * = HPL_MACH_PREC, HPL_dlamch := eps*base + * = HPL_MACH_MLEN, HPL_dlamch := t + * = HPL_MACH_RND, HPL_dlamch := rnd + * = HPL_MACH_EMIN, HPL_dlamch := emin + * = HPL_MACH_RMIN, HPL_dlamch := rmin + * = HPL_MACH_EMAX, HPL_dlamch := emax + * = HPL_MACH_RMAX, HPL_dlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double eps, sfmin, base, t, rnd, emin, rmin, emax, + rmax, prec; + double small; + static int first=1; + int beta=0, imax=0, imin=0, it=0, lrnd=0; +/* .. + * .. Executable Statements .. 
+ */ + if( first != 0 ) + { + first = 0; + HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); + base = (double)(beta); t = (double)(it); + if( lrnd != 0 ) + { rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; } + else + { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); } + prec = eps * base; emin = (double)(imin); emax = (double)(imax); + sfmin = rmin; small = HPL_rone / rmax; +/* + * Use SMALL plus a bit, to avoid the possibility of rounding causing + * overflow when computing 1/sfmin. + */ + if( small >= sfmin ) sfmin = small * ( HPL_rone + eps ); + } + + if( CMACH == HPL_MACH_EPS ) return( eps ); + if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); + if( CMACH == HPL_MACH_BASE ) return( base ); + if( CMACH == HPL_MACH_PREC ) return( prec ); + if( CMACH == HPL_MACH_MLEN ) return( t ); + if( CMACH == HPL_MACH_RND ) return( rnd ); + if( CMACH == HPL_MACH_EMIN ) return( emin ); + if( CMACH == HPL_MACH_RMIN ) return( rmin ); + if( CMACH == HPL_MACH_EMAX ) return( emax ); + if( CMACH == HPL_MACH_RMAX ) return( rmax ); + + return( eps ); +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static void HPL_dlamc1 +( + int * BETA, + int * T, + int * RND, + int * IEEE1 +) +#else +static void HPL_dlamc1 +( BETA, T, RND, IEEE1 ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * IEEE1, * RND, * T; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc1 determines the machine parameters given by BETA, T, RND, + * and IEEE1. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. 
B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * IEEE1 (local output) int * + * Specifies whether rounding appears to be done in the IEEE + * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b, c, f, one, qtr, savec, t1, t2; + static int first=1, lbeta, lieee1, lrnd, lt; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; one = HPL_rone; +/* + * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and + * RND. Throughout this routine we use the function HPL_dlamc3 to ensure + * that relevant values are stored and not held in registers, or are not + * affected by optimizers. + * + * Compute a = 2.0**m with the smallest positive integer m such that + * fl( a + 1.0 ) == a. + */ + a = HPL_rone; c = HPL_rone; + do + { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } + while( c == HPL_rone ); +/* + * Now compute b = 2.0**m with the smallest positive integer m such that + * fl( a + b ) > a. + */ + b = HPL_rone; c = HPL_dlamc3( a, b ); + while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); } +/* + * Now compute the base. a and c are neighbouring floating point num- + * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe- + * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to + * BETA and not (BETA-1). 
+ */ + qtr = one / 4.0; savec = c; + c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr); +/* + * Now determine whether rounding or chopping occurs, by adding a bit + * less than BETA/2 and a bit more than BETA/2 to a. + */ + b = (double)(lbeta); + f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a ); + if( c == a ) { lrnd = 1; } else { lrnd = 0; } + f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a ); + if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; +/* + * Try and decide whether rounding is done in the IEEE round to nea- + * rest style. b/2 is half a unit in the last place of the two numbers + * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa- + * vec is odd. Thus adding b/2 to a should not change a, but adding b/2 + * to savec should change savec. + */ + t1 = HPL_dlamc3( b / HPL_rtwo, a ); + t2 = HPL_dlamc3( b / HPL_rtwo, savec ); + if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; + else lieee1 = 0; +/* + * Now find the mantissa, T. It should be the integer part of log to the + * base BETA of a, however it is safer to determine T by powering. So we + * find T as the smallest positive integer for which fl( beta**t + 1.0 ) + * is equal to 1.0. + */ + lt = 0; a = HPL_rone; c = HPL_rone; + + do + { + lt++; a *= (double)(lbeta); + c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); + } while( c == HPL_rone ); + } + + *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; +} + +#ifdef STDC_HEADERS +static void HPL_dlamc2 +( + int * BETA, + int * T, + int * RND, + double * EPS, + int * EMIN, + double * RMIN, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * EMAX, * EMIN, * RND, * T; + double * EPS, * RMAX, * RMIN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc2 determines the machine parameters specified in its argu- + * ment list. 
+ * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself + * based on a function PARANOIA by W. Kahan of the University of Cali- + * fornia at Berkeley for the computation of the relative machine epsi- + * lon eps. + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * EPS (local output) double * + * The smallest positive number such that fl( 1.0 - EPS ) < 1.0, + * where fl denotes the computed value. + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow occurs. + * + * RMIN (local output) double * + * The smallest normalized number for the machine, given by + * BASE**( EMIN - 1 ), where BASE is the floating point value + * of BETA. + * + * EMAX (local output) int * + * The maximum exponent before overflow occurs. + * + * RMAX (local output) double * + * The largest positive number for the machine, given by + * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + * value of BETA. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double leps, lrmax, lrmin; + double a, b, c, half, one, rbase, sixth, small, + third, two, zero; + static int first=1, iwarn=0, lbeta=0, lemax, lemin, + lt=0; + int gnmin=0, gpmin=0, i, ieee, lieee1=0, + lrnd=0, ngnmin=0, ngpmin=0; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo; +/* + * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA, + * T, RND, EPS, EMIN and RMIN. 
+ * + * Throughout this routine we use the function HPL_dlamc3 to ensure that + * relevant values are stored and not held in registers, or are not af- + * fected by optimizers. + * + * HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1. + */ + HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 ); +/* + * Start to find eps. + */ + b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a; +/* + * Try some tricks to see whether or not this is the correct EPS. + */ + b = two / 3.0; + half = one / HPL_rtwo; + sixth = HPL_dlamc3( b, -half ); + third = HPL_dlamc3( sixth, sixth ); + b = HPL_dlamc3( third, -half ); + b = HPL_dlamc3( b, sixth ); + b = Mabs( b ); if( b < leps ) b = leps; + + leps = HPL_rone; + + while( ( leps > b ) && ( b > zero ) ) + { + leps = b; + c = HPL_dlamc3( half * leps, + HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) ); + c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c ); + c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c ); + } + if( a < leps ) leps = a; +/* + * Computation of EPS complete. + * + * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). Keep + * dividing a by BETA until (gradual) underflow occurs. This is detected + * when we cannot recover the previous a. 
+ */ + rbase = one / (double)(lbeta); small = one; + for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero ); + a = HPL_dlamc3( one, small ); + HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta ); + HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta ); + + ieee = 0; + + if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) ) + { + if( ngpmin == gpmin ) + { +/* + * Non twos-complement machines, no gradual underflow; e.g., VAX ) + */ + lemin = ngpmin; + } + else if( ( gpmin-ngpmin ) == 3 ) + { +/* + * Non twos-complement machines with gradual underflow; e.g., IEEE stan- + * dard followers + */ + lemin = ngpmin - 1 + lt; ieee = 1; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, gpmin ); + iwarn = 1; + } + } + else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) + { + if( Mabs( ngpmin-ngnmin ) == 1 ) + { +/* + * Twos-complement machines, no gradual underflow; e.g., CYBER 205 + */ + lemin = Mmax( ngpmin, ngnmin ); + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) + { + if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) + { +/* + * Twos-complement machines with gradual underflow; no known machine + */ + lemin = Mmax( ngpmin, ngnmin ) - 1 + lt; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); + lemin = Mmin( lemin, gnmin ); iwarn = 1; + } +/* + * Comment out this if block if EMIN is ok + */ + if( iwarn != 0 ) + { + first = 1; + HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n", +"WARNING. The value EMIN may be incorrect:- EMIN =", lemin, +"If, after inspection, the value EMIN looks acceptable, please comment ", +"out the if block as marked within the code of routine HPL_dlamc2, ", +"otherwise supply EMIN explicitly." 
); + } +/* + * Assume IEEE arithmetic if we found denormalised numbers above, or if + * arithmetic seems to round in the IEEE style, determined in routine + * HPL_dlamc1. A true IEEE machine should have both things true; how- + * ever, faulty machines may have one or the other. + */ + if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; + else ieee = 0; +/* + * Compute RMIN by successive division by BETA. We could compute RMIN + * as BASE**( EMIN - 1 ), but some machines underflow during this compu- + * tation. + */ + lrmin = HPL_rone; + for( i = 0; i < 1 - lemin; i++ ) + lrmin = HPL_dlamc3( lrmin*rbase, zero ); +/* + * Finally, call HPL_dlamc5 to compute emax and rmax. + */ + HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); + } + *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; + *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; +} + +#ifdef STDC_HEADERS +static double HPL_dlamc3( const double A, const double B ) +#else +static double HPL_dlamc3( A, B ) +/* + * .. Scalar Arguments .. + */ + const double A, B; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc3 is intended to force a and b to be stored prior to doing + * the addition of a and b, for use in situations where optimizers + * might hold one of these in a register. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc3.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * A, B (local input) double + * The values a and b. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( A + B ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc4 +( + int * EMIN, + const double START, + const int BASE +) +#else +static void HPL_dlamc4( EMIN, START, BASE ) +/* + * .. Scalar Arguments .. 
+ */ + int * EMIN; + const int BASE; + const double START; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc4 is a service function for HPL_dlamc2. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc4.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow, computed by + * setting A = START and dividing by BASE until the previous A + * can not be recovered. + * + * START (local input) double + * The starting point for determining EMIN. + * + * BASE (local input) int + * The base of the machine. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b1, b2, c1, c2, d1, d2, one, rbase, zero; + int i; +/* .. + * .. Executable Statements .. + */ + a = START; one = HPL_rone; rbase = one / (double)(BASE); + zero = HPL_rzero; + *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a; + + do + { + (*EMIN)--; a = b1; + b1 = HPL_dlamc3( a / BASE, zero ); + c1 = HPL_dlamc3( b1 * BASE, zero ); + d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; + b2 = HPL_dlamc3( a * rbase, zero ); + c2 = HPL_dlamc3( b2 / rbase, zero ); + d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; + } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc5 +( + const int BETA, + const int P, + const int EMIN, + const int IEEE, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + const int BETA, EMIN, IEEE, P; + int * EMAX; + double * RMAX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc5 attempts to compute RMAX, the largest machine floating- + * point number, without overflow. It assumes that EMAX + abs(EMIN) sum + * approximately to a power of 2. 
It will fail on machines where this + * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + * EMAX = 28718). It will also fail if the value supplied for EMIN is + * too large (i.e. too close to zero), probably with overflow. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc5.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * BETA (local input) int + * The base of floating-point arithmetic. + * + * P (local input) int + * The number of base BETA digits in the mantissa of a floating- + * point value. + * + * EMIN (local input) int + * The minimum exponent before (gradual) underflow. + * + * IEEE (local input) int + * A logical flag specifying whether or not the arithmetic sys- + * tem is thought to comply with the IEEE standard. + * + * EMAX (local output) int * + * The largest exponent before overflow. + * + * RMAX (local output) double * + * The largest machine floating-point number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double oldy=HPL_rzero, recbas, y, z; + int exbits=1, expsum, i, lexp=1, nbits, try, + uexp; +/* .. + * .. Executable Statements .. + */ +/* + * First compute lexp and uexp, two powers of 2 that bound abs(EMIN). + * We then assume that EMAX + abs( EMIN ) will sum approximately to the + * bound that is closest to abs( EMIN ). (EMAX is the exponent of the + * required number RMAX). + */ +l_10: + try = (int)( (unsigned int)(lexp) << 1 ); + if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; } + + if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; } +/* + * Now -lexp is less than or equal to EMIN, and -uexp is greater than or + * equal to EMIN. exbits is the number of bits needed to store the expo- + * nent. 
+ */ + if( ( uexp+EMIN ) > ( -lexp-EMIN ) ) + { expsum = (int)( (unsigned int)(lexp) << 1 ); } + else + { expsum = (int)( (unsigned int)(uexp) << 1 ); } +/* + * expsum is the exponent range, approximately equal to EMAX - EMIN + 1. + */ + *EMAX = expsum + EMIN - 1; +/* + * nbits is the total number of bits needed to store a floating-point + * number. + */ + nbits = 1 + exbits + P; + + if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) + { +/* + * Either there are an odd number of bits used to store a floating-point + * number, which is unlikely, or some bits are not used in the represen- + * tation of numbers, which is possible, (e.g. Cray machines) or the + * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), + * which is perhaps the most likely. We have to assume the last alterna- + * tive. If this is true, then we need to reduce EMAX by one because + * there must be some way of representing zero in an implicit-bit sys- + * tem. On machines like Cray we are reducing EMAX by one unnecessarily. + */ + (*EMAX)--; + } + + if( IEEE != 0 ) + { +/* + * Assume we are on an IEEE machine which reserves one exponent for in- + * finity and NaN. + */ + (*EMAX)--; + } +/* + * Now create RMAX, the largest machine number, which should be equal to + * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being + * careful that the result is less than 1.0. + */ + recbas = HPL_rone / (double)(BETA); + z = (double)(BETA) - HPL_rone; + y = HPL_rzero; + + for( i = 0; i < P; i++ ) + { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); } + + if( y >= HPL_rone ) y = oldy; +/* + * Now multiply by BETA**EMAX to get RMAX. + */ + for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero ); + + *RMAX = y; +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static double HPL_dipow +( + const double X, + const int N +) +#else +static double HPL_dipow( X, N ) +/* + * .. Scalar Arguments .. 
+ */ + const int N; + const double X; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dipow computes the integer n-th power of a real scalar x. + * + * Arguments + * ========= + * + * X (local input) const double + * The real scalar x. + * + * N (local input) const int + * The integer power to raise x to. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r, y=HPL_rone; + int k, n; +/* .. + * .. Executable Statements .. + */ + if( X == HPL_rzero ) return( HPL_rzero ); + if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } + for( k = 0; k < n; k++ ) y *= r; + + return( y ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c new file mode 100644 index 000000000..82f118b6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlange.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_dlange +( + const HPL_T_NORM NORM, + const int M, + const int N, + const double * A, + const int LDA +) +#else +double HPL_dlange +( NORM, M, N, A, LDA ) + const HPL_T_NORM NORM; + const int M; + const int N; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a matrix A: + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. + * + * Arguments + * ========= + * + * NORM (local input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N), that + * contains the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero ); + + if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ) + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - M; + } + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ). + */ + work = (double*)malloc( (size_t)(N) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( j = 0; j < N; j++ ) + { + s = HPL_rzero; + for( i = 0; i < M; i++ ) { s += Mabs( *A ); A++; } + work[j] = s; A += LDA - M; + } +/* + * Find maximum sum of columns for 1-norm + */ + v0 = work[HPL_idamax( N, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ) + */ + work = (double*)malloc( (size_t)(M) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( i = 0; i < M; i++ ) { work[i] = HPL_rzero; } + + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { work[i] += Mabs( *A ); A++; } + A += LDA - M; + } +/* + * Find maximum sum of rows for inf-norm + */ + v0 = work[HPL_idamax( M, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + + return( v0 ); +/* + * End of HPL_dlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c new file mode 100644 index 000000000..f29df3cd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlaprnt.c @@ -0,0 +1,130 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlaprnt
+(
+   const int                        M,
+   const int                        N,
+   double *                         A,
+   const int                        IA,
+   const int                        JA,
+   const int                        LDA,
+   const char *                     CMATNM
+)
+#else
+void HPL_dlaprnt
+( M, N, A, IA, JA, LDA, CMATNM )
+   const int                        M;
+   const int                        N;
+   double *                         A;
+   const int                        IA;
+   const int                        JA;
+   const int                        LDA;
+   const char *                     CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlaprnt prints to standard error an M-by-N matrix A.
+ * One line per entry, column by column, labelled CMATNM(IA+i,JA+j).
+ * IA and JA only offset the labels; the entry read is Mptr(A,i,j,LDA).
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the number of rows of A. M must be at
+ *         least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of columns of A. N must be
+ *         at least zero.
+ *
+ * A       (local input)                 double *
+ *         On entry, A points to an array of dimension (LDA,N).
+ *
+ * IA      (local input)                 const int
+ *         On entry, IA specifies the starting row index to be printed.
+ *
+ * JA      (local input)                 const int
+ *         On entry, JA specifies the starting column index to be
+ *         printed.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,M).
+ *
+ * CMATNM  (local input)                 const char *
+ *         On entry, CMATNM is the name of the matrix to be printed.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j;
+/* ..
+ * .. Executable Statements ..
+ */
+   for( j = 0; j < N; j++ )
+   {
+      for( i = 0; i < M; i++ )
+      {
+         HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM, IA+i,
+                      JA+j, *(Mptr( A, i, j, LDA )) );
+      }
+   }
+/*
+ * End of HPL_dlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c
new file mode 100644
index 000000000..410451c24
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_dlatcpy.c
@@ -0,0 +1,398 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgement:
+ *    This product includes software developed at the University of
+ *    Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the University, the name of the Laboratory, or the
+ *    names of its contributors may not be used to endorse or promote
+ *    products derived from this software without specific written
+ *    permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+/*
+ * Define default value for unrolling factors
+ * #ifndef HPL_LATCPY_M_DEPTH
+ * #define HPL_LATCPY_M_DEPTH 32
+ * #define HPL_LATCPY_LOG2_M_DEPTH 5
+ * #endif
+ * #ifndef HPL_LATCPY_N_DEPTH
+ * #define HPL_LATCPY_N_DEPTH 4
+ * #define HPL_LATCPY_LOG2_N_DEPTH 2
+ * #endif
+ */
+#ifndef HPL_LATCPY_M_DEPTH
+#define    HPL_LATCPY_M_DEPTH        4
+#define    HPL_LATCPY_LOG2_M_DEPTH   2
+#endif
+#ifndef HPL_LATCPY_N_DEPTH
+#define    HPL_LATCPY_N_DEPTH        2
+#define    HPL_LATCPY_LOG2_N_DEPTH   1
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dlatcpy
+(
+   const int                        M,
+   const int                        N,
+   const double *                   A,
+   const int                        LDA,
+   double *                         B,
+   const int                        LDB
+)
+#else
+void HPL_dlatcpy
+( M, N, A, LDA, B, LDB )
+   const int                        M;
+   const int                        N;
+   const double *                   A;
+   const int                        LDA;
+   double *                         B;
+   const int                        LDB;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlatcpy copies the transpose of an array A into an array B.
+ * That is, B(i,j) = A(j,i) for 0 <= i < M and 0 <= j < N.
+ *
+ * Arguments
+ * =========
+ *
+ * M       (local input)                 const int
+ *         On entry, M specifies the number of rows of the array B and
+ *         the number of columns of A. M must be at least zero.
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the number of rows of the array A and
+ *         the number of columns of B. N must be at least zero.
+ *
+ * A       (local input)                 const double *
+ *         On entry, A points to an array of dimension (LDA,M).
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least MAX(1,N).
+ *
+ * B       (local output)                double *
+ *         On entry, B points to an array of dimension (LDB,N). On exit,
+ *         B is overwritten with the transpose of A.
+ *
+ * LDB     (local input)                 const int
+ *         On entry, LDB specifies the leading dimension of the array B.
+ *         LDB must be at least MAX(1,M).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+#ifdef HPL_LATCPY_USE_COPY
+   register int               j;
+#else
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+   const double               * A0 = A;
+   double                     * B0 = B;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+   const double               * A0 = A,              * A1 = A + 1;
+   double                     * B0 = B,              * B1 = B +     LDB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+   const double               * A0 = A,              * A1 = A + 1,
+                              * A2 = A + 2,          * A3 = A + 3;
+   double                     * B0 = B,              * B1 = B +     LDB,
+                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
+#endif
+   const int                  incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH),
+                              incB = ( (unsigned int)(LDB) <<
+                                       HPL_LATCPY_LOG2_N_DEPTH ) - M,
+                              incA0 = -M * LDA + 1, incB0 = LDB - M;
+   int                        mu, nu;
+   register int               i, j;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M <= 0 ) || ( N <= 0 ) ) return;
+
+#ifdef HPL_LATCPY_USE_COPY
+   for( j = 0; j < N; j++, B += LDB ) HPL_dcopy( M, A+j, LDA, B, 1 ); /* row j of A -> column j of B; A0/B0 do not exist in this branch */
+#else
+   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_M_DEPTH );
+   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
+                                      HPL_LATCPY_LOG2_N_DEPTH );
+
+   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 0] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA;
+         B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA;
+#endif
+
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 1] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA;
+         B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA;
+         B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA;
+         B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA;
+         B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA;
+         B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA;
+         B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA;
+         B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA;
+         B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA;
+         B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA;
+         B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA;
+         B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA;
+         B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA;
+         B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA;
+         B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA;
+         B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA;
+         B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA;
+         B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA;
+         B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA;
+         B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA;
+         B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA;
+         B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA;
+         B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA;
+         B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA;
+         B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA;
+         B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA;
+         B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA;
+         B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA;
+         B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA;
+         B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA;
+         B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA;
+         B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA;
+         B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA;
+         B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA;
+         B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA;
+         B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA;
+         B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA;
+         B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA;
+         B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA;
+         B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA;
+         B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA;
+         B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA;
+         B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA;
+         B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA;
+         B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA;
+         B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA;
+         B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA;
+         B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA;
+         B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA;
+         B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA;
+         B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA;
+         B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA;
+         B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA;
+         B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA;
+         B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA;
+         B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA;
+         B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA;
+         B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA;
+         B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA;
+         B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA;
+         B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA;
+         B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA;
+         B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA;
+         B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA;
+         B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA;
+         B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA;
+         B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA;
+         B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA;
+#endif
+
+#endif
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         B0 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
+         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
+#endif
+      }
+
+      for( i = mu; i < M; i++ )
+      {
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+         *B0 = *A0; B0++; A0 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
+         *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA;
+#endif
+      }
+
+#if   ( HPL_LATCPY_N_DEPTH == 1 )
+      A0 += incA; B0 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 2 )
+      A0 += incA; A1 += incA; B0 += incB; B1 += incB;
+#elif ( HPL_LATCPY_N_DEPTH == 4 )
+      A0 += incA; A1 += incA; A2 += incA; A3 += incA;
+      B0 += incB; B1 += incB; B2 += incB; B3 += incB;
+#endif
+   }
+
+   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
+   {
+      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH )
+      {
+         B0[ 0]=*A0; A0 += LDA;
+#if ( HPL_LATCPY_M_DEPTH >  1 )
+         B0[ 1]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  2 )
+         B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  4 )
+         B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA;
+         B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH >  8 )
+         B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA;
+         B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA;
+         B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA;
+         B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA;
+#endif
+#if ( HPL_LATCPY_M_DEPTH > 16 )
+         B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA;
+         B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA;
+         B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA;
+         B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA;
+         B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA;
+         B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA;
+         B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA;
+         B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA;
+#endif
+      }
+
+      for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; }
+   }
+#endif
+/*
+ * End of HPL_dlatcpy
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c
new file mode 100644
index 000000000..adaf22b39
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_fprintf.c
@@ -0,0 +1,114 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_fprintf
+(
+   FILE *                           STREAM,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_fprintf( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_fprintf is a wrapper around vfprintf flushing the output stream.
+ * The text is formatted directly into STREAM, so its length is not
+ * limited by any local buffer.
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry, ... is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   char                       * FORM;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   FORM   = va_arg( argptr, char * );
+#endif
+/* Format straight into STREAM: staging the text with vsprintf in a   */
+/* char[256] overflowed whenever the output exceeded 255 characters.  */
+   (void) vfprintf( STREAM, FORM, argptr );
+   va_end( argptr );
+
+   (void) fflush( STREAM );
+/*
+ * End of HPL_fprintf
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c
new file mode 100644
index 000000000..bc40818a9
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/HPL_warn.c
@@ -0,0 +1,134 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgement:
+ *    This product includes software developed at the University of
+ *    Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the University, the name of the Laboratory, or the
+ *    names of its contributors may not be used to endorse or promote
+ *    products derived from this software without specific written
+ *    permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_warn
+(
+   FILE *                           STREAM,
+   int                              LINE,
+   const char *                     SRNAME,
+   const char *                     FORM,
+   ...
+)
+#else
+void HPL_warn( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_warn displays an error message.
+ * The formatted message is truncated to fit the 128-byte local buffer
+ * (vsnprintf), then passed to HPL_fprintf with a location prefix.
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry, LINE specifies the line number in the file where
+ *         the error has occurred. When LINE is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME should be the name of the routine calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry, ... is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list                    argptr;
+   char                       cline[128];
+#ifndef STDC_HEADERS
+   FILE                       * STREAM;
+   int                        LINE;
+   char                       * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE   = va_arg( argptr, int );
+   SRNAME = va_arg( argptr, char * );
+   FORM   = va_arg( argptr, char * );
+#endif
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
+/*
+ * End of HPL_warn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_abort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_abort.o
new file mode 100644
index 000000000..25b7e6696
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_abort.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlacpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlacpy.o
new file mode 100644
index 000000000..6703f341f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlacpy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlamch.o
new file mode 100644
index 000000000..ef5c411f0
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlamch.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlange.o
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlange.o new file mode 100644 index 000000000..8fb657669 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlange.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlaprnt.o new file mode 100644 index 000000000..29c5f89f7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlaprnt.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlatcpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlatcpy.o new file mode 100644 index 000000000..fea336857 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_dlatcpy.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_fprintf.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_fprintf.o new file mode 100644 index 000000000..00861c5a9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_fprintf.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_warn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_warn.o new file mode 100644 index 000000000..e4944e00f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/HPL_warn.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Makefile new file mode 100644 index 000000000..e92d18b80 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/Makefile @@ -0,0 +1,100 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+#
+# -- Disclaimer:
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ######################################################################
+#
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h
+#
+## Object files ########################################################
+#
+HPL_au0obj       = \
+   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
+   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
+   HPL_dlange.o
+HPL_au1obj       = \
+   HPL_dlamch.o
+HPL_auxobj       = \
+   $(HPL_au0obj) $(HPL_au1obj)
+#
+## Targets #############################################################
+.PHONY : all lib clean
+all     : lib
+#
+lib     : lib.grd
+# lib.grd is a stamp file: touched once the objects are archived in $(HPLlib)
+lib.grd : $(HPL_auxobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) $@
+#
+# ######################################################################
+#
+HPL_dlacpy.o           : ../HPL_dlacpy.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlatcpy.o          : ../HPL_dlatcpy.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_fprintf.o          : ../HPL_fprintf.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_warn.o             : ../HPL_warn.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_abort.o            : ../HPL_abort.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlaprnt.o          : ../HPL_dlaprnt.c          $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlange.o           : ../HPL_dlange.c           $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+HPL_dlamch.o           : ../HPL_dlamch.c           $(INCdep)
+	$(CC) -o $@ -c $(CCNOOPT) $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/auxil/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c
new file mode 100644
index 000000000..72be5774b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_daxpy.c
@@ -0,0 +1,175 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions, and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_daxpy + +#ifdef STDC_HEADERS +void HPL_daxpy +( + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_daxpy +( N, ALPHA, X, INCX, Y, INCY ) + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_daxpy scales the vector x by alpha and adds it to y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. 
N + * must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * On exit, the entries of the incremented array Y are updated + * with the scaled entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_daxpy( N, ALPHA, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register const double alpha = ALPHA; + register double x0, x1, x2, x3, y0, y1, y2, y3; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; + + if( ( N > 0 ) && ( alpha != HPL_rzero ) ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + + *Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1; + Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3; + + X += incX4; + Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + y0 = (*Y); + + *Y = y0 + alpha * x0; + + X += INCX; + Y += INCY; + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef 
HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_daxpy + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c new file mode 100644 index 000000000..a8fe24109 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dcopy.c @@ -0,0 +1,168 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dcopy + +#ifdef STDC_HEADERS +void HPL_dcopy +( + const int N, + const double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_dcopy +( N, X, INCX, Y, INCY ) + const int N; + const double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dcopy copies the vector x into the vector y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. N + * must be at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. 
+ * On exit, the entries of the incremented array Y are updated + * with the entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dcopy( N, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY, + incX5 = 5 * INCX, incY5 = 5 * INCY, + incX6 = 6 * INCX, incY6 = 6 * INCY, + incX7 = 7 * INCX, incY7 = 7 * INCY, + incX8 = 8 * INCX, incY8 = 8 * INCY; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + *Y = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5; + Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7; + + X += incX8; + Y += incY8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + *Y = x0; + + X += INCX; + Y += INCY; + } + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77dcopy( &F77N, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_dcopy + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c new file mode 100644 index 000000000..b222e4717 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemm.c @@ -0,0 +1,521 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 
2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemmNN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iail, iblj, icij, j, jal, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) + { + t0 = ALPHA * B[iblj]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmNT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double 
t0; + int i, iail, ibj, ibjl, icij, j, jal, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) + { + t0 = ALPHA * B[ibjl]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iai, iail, iblj, icij, j, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) + { t0 += A[iail] * B[iblj]; } + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iali, ibj, ibjl, icij, j, jai, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iali = jai, ibjl = ibj; + l < K; l++, iali += 
1, ibjl += LDB ) t0 += A[iali] * B[ibjl]; + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemm0 +( + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + BETA, C, LDC ) + const enum HPL_TRANS TRANSA, TRANSB; + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) || + ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) && + ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } + return; + } + + if( TRANSB == HplNoTrans ) + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } + else + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dgemm +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +void HPL_dgemm +( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANSA; + const enum HPL_TRANS TRANSB; + const int M; + const int N; + const int K; + const double ALPHA; + const double * A; + const int LDA; + 
const double * B; + const int LDB; + const double BETA; + double * C; + const int LDC; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemm performs one of the matrix-matrix operations + * + * C := alpha * op( A ) * op( B ) + beta * C + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * Alpha and beta are scalars, and A, B and C are matrices, with op(A) + * an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANSA (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * TRANSB (local input) const enum HPL_TRANS + * On entry, TRANSB specifies the form of op(B) to be used in + * the matrix-matrix operation follows: + * TRANSB==HplNoTrans : op( B ) = B, + * TRANSB==HplTrans : op( B ) = B^T, + * TRANSB==HplConjTrans : op( B ) = B^T. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix + * op(A) and of the matrix C. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix + * op(B) and the number of columns of the matrix C. N must be + * at least zero. + * + * K (local input) const int + * On entry, K specifies the number of columns of the matrix + * op(A) and the number of rows of the matrix op(B). K must be + * be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrices A and B + * need not be set on input. 
+ * + * A (local input) const double * + * On entry, A is an array of dimension (LDA,ka), where ka is + * k when TRANSA==HplNoTrans, and is m otherwise. Before + * entry with TRANSA==HplNoTrans, the leading m by k part of + * the array A must contain the matrix A, otherwise the leading + * k by m part of the array A must contain the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the first dimension of A as declared + * in the calling (sub) program. When TRANSA==HplNoTrans then + * LDA must be at least max(1,m), otherwise LDA must be at least + * max(1,k). + * + * B (local input) const double * + * On entry, B is an array of dimension (LDB,kb), where kb is + * n when TRANSB==HplNoTrans, and is k otherwise. Before + * entry with TRANSB==HplNoTrans, the leading k by n part of + * the array B must contain the matrix B, otherwise the leading + * n by k part of the array B must contain the matrix B. + * + * LDB (local input) const int + * On entry, LDB specifies the first dimension of B as declared + * in the calling (sub) program. When TRANSB==HplNoTrans then + * LDB must be at least max(1,k), otherwise LDB must be at least + * max(1,n). + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When BETA is + * supplied as zero then the elements of the matrix C need + * not be set on input. + * + * C (local input/output) double * + * On entry, C is an array of dimension (LDC,n). Before entry, + * the leading m by n part of the array C must contain the + * matrix C, except when beta is zero, in which case C need not + * be set on entry. On exit, the array C is overwritten by the + * m by n matrix ( alpha*op( A )*op( B ) + beta*C ). + * + * LDC (local input) const int + * On entry, LDC specifies the first dimension of C as declared + * in the calling (sub) program. LDC must be at least + * max(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + printf("Order %d, TransA %d, TransB %d, M %d, N %d, K %d\n", ORDER, TRANSA, TRANSB, M, N, K); + cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, + C, LDC ); + } + else + { + HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, + C, LDC ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringStructPtr + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringCrayStyle + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, F77K = K, + F77lda = LDA, F77ldb = LDB, F77ldc = LDC; +#else +#define F77M M +#define F77N N +#define F77K K +#define F77lda LDA +#define F77ldb LDB +#define F77ldc LDC +#endif + char ctransa, ctransb; + + if( TRANSA == HplNoTrans ) ctransa = 'N'; + else if( TRANSA == HplTrans ) ctransa = 'T'; + else ctransa = 'C'; + + if( TRANSB == HplNoTrans ) ctransb = 'N'; + else if( TRANSB == HplTrans ) ctransb = 'T'; + else ctransb = 'C'; + + if( ORDER == HplColumnMajor ) + { +#ifdef StringSunStyle + F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, 
&alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif + } + else + { +#ifdef StringSunStyle + F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif + } +#endif +/* + * End of HPL_dgemm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c new file mode 100644 index 000000000..6366c5a48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dgemv.c @@ -0,0 +1,326 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemv0 +( + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_TRANS TRANS; + const int INCX, INCY, LDA, M, N; + const double ALPHA, BETA; + const double * A, * X; + double * Y; +#endif +{ +/* + * .. Local Variables .. + */ + int i, iaij, ix, iy, j, jaj, jx, jy; + register double t0; +/* .. + * .. Executable Statements .. + */ + if( ( M == 0 ) || ( N == 0 ) || + ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; } + + if( TRANS == HplNoTrans ) + { + HPL_dscal( M, BETA, Y, INCY ); + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY ) + { Y[iy] += A[iaij] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = HPL_rzero; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { t0 += A[iaij] * X[ix]; } + if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0; + else Y[jy] = BETA * Y[jy] + ALPHA * t0; + } + } +} +#endif + +#ifdef STDC_HEADERS +void HPL_dgemv +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +void HPL_dgemv +( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANS; + const int M; + const 
int N; + const double ALPHA; + const double * A; + const int LDA; + const double * X; + const int INCX; + const double BETA; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemv performs one of the matrix-vector operations + * + * y := alpha * op( A ) * x + beta * y, + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * where alpha and beta are scalars, x and y are vectors and A is an m + * by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the operation to be performed as + * follows: + * TRANS = HplNoTrans y := alpha*A *x + beta*y, + * TRANS = HplTrans y := alpha*A^T*x + beta*y. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then A and X need not be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. 
+ * INCX must not be zero. + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When ALPHA is + * supplied as zero then Y need not be set on input. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * Before entry with BETA non-zero, the incremented array Y must + * contain the vector y. On exit, Y is overwritten by the + * updated vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } + else + { + HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), + N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftran; +#endif +#ifdef StringStructPtr + F77_CHAR ftran; +#endif +#ifdef StringCrayStyle + F77_CHAR ftran; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + char ctran; + + if( ORDER == HplColumnMajor ) + { + ctran = ( TRANS == HplNoTrans ? 
'N' : 'T' ); + +#ifdef StringSunStyle + F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + else + { + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); +#ifdef StringSunStyle + F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + +#endif +/* + * End of HPL_dgemv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c new file mode 100644 index 000000000..5ea702778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dger.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dger + +#ifdef STDC_HEADERS +void HPL_dger +( + const enum HPL_ORDER ORDER, + const int M, + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY, + double * A, + const int LDA +) +#else +void HPL_dger +( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) + const enum HPL_ORDER ORDER; + const int M; + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; + double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dger performs the rank 1 operation + * + * A := alpha * x * y^T + A, + * + * where alpha is a scalar, x is an m-element vector, y is an n-element + * vector and A is an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. 
When ALPHA is + * supplied as zero then X and Y need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * A (local input/output) double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. On exit, A is + * overwritten by the updated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ); +#endif +#ifdef HPL_CALL_VSIPL + register double t0; + int i, iaij, ix, iy, j, jaj, jx, jy; + + if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return; + + if( ORDER == HplColumnMajor ) + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = ALPHA * Y[jy]; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { A[iaij] += X[ix] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY ) + { A[iaij] += Y[iy] * t0; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + + if( ORDER == HplColumnMajor ) + { F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); } + else + { F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); } +#endif +/* + * End of HPL_dger + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c new file mode 100644 index 000000000..7e041991f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dscal.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dscal + +#ifdef STDC_HEADERS +void HPL_dscal +( + const int N, + const double ALPHA, + double * X, + const int INCX +) +#else +void HPL_dscal +( N, ALPHA, X, INCX ) + const int N; + const double ALPHA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dscal scales the vector x by alpha. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are scaled + * by the scalar alpha. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dscal( N, ALPHA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + register const double alpha = ALPHA; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( ( N > 0 ) && ( alpha != HPL_rone ) ) + { + if( alpha == HPL_rzero ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = (double *)X + nu * INCX; + + do + { + (*X) = HPL_rzero; X[incX4] = HPL_rzero; + X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero; + X[incX2] = HPL_rzero; X[incX6] = HPL_rzero; + X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; } + } + else + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + x0 *= alpha; x4 *= alpha; x1 *= alpha; x5 *= alpha; + x2 *= alpha; x6 *= alpha; x3 *= alpha; x7 *= alpha; + + (*X) = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5; + X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7; + + X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + + F77dscal( &F77N, &alpha, X, &F77incx ); +#endif +/* + * End of HPL_dscal + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dswap.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dswap.c new file mode 100644 index 000000000..eb1b8e08d --- 
/dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dswap.c @@ -0,0 +1,157 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dswap + +#ifdef STDC_HEADERS +void HPL_dswap +( + const int N, + double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_dswap +( N, X, INCX, Y, INCY ) + const int N; + double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dswap swaps the vectors x and y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. N + * must be at least zero. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are updated + * with the entries of the incremented array Y. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * On exit, the entries of the incremented array Y are updated + * with the entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dswap( N, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, y0, y1, y2, y3; + double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; + + if( N > 0 ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + *Y = x0; *X = y0; Y[INCY ] = x1; X[INCX ] = y1; + Y[incY2] = x2; X[incX2] = y2; Y[incY3] = x3; X[incX3] = y3; + X += incX4; Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); y0 = (*Y); *Y = x0; *X = y0; X += INCX; Y += INCY; } + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77dswap( &F77N, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_dswap + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c new file mode 100644 index 000000000..a336a7d29 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsm.c @@ -0,0 +1,977 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, 
jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void 
HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaki, ibij, ibkj, j, jai, jbj, k; + register double t0; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNN +( + const int M, + const int N, + 
const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + 
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void 
HPL_dtrsmRUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = 
(N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsm0 +( + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } + return; + } + + if( SIDE == HplLeft ) + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } + else + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { 
HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsm +( + const enum HPL_ORDER ORDER, + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dtrsm +( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_ORDER ORDER; + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int M; + const int N; + const double ALPHA; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsm solves one of the matrix equations + * + * op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + * + * where alpha is a scalar, X and B are m by n matrices, A is a unit, or + * non-unit, upper or lower triangular matrix and op(A) is one of + * + * op( A ) = A or op( A ) = A^T. + * + * The matrix X is overwritten on B. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. 
+ * + * SIDE (local input) const enum HPL_SIDE + * On entry, SIDE specifies whether op(A) appears on the left + * or right of X as follows: + * SIDE==HplLeft op( A ) * X = alpha * B, + * SIDE==HplRight X * op( A ) = alpha * B. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix B. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix B. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrix B need not + * be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * k, where k is m when SIDE==HplLeft and is n + * otherwise. Before entry with UPLO==HplUpper, the leading + * k by k upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. 
When UPLO==HplLower on entry, + * the leading k by k lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. + * + * B (local input/output) double * + * On entry, B points to an array of size equal to or greater + * than LDB * n. Before entry, the leading m by n part of the + * array B must contain the matrix B, except when beta is zero, + * in which case B need not be set on entry. On exit, the array + * B is overwritten by the m by n solution matrix. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of B as + * declared in the calling (sub) program. LDB must be at + * least MAX(1,m). + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); + } + else + { + HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), + ( UPLO == HplLower ? 
HplUpper : HplLower ), + TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef StringSunStyle +#if defined( HPL_USE_F77_INTEGER_DEF ) + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77ldb = LDB; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77ldb LDB +#endif + char cside, cuplo, ctran, cdiag; + + if( TRANS == HplNoTrans ) ctran = 'N'; + else if( TRANS == HplTrans ) ctran = 'T'; + else ctran = 'C'; + cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); + + if( ORDER == HplColumnMajor ) + { + cside = ( SIDE == HplRight ? 'R' : 'L' ); + cuplo = ( UPLO == HplLower ? 
'L' : 'U' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } + else + { + cside = ( SIDE == HplRight ? 'L' : 'R' ); + cuplo = ( UPLO == HplLower ? 
'U' : 'L' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } +#endif +/* + * End of HPL_dtrsm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c new file mode 100644 index 000000000..99e84f073 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_dtrsv.c @@ -0,0 +1,520 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + X[jx] /= A[jaj]; t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + t0 /= A[jaj]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int 
INCX +) +#else +static void HPL_dtrsvLTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + X[jx] /= A[j+jaj]; t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij 
+= 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + t0 /= A[iaij]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsv0 +( + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + if( N == 0 ) return; + + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUTU( N, A, LDA, X, INCX ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLTU( N, A, LDA, X, INCX ); } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsv +( + const enum HPL_ORDER ORDER, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +void HPL_dtrsv +( ORDER, UPLO, TRANS, DIAG, 
N, A, LDA, X, INCX ) + const enum HPL_ORDER ORDER; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int N; + const double * A; + const int LDA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsv solves one of the systems of equations + * + * A * x = b, or A^T * x = b, + * + * where b and x are n-element vectors and A is an n by n non-unit, or + * unit, upper or lower triangular matrix. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the equations to be solved as + * follows: + * TRANS==HplNoTrans A * x = b, + * TRANS==HplTrans A^T * x = b. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * N (local input) const int + * On entry, N specifies the order of the matrix A. N must be at + * least zero. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. 
Before entry with UPLO==HplUpper, the leading + * n by n upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. When UPLO==HplLower on entry, + * the leading n by n lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,n). + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * Before entry, the incremented array X must contain the n + * element right-hand side vector b. On exit, X is overwritten + * with the solution vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); + } + else + { + HPL_dtrsv0( ( UPLO == HplUpper ? HplLower : HplUpper ), + ( TRANS == HplNoTrans ? 
HplTrans : HplNoTrans ), + DIAG, N, A, LDA, X, INCX ); + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fuplo, ftran, fdiag; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77lda = LDA, F77incx = INCX; +#else +#define F77N N +#define F77lda LDA +#define F77incx INCX +#endif + char cuplo, ctran, cdiag; + + if( ORDER == HplColumnMajor ) + { + cuplo = ( UPLO == HplUpper ? 'U' : 'L' ); + ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); + } + else + { + cuplo = ( UPLO == HplUpper ? 'L' : 'U' ); + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); + } + cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' ); + +#ifdef StringSunStyle + F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx, + IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + fuplo = HPL_C2F_CHAR( cuplo ); + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructVal + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructPtr + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif + +#endif +/* + * End of HPL_dtrsv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c new file mode 100644 index 000000000..5ceabdf25 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/HPL_idamax.c @@ -0,0 
+1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_idamax + +#ifdef STDC_HEADERS +int HPL_idamax +( + const int N, + const double * X, + const int INCX +) +#else +int HPL_idamax +( N, X, INCX ) + const int N; + const double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_idamax returns the index in an n-vector x of the first element + * having maximum absolute value. + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + return( (int)(cblas_idamax( N, X, INCX )) ); +#endif +#ifdef HPL_CALL_VSIPL + register double absxi, smax = HPL_rzero, x0, x1, x2, x3, + x4, x5, x6, x7; + const double * StX; + register int imax = 0, i = 0, j; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + + X += incX8; + + } while( X != StX ); + } + + for( j = N - nu; j != 0; j-- ) + { + x0 = (*X); + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + X += INCX; + } + } + return( imax ); +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + int imax = 0; + + if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1; + return( imax ); +#endif +/* + * End of HPL_idamax + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_daxpy.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_daxpy.o new file mode 100644 index 000000000..ff89e13c3 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_daxpy.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dcopy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dcopy.o new file mode 100644 index 000000000..d0bc0e6e6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dcopy.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemm.o new file mode 100644 index 000000000..12e87044c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemv.o new file mode 100644 index 000000000..a9b801898 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dgemv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dger.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dger.o new file mode 100644 index 000000000..255cfa4b2 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dger.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dscal.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dscal.o new file mode 100644 index 000000000..4cb4cd8c9 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dscal.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsm.o new file mode 100644 index 000000000..339a5635f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsv.o new file mode 100644 index 000000000..2930120c9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_dtrsv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_idamax.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_idamax.o new file mode 100644 index 000000000..b765e7be6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/HPL_idamax.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Makefile new file mode 100644 index 000000000..ed9f3d0e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/Makefile @@ -0,0 +1,98 @@ 
+# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h +# +## Object files ######################################################## +# +HPL_blaobj = \ + HPL_dcopy.o HPL_daxpy.o HPL_dscal.o \ + HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o \ + HPL_dger.o HPL_dgemm.o HPL_dtrsm.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_blaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dcopy.o : ../HPL_dcopy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dcopy.c +HPL_daxpy.o : ../HPL_daxpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_daxpy.c +HPL_dscal.o : ../HPL_dscal.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dscal.c +HPL_idamax.o : ../HPL_idamax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_idamax.c +HPL_dgemv.o : ../HPL_dgemv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemv.c +HPL_dtrsv.o : ../HPL_dtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsv.c +HPL_dger.o : ../HPL_dger.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dger.c +HPL_dgemm.o : ../HPL_dgemm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemm.c +HPL_dtrsm.o : ../HPL_dtrsm.c $(INCdep) + $(CC) -o $@ 
-c $(CCFLAGS) ../HPL_dtrsm.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/blas/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c new file mode 100644 index 000000000..dd03b79b1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1rinM.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, then send message to its two + * next neighbors. Otherwise, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, or + * just after the root process, then forward it to the next. Otherwise, + * inform the caller that the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next, + size ), msgid, comm ); + } + } + else + { + prev = MModSub1( rank, size ); + if( ( size > 2 ) && + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( prev != root ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c new file mode 100644 index 000000000..dd5eb2d12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_1ring.c @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, prev, rank, root, + size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, then + * forward it to the next. Otherwise, inform the caller that the panel + * has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank, + size ), msgid, comm ); + } + else + { + prev = MModSub1( rank, size ); + + ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid, + comm, &PANEL->status[0] ); + next = MModAdd1( rank, size ); + if( ( ierr == MPI_SUCCESS ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, + msgid, comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c new file mode 100644 index 000000000..56581ea0d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2rinM.c @@ -0,0 +1,236 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its two right neighbors and mid-pro- + * cess. If I am not the root process, probe for message. If the message + * is there, then receive it. If I am not the last process of both rings + * then forward it to the next. Otherwise, inform the caller that the + * panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + if( MModAdd1( next, size ) != roo2 ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, + MModAdd1( next, size ), msgid, comm ); + } + + if( ierr == MPI_SUCCESS ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + } + else + { + prev = MModSub1( rank, size ); + if( ( prev == root ) || ( rank == roo2 ) || + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && ( prev != root ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c new file mode 100644 index 000000000..f0e6e2647 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_2ring.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, rank, + roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its right neighbor and mid-process. + * If I am not the root process, probe for message. If the message is + * there, then receive it, and if I am not the last process of both + * rings, then forward it to the next. Otherwise, inform the caller that + * the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + else + { + partner = MModSub1( rank, size ); + if( ( partner == root ) || ( rank == roo2 ) ) partner = root; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c new file mode 100644 index 000000000..100161152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bcast.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bcast +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast +( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bcast broadcasts the current panel. Successful completion is + * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to + * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was + * not completed, in which case this function should be called again. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * IFLAG (output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * occured. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break; + case HPL_1RING : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break; + case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break; + case HPL_2RING : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break; + case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break; + case HPL_BLONG : ierr = HPL_bcast_blong( PANEL, IFLAG ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c new file mode 100644 index 000000000..3daf72b7d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_binit.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_binit +( + HPL_T_panel * PANEL +) +#else +int HPL_binit +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_binit initializes a row broadcast. Successful completion is + * indicated by the returned error code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_binit_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_binit_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_binit_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_binit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c new file mode 100644 index 000000000..5fa221937 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blonM.c @@ -0,0 +1,445 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S1 PANEL->buffers[I_SEND] +#define _M_COUNT_S1 PANEL->counts[I_SEND] +#define _M_TYPE_S1 PANEL->dtypes[I_SEND] + +#define _M_BUFF_S2 PANEL->buffers[I_SEND] +#define _M_COUNT_S2 PANEL->counts[I_SEND] +#define _M_TYPE_S2 PANEL->dtypes[I_SEND] + +#define _M_BUFF_R1 PANEL->buffers[I_RECV] +#define _M_COUNT_R1 PANEL->counts[I_RECV] +#define _M_TYPE_R1 PANEL->dtypes[I_RECV] + +#define _M_BUFF_R2 PANEL->buffers[I_RECV] +#define _M_COUNT_R2 PANEL->counts[I_RECV] +#define _M_TYPE_R2 PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S1 (void *)(PANEL->L2) +#define _M_COUNT_S1 PANEL->len +#define _M_TYPE_S1 MPI_DOUBLE + +#define _M_BUFF_S2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S2 lbuf +#define _M_TYPE_S2 MPI_DOUBLE + +#define _M_BUFF_R1 (void *)(PANEL->L2) +#define _M_COUNT_R1 PANEL->len +#define _M_TYPE_R1 MPI_DOUBLE + +#define _M_BUFF_R2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R2 lbuf +#define _M_TYPE_R2 MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blonM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int 
HPL_bcast_blonM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf, + ibufR, ibufS, dummy=0, indx, ip2=1, k, l, + lbuf, lbufR, lbufS, mask=1, msgid, mydist, + mydist2, next, npm1, npm2, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process sends to its right neighbor, then spread + * the panel on the other npcol - 2 processes. If I am not the root + * process, probe for message received. If the message is there, then + * receive it. If I am just after the root process, return. Otherwise, + * keep spreading on those npcol - 2 processes. Otherwise, inform the + * caller that the panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + root = PANEL->pcol; msgid = PANEL->msgid; + prev = MModSub1( rank, size ); + + if( rank == root ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, 0, PANEL->len, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1, + MModAdd1( rank, size ), msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else if( prev == root ) + { +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ * + * ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] ); + */ + if( ierr == MPI_SUCCESS ) + { /* if panel is here, proceed */ + if( go != 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + ierr = HPL_packL( PANEL, 0, PANEL->len, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1, + root, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } + } +/* + * if I am just after the root, exit now. The message receive completed + * successfully, this guy is done. If there are only 2 processes in each + * row of processes, we are done as well. + */ + if( ( prev == root ) || ( size == 2 ) ) + { + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + return( *IFLAG ); + } +/* + * Otherwise, proceed with broadcast - Spread the panel across process + * columns + */ + npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len; + + k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + if( rank == root ) mydist2 = ( mydist = 0 ); + else mydist2 = ( mydist = MModSub( rank, root, size ) - 1 ); + + indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 ); + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < npm1 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); + if( MModSub1( prev, size ) == root ) prev = root; + next = MModAdd1( rank, size ); + if( rank == root ) next = MModAdd1( next, size ); + + for( k = 0; k < npm2; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = 
MModAdd( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the 
message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c new file mode 100644 index 000000000..e57f11bcc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_blong.c @@ -0,0 +1,363 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S PANEL->buffers[I_SEND] +#define _M_COUNT_S PANEL->counts[I_SEND] +#define _M_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_BUFF_R PANEL->buffers[I_RECV] +#define _M_COUNT_R PANEL->counts[I_RECV] +#define _M_TYPE_R PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S lbuf +#define _M_TYPE_S MPI_DOUBLE + +#define _M_BUFF_R (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R lbuf +#define _M_TYPE_R MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE + +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blong +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_blong( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, dummy=0, ierr=MPI_SUCCESS, + ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf, + lbufR, lbufS, mask, msgid, mydist, mydist2, + next, npm1, partner, prev, rank, root, size; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, test for message receive completion. If + * the message is there, then receive it, and keep spreading in a + * blocking fashion this time. Otherwise, inform the caller that the + * panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + mask = PANEL->grid->col_mask; ip2 = PANEL->grid->col_ip2m1; + root = PANEL->pcol; msgid = PANEL->msgid; + COUNT = PANEL->len; npm1 = size - 1; + mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2; + count = COUNT / size; count = Mmax( count, 1 ); +/* + * Spread the panel across process columns + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < size ) + { + partner = MModAdd( root, partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Send message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); next = MModAdd1( rank, size ); + + for( k = 0; k < npm1; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = MModAdd( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? 
COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. 
+ * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c new file mode 100644 index 000000000..a2e0f4df8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_bwait.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bwait +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bwait HPL_bwait waits for the row broadcast of the current panel to + * terminate. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_bwait_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_bwait_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_bwait_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bwait + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c new file mode 100644 index 000000000..04f765a6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_copyL.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_copyL +( + HPL_T_panel * PANEL +) +#else +void HPL_copyL +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_copyL copies the panel of columns, the L1 replicated submatrix, + * the pivot array and the info scalar into a contiguous workspace for + * later broadcast. + * + * The copy of this panel into a contiguous buffer can be enforced by + * specifying -DHPL_COPY_L in the architecture specific Makefile. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int jb, lda; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + jb = PANEL->jb; lda = PANEL->lda; + + if( PANEL->grid->myrow == PANEL->prow ) + { + HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + else + { + HPL_dlacpy( PANEL->mp, jb, Mptr( PANEL->A, 0, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + } +/* + * End of HPL_copyL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c new file mode 100644 index 000000000..8a70ef83d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_packL.c @@ -0,0 +1,245 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_packL +( + HPL_T_panel * PANEL, + const int INDEX, + const int LEN, + const int IBUF +) +#else +int HPL_packL +( PANEL, INDEX, LEN, IBUF ) + HPL_T_panel * PANEL; + const int INDEX; + const int LEN; + const int IBUF; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_packL forms the MPI data type for the panel to be broadcast. + * Successful completion is indicated by the returned error code + * MPI_SUCCESS. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * INDEX (input) const int + * On entry, INDEX points to the first entry of the packed + * buffer being broadcast. + * + * LEN (input) const int + * On entry, LEN is the length of the packed buffer. + * + * IBUF (input) const int + * On entry, IBUF specifies the panel buffer/count/type entries + * that should be initialized. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ +#ifndef HPL_COPY_L + MPI_Datatype * type = NULL; + void * * * bufs = NULL; + double * A; + int * blen = NULL; + MPI_Aint * disp = NULL; + int curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1, + jb, jbm, jbp1, lda, len, m, m1, nbufs; +#else + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_COPY_L +/* + * Panel + L1 + DPIV have been copied into a contiguous buffer - Create + * and commit a contiguous data type + */ + PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX); + PANEL->counts [IBUF] = 1; + + ierr = MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); +#else +/* + * Panel is not contiguous (because of LDA and also L1 + DPIV) - Create + * and commit a struct data type + */ + jbp1 = ( jb = PANEL->jb ) + 1; +/* + * Temporaries to create the type struct. 
+ */ + bufs = (void * * *)malloc( jbp1 * sizeof( void * * ) ); + blen = (int *)malloc( jbp1 * sizeof( int ) ); + disp = (MPI_Aint *)malloc( jbp1 * sizeof( MPI_Aint ) ); + type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) ); + + if( ( bufs != NULL ) && ( blen != NULL ) && + ( disp != NULL ) && ( type != NULL ) ) + { + m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow ); + if( curr != 0 ) m -= jb; + + len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m; + + if( ( m > 0 ) && ( ibuf < jbm ) ) + { +/* + * Retrieve proper pointers depending on process row and column + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + lda = PANEL->lda; + if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); } + else { A = Mptr( PANEL->A, 0, -jb, lda ); } + } + else { lda = PANEL->ldl2; A = PANEL->L2; } +/* + * Pack the first (partial) column of L + */ + m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m ); + m1 = Mmin( len, m1 ); + + bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; +/* + * Pack the remaining columns of L + */ + while( ( len > 0 ) && ( j1 < jb ) ) + { + m1 = Mmin( len, m ); + + bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; + } + } +/* + * Pack L1, DPIV, DINFO + */ + if( len > 0 ) + { /* L1, DPIV, DINFO */ + bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = len; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + nbufs++; + } + + for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0; + + PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1; +/* + * construct the struct type + */ + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_create_struct( nbufs, blen, disp, type, + &PANEL->dtypes[IBUF] ); +/* + * release temporaries + */ + if( bufs ) free( bufs ); + if( blen ) free( blen ); + if( disp ) free( disp ); + if( type ) free( type ); +/* + * commit the type + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); + } + else + { +/* + * Memory allocation failed -> abort + */ + HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" ); + return( MPI_SUCCESS ); /* never executed (hopefully ...) */ + } +#endif +#else + /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug + somewhere, so, just in case and until I find it ... */ + return( MPI_SUCCESS ); +#endif +/* + * End of HPL_packL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c new file mode 100644 index 000000000..ff426891c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_recv.c @@ -0,0 +1,142 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_recv +( + double * RBUF, + int RCOUNT, + int SRC, + int RTAG, + MPI_Comm COMM +) +#else +int HPL_recv +( RBUF, RCOUNT, SRC, RTAG, COMM ) + double * RBUF; + int RCOUNT; + int SRC; + int RTAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_recv is a simple wrapper around MPI_Recv. 
Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * SRC (local input) int + * On entry, SRC specifies the rank of the sending process in + * the communication space defined by COMM. + * + * RTAG (local input) int + * On entry, STAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG, + COMM, &status ); +#endif + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_recv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c new file mode 100644 index 000000000..0b2363563 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_sdrv.c @@ -0,0 +1,239 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_sdrv +( + double * SBUF, + int SCOUNT, + int STAG, + double * RBUF, + int RCOUNT, + int RTAG, + int PARTNER, + MPI_Comm COMM +) +#else +int HPL_sdrv +( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM ) + double * SBUF; + int SCOUNT; + int STAG; + double * RBUF; + int RCOUNT; + int RTAG; + int PARTNER; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is + * to allow for some experimentation and tuning of this simple function. + * Messages of length less than or equal to zero are not sent nor + * received. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for the + * sending communication operation. 
+ * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for the + * receiving communication operation. + * + * PARTNER (local input) int + * On entry, PARTNER specifies the rank of the collaborative + * process in the communication space defined by COMM. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type[2]; +#endif + MPI_Request request; + MPI_Status status; + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT > 0 ) + { + if( SCOUNT > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE +/* + * Post asynchronous receive + */ + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER, + RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, + STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else +/* + * Post asynchronous receive + */ + ierr = MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + 
ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, + PARTNER, STAG, COMM ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#endif + } + else + { +/* + * Blocking receive + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, + COMM, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &status ); +#endif + } + } + else if( SCOUNT > 0 ) + { +/* + * Blocking send + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, + COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ) ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, + STAG, COMM ); +#endif + } + else { ierr = MPI_SUCCESS; } + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_sdrv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c new file mode 100644 index 000000000..9e9868594 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/HPL_send.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_send +( + double * SBUF, + int SCOUNT, + int DEST, + int STAG, + MPI_Comm COMM +) +#else +int HPL_send +( SBUF, SCOUNT, DEST, STAG, COMM ) + double * SBUF; + int SCOUNT; + int DEST; + int STAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_send is a simple wrapper around MPI_Send. Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * MPI_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * DEST (local input) int + * On entry, DEST specifies the rank of the receiving process in + * the communication space defined by COMM. 
+ * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( SCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM ); +#endif + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_send + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1rinM.o new file mode 100644 index 000000000..3efb2c0bc Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1rinM.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1ring.o new file mode 100644 index 000000000..ae90bde67 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_1ring.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2rinM.o new file mode 100644 index 000000000..0d3a84021 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2rinM.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2ring.o new file mode 100644 index 000000000..6994ef5fb Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_2ring.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bcast.o new file mode 100644 index 000000000..41728e2ef Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bcast.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_binit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_binit.o new file mode 100644 index 000000000..e9127505b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_binit.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blonM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blonM.o new file mode 100644 index 000000000..da8414559 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blonM.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blong.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blong.o new file mode 100644 index 000000000..52b677450 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_blong.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bwait.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bwait.o new file mode 100644 index 000000000..2f5b30aa7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_bwait.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_copyL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_copyL.o new file mode 100644 index 000000000..7db34d0b4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_copyL.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_packL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_packL.o new file mode 100644 index 000000000..4561f82f0 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_packL.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_recv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_recv.o new file mode 100644 index 000000000..1cd54ade4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_recv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_sdrv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_sdrv.o new file mode 100644 index 000000000..8d188a0ec Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_sdrv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_send.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_send.o new file 
mode 100644 index 000000000..6f242b1ed Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/HPL_send.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Makefile new file mode 100644 index 000000000..529fe9aea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/Makefile @@ -0,0 +1,111 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#
+include Make.inc
+#
+# ######################################################################
+#
+# Headers that every communication object is rebuilt against.
+# INCdir, CC, CCFLAGS, ARCHIVER, ARFLAGS, RANLIB, TOUCH and HPLlib are
+# not set in this file; they are expected to come from Make.inc.
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+#
+HPL_comobj       = \
+   HPL_1ring.o            HPL_1rinM.o            HPL_2ring.o            \
+   HPL_2rinM.o            HPL_blong.o            HPL_blonM.o            \
+   HPL_packL.o            HPL_copyL.o            HPL_binit.o            \
+   HPL_bcast.o            HPL_bwait.o            HPL_send.o             \
+   HPL_recv.o             HPL_sdrv.o
+#
+## Targets #############################################################
+#
+# all, lib and clean name actions, not files.  Declaring them phony keeps
+# a stray file with one of these names from silently disabling the rule.
+#
+.PHONY           : all lib clean
+#
+all              : lib
+#
+lib              : lib.grd
+#
+# lib.grd is a stamp file: it is touched after the objects have been
+# added to $(HPLlib), so the archive step only reruns when an object is
+# newer than the stamp.
+#
+lib.grd          : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#
+# Static pattern rule: each object listed in HPL_comobj is compiled from
+# the source of the same name one directory up.  A missing source is a
+# hard error instead of falling through to an implicit rule.
+#
+$(HPL_comobj)    : %.o : ../%.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) $<
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/comm/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/Makefile
new file mode 100644
index 000000000..93f18ebb3
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/Makefile
@@ -0,0 +1,118 @@
+# /*
+# * -- High Performance Computing Linpack Benchmark (HPL)
+# * Modifications Copyright (C) 2023 Intel Corporation
+# *
+# * -- Copyright notice and Licensing terms:
+# *
+# * Redistribution and use in source and binary forms, with or without
+# * modification, are permitted provided that the following conditions
+# * are met:
+# *
+# * 1. Redistributions of source code must retain the above copyright
+# * notice, this list of conditions and the following disclaimer.
+# *
+# * 2. Redistributions in binary form must reproduce the above copyright
+# * notice, this list of conditions, and the following disclaimer in the
+# * documentation and/or other materials provided with the distribution.
+# *
+# * 3. All advertising materials mentioning features or use of this
+# * software must display the following acknowledgement:
+# * This product includes software developed at the University of
+# * Tennessee, Knoxville, Innovative Computing Laboratory.
+# *
+# * 4. The name of the University, the name of the Laboratory, or the
+# * names of its contributors may not be used to endorse or promote
+# * products derived from this software without specific written
+# * permission.
+# * +# * -- Disclaimer: +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# * --------------------------------------------------------------------- +# */ + +# /* +# * -- High Performance Computing Linpack Benchmark (HPL) +# * HPL - 2.3 - December 2, 2018 +# * Antoine P. Petitet +# * University of Tennessee, Knoxville +# * Innovative Computing Laboratory +# * (C) Copyright 2000-2008 All Rights Reserved +# * +# * -- Copyright notice and Licensing terms: +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions +# * are met: +# * +# * 1. Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * +# * 2. Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions, and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * +# * 3. 
All advertising materials mentioning features or use of this +# * software must display the following acknowledgement: +# * This product includes software developed at the University of +# * Tennessee, Knoxville, Innovative Computing Laboratory. +# * +# * 4. The name of the University, the name of the Laboratory, or the +# * names of its contributors may not be used to endorse or promote +# * products derived from this software without specific written +# * permission. +# * +# * -- Disclaimer: +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# * ---------------------------------------------------------------------
+# */
+
+# Builds libdgemm.so, the shared library holding the dgemm/dtrsm
+# wrappers compiled from cuda_dgemm.cpp, plus its versioned symlinks.
+
+# all and clean name actions, not files.
+.PHONY: all clean
+
+# Tool and install locations; override on the command line, e.g.
+#   make MPICC=mpicxx CUDA_HOME=/opt/cuda
+MPICC     ?= mpicc
+CUDA_HOME ?= /usr/local/cuda
+
+OBJS = cuda_dgemm.o
+
+DEFINES = -DMPI
+# Optional switches inherited from the original makefile, left disabled:
+#DEFINES += -DUSE_FERMI_DGEMM
+#DEFINES += -DVERBOSE_PRINT
+#DEFINES += -DACML
+#DEFINES += -DGOTO
+
+all: libdgemm.so.1.0.1
+
+# Objects are position independent because they end up in a shared library.
+%.o: %.cpp
+	$(MPICC) -O0 -c -fPIC $(DEFINES) $< -o $@ -I$(CUDA_HOME)/include
+
+# Link the real library, then refresh the symlink chain
+# libdgemm.so -> libdgemm.so.1 -> libdgemm.so.1.0 -> libdgemm.so.1.0.1.
+libdgemm.so.1.0.1: $(OBJS)
+	$(MPICC) -O3 -shared -Wl,-soname,libdgemm.so.1 -o $@ $(OBJS) -L$(CUDA_HOME)/lib64 -lcudart -lcuda -lcublas
+	ln -sf libdgemm.so.1.0.1 libdgemm.so.1.0
+	ln -sf libdgemm.so.1.0 libdgemm.so.1
+	ln -sf libdgemm.so.1 libdgemm.so
+
+clean:
+	$(RM) $(OBJS) libdgemm.so.1.0.1 libdgemm.so.1.0 libdgemm.so.1 libdgemm.so
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp
new file mode 100644
index 000000000..50b2c4b90
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp
@@ -0,0 +1,305 @@
+ /*
+  * -- High Performance Computing Linpack Benchmark (HPL)
+  * Modifications Copyright (C) 2023 Intel Corporation
+  *
+  * -- Copyright notice and Licensing terms:
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright
+  * notice, this list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions, and the following disclaimer in the
+  * documentation and/or other materials provided with the distribution.
+  *
+  * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ + +#define NUMBER_OF_STREAMS 4 +#define CHUNK_SIZE 512 +#define NN 64 +#define NM 128 +#define ERRCODE(e) (-(__LINE__ * 1000 + (e))) +//#define DEVICE_DEBUG +//#ifdef MPI +//#include +//#endif + + +#define _GNU_SOURCE + +#define CUDA_ERROR_CHECK +#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ ) +#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ ) + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include "mkl.h" + + +extern "C" { + + + + +inline void __cudaSafeCall( cudaError err, const char *file, const int line ) +{ + #ifdef CUDA_ERROR_CHECK + if ( cudaSuccess != err ) + { + fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n", + file, line, cudaGetErrorString( err ) ); + exit( -1 ); + } + #endif + + return; +} + +inline void __cudaCheckError( const char *file, const int line ) +{ + #ifdef CUDA_ERROR_CHECK + cudaError err = cudaGetLastError(); + if ( cudaSuccess != err ) + { + fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n", + file, line, cudaGetErrorString( err ) ); + exit( -1 ); + } + + // More careful checking. However, this will affect performance. + // Comment away if needed. 
+ err = cudaDeviceSynchronize(); + if( cudaSuccess != err ) + { + fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", + file, line, cudaGetErrorString( err ) ); + exit( -1 ); + } + #endif + + return; +} + + + + void dpcpp_dgemm + ( const int ORDER, + const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA, const double *A, const int LDA, + const double *B, const int LDB, const double BETA, + double *C, const int LDC); + + void dpcpp_dtrsm( + int HPL_ORDER, + int HPL_SIDE, + int HPL_UPLO, + int HPL_TRANS, + int HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int); +} + + +void dpcpp_dgemm +( const int ORDER, const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA,const double *A, const int LDA, + const double *B, const int LDB, + const double BETA, double *C, const int LDC) +{ + + if ((M==0)||(K==0)||(N==0)){ + return; + } + + + if ( (N) < NN || (M) < NM || (K) < 128){ + + #ifdef DEVICE_DEBUG + std::cout << "dgemm-Running on CPU" << std::endl; + #endif + + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC); + return; + } + + + #ifdef DEVICE_DEBUG + std::cout << "dgemm-Running on GPU" << std::endl; + #endif + + double *devPtrA, *devPtrB, *devPtrC; + int status; + + CudaSafeCall(cudaMalloc((void **)&devPtrA, K * LDA *sizeof(double))); + CudaSafeCall(cudaMemcpy(devPtrA, &A[0], K * LDA *sizeof(double), cudaMemcpyHostToDevice)); + + CudaSafeCall(cudaMalloc((void **)&devPtrB, N * LDB *sizeof(double))); + CudaSafeCall(cudaMemcpy(devPtrB, &B[0], N * LDB *sizeof(double), cudaMemcpyHostToDevice)); + + CudaSafeCall(cudaMalloc((void **)&devPtrC, N * LDC *sizeof(double))); + CudaSafeCall(cudaMemcpy(devPtrC, &C[0], N * LDC *sizeof(double), cudaMemcpyHostToDevice)); + + + cudaDeviceSynchronize(); + cublasDgemm('N', 'N', M, N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, 
LDC); + cudaDeviceSynchronize(); + CudaSafeCall(cudaMemcpy(&C[0], devPtrC, N * LDC *sizeof(double), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + cudaFree(devPtrA); + cudaFree(devPtrB); + cudaFree(devPtrC); +} + +void dpcpp_dtrsm + +( const int ORDER, const int SIDE, + const int UPLO, const int TRANS, + const int DIAG, const int M, const int N, + const double ALPHA, const double* A, const int LDA, double* B, + const int LDB) +{ + + if ((M==0)||(N==0)){ + return; + } + + double *devPtrA, *devPtrB; + int status; + + + if ( (M) < 512 || (N) < 2*(M)){ + #ifdef DEVICE_DEBUG + std::cout << "dtrsm-Running on CPU" << std::endl; + #endif + cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB); + + + return; + } + + #ifdef DEVICE_DEBUG + std::cout << "dtrsm-Running on GPU" << std::endl; + #endif + + + CudaSafeCall(cudaMalloc((void **)&devPtrA, M * LDA * sizeof(double))); + CudaSafeCall(cudaMemcpy(devPtrA, A, M * LDA * sizeof(double), cudaMemcpyHostToDevice)); + + + CudaSafeCall(cudaMalloc((void **)&devPtrB, N * LDB * sizeof(double))); + CudaSafeCall(cudaMemcpy(devPtrB, B, N * LDB * sizeof(double), cudaMemcpyHostToDevice)); + cudaDeviceSynchronize(); + + cublasDtrsm('L','L','N','U',M,N,ALPHA,devPtrA,LDA,devPtrB,LDB); + + cudaDeviceSynchronize(); + CudaSafeCall(cudaMemcpy(B, devPtrB, N * LDB * sizeof(double), cudaMemcpyDeviceToHost)); + + cudaDeviceSynchronize(); + cudaFree(devPtrA); + cudaFree(devPtrB); + + +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.h new file mode 100644 index 000000000..aa3008f94 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.h @@ -0,0 +1,148 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + 
* Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ + +#define NUMBER_OF_STREAMS 2 + +#include +#include +#include + +class DeviceManager; +static DeviceManager *instance[2]; + +class DeviceManager{ + cl::sycl::device *m_pDevice; + cl::sycl::queue queues[NUMBER_OF_STREAMS]; + + DeviceManager(){ + try{ + m_pDevice = new cl::sycl::device(cl::sycl::default_selector()); + }catch(...){ + std::cout << "ERROR: failed to create sycl device.\n"; + } + + auto exception_handler = [] (cl::sycl::exception_list exceptions) { + for (std::exception_ptr const& e : exceptions) { + try { + std::rethrow_exception(e); + } catch(cl::sycl::exception const& e) { + std::cout << "Caught asynchronous SYCL exception during GEMM:\n" + << e.what() << std::endl; + } + } + }; + + + + queues[0] = cl::sycl::queue(*m_pDevice, exception_handler); + queues[1] = cl::sycl::queue(*m_pDevice, exception_handler); + //DeviceManager::display_device_properties(*m_pDevice); + //std::cout << "Done\n"; + + } + public: + + static DeviceManager* getInstance(int mpi_id){ + if(!instance[mpi_id]){ + + std::cout << "Creating device for " << mpi_id << "\n"; + instance[mpi_id] = new DeviceManager(); + + } + return instance[mpi_id]; + } + + cl::sycl::device &getDevice(){ return *m_pDevice;} + cl::sycl::queue *getQueues(){ return queues;} + + static void display_device_properties(cl::sycl::device const &dev); +}; diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.o new file mode 100644 index 000000000..52546727c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so new file mode 120000 index 000000000..505c044bb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so @@ -0,0 +1 @@ +libdgemm.so.1 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1 new file mode 120000 index 000000000..ab21c8005 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1 @@ -0,0 +1 @@ +libdgemm.so.1.0 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0 new file mode 120000 index 000000000..d08629732 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0 @@ -0,0 +1 @@ +libdgemm.so.1.0.1 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0.1 b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/libdgemm.so.1.0.1 new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c new file mode 100644 index 000000000..776f48504 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_all_reduce.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_all_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + MPI_Comm COMM +) +#else +int HPL_all_reduce +( BUFFER, COUNT, DTYPE, OP, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_all_reduce performs a global reduce operation across all + * processes of a group leaving the results on all processes. + * + * Arguments + * ========= + * + * BUFFER (local input/global output) void * + * On entry, BUFFER points to the buffer to be combined. On + * exit, this array contains the combined data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr; +/* .. + * .. 
Executable Statements .. + */ + hplerr = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM ); + if( hplerr != MPI_SUCCESS ) return( hplerr ); + return( HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) ); +/* + * End of HPL_all_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c new file mode 100644 index 000000000..9a5d9b10a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_barrier.c @@ -0,0 +1,90 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_barrier +( + MPI_Comm COMM +) +#else +int HPL_barrier +( COMM ) + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_barrier blocks the caller until all process members have call it. + * The call returns at any process only after all group members have + * entered the call. + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i=0; +/* .. + * .. Executable Statements .. 
+ */ + return( HPL_broadcast( (void*)(&i), 1, HPL_INT, 0, COMM ) ); +/* + * End of HPL_barrier + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c new file mode 100644 index 000000000..42d962864 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_broadcast.c @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_broadcast +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_broadcast +( BUFFER, COUNT, DTYPE, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_broadcast broadcasts a message from the process with rank ROOT to + * all processes in the group. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be broadcast. On + * exit, this array contains the broadcast data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the source process. 
+ * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; + MPI_Status status; +/* .. + * .. Executable Statements .. + */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr ); + mpierr = MPI_Comm_rank( COMM, &rank ); + + kk = size - 1; + while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist = MModSub( rank, ROOT, size ); + + do + { + mask ^= ip2; + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + ip2 >>= 1; + } while( ip2 ); + + return( hplerr ); +/* + * End of HPL_broadcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c new file mode 100644 index 000000000..f0d00b065 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_exit.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_exit +( + HPL_T_grid * GRID +) +#else +int HPL_grid_exit +( GRID ) + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_exit marks the process grid object for deallocation. The + * returned error code MPI_SUCCESS indicates successful completion. + * Other error codes are (MPI) implementation dependent. + * + * Arguments + * ========= + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid to be released. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr = MPI_SUCCESS, mpierr; +/* .. + * .. Executable Statements .. 
+ */ + if( GRID->all_comm != MPI_COMM_NULL ) + { + mpierr = MPI_Comm_free( &(GRID->row_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->col_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->all_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + + GRID->order = HPL_COLUMN_MAJOR; + + GRID->iam = GRID->myrow = GRID->mycol = -1; + GRID->nprow = GRID->npcol = GRID->nprocs = -1; + + GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1; + GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1; + + return( hplerr ); +/* + * End of HPL_grid_exit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c new file mode 100644 index 000000000..95c5a7315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_info.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_info +( + const HPL_T_grid * GRID, + int * NPROW, + int * NPCOL, + int * MYROW, + int * MYCOL +) +#else +int HPL_grid_info +( GRID, NPROW, NPCOL, MYROW, MYCOL ) + const HPL_T_grid * GRID; + int * NPROW; + int * NPCOL; + int * MYROW; + int * MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_info returns the grid shape and the coordinates in the grid + * of the calling process. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. 
+ * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NPROW (global output) int * + * On exit, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global output) int * + * On exit, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. + * + * MYROW (global output) int * + * On exit, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (global output) int * + * On exit, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + *NPROW = GRID->nprow; *NPCOL = GRID->npcol; + *MYROW = GRID->myrow; *MYCOL = GRID->mycol; + return( MPI_SUCCESS ); +/* + * End of HPL_grid_info + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c new file mode 100644 index 000000000..52111ac52 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_grid_init.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_init +( + MPI_Comm COMM, + const HPL_T_ORDER ORDER, + const int NPROW, + const int NPCOL, + HPL_T_grid * GRID +) +#else +int HPL_grid_init +( COMM, ORDER, NPROW, NPCOL, GRID ) + MPI_Comm COMM; + const HPL_T_ORDER ORDER; + const int NPROW; + const int NPCOL; + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_init creates a NPROW x NPCOL process grid using column- or + * row-major ordering from an initial collection of processes identified + * by an MPI communicator. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. The coordinates of processes that are not part of the + * grid are set to values outside of [0..NPROW) x [0..NPCOL). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * On entry, COMM is the MPI communicator identifying the + * initial collection of processes out of which the grid is + * formed. + * + * ORDER (global input) const HPL_T_ORDER + * On entry, ORDER specifies how the processes should be ordered + * in the grid as follows: + * ORDER = HPL_ROW_MAJOR row-major ordering; + * ORDER = HPL_COLUMN_MAJOR column-major ordering; + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid to be created. NPROW must be at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid to be created. NPCOL must be at least one. + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information to be initialized. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int hdim, hplerr=MPI_SUCCESS, ierr, ip2, k, + mask, mycol, myrow, nprocs, rank, size; +/* .. + * .. Executable Statements .. + */ + MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size ); +/* + * Abort if illegal process grid + */ + nprocs = NPROW * NPCOL; + if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) ) + { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); } +/* + * Row- or column-major ordering of the processes + */ + if( ORDER == HPL_ROW_MAJOR ) + { + GRID->order = HPL_ROW_MAJOR; + myrow = rank / NPCOL; mycol = rank - myrow * NPCOL; + } + else + { + GRID->order = HPL_COLUMN_MAJOR; + mycol = rank / NPROW; myrow = rank - mycol * NPROW; + } + GRID->iam = rank; GRID->myrow = myrow; GRID->mycol = mycol; + GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs; +/* + * row_ip2 : largest power of two <= nprow; + * row_hdim : row_ip2 procs hypercube dim; + * row_ip2m1 : largest power of two <= nprow-1; + * row_mask : row_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPROW; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->row_ip2 = ip2; GRID->row_hdim = hdim; + + mask = ip2 = 1; k = NPROW - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->row_ip2m1 = ip2; GRID->row_mask = mask; +/* + * col_ip2 : largest power of two <= npcol; + * col_hdim : col_ip2 procs hypercube dim; + * col_ip2m1 : largest power of two <= npcol-1; + * col_mask : col_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPCOL; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->col_ip2 = ip2; GRID->col_hdim = hdim; + + mask = ip2 = 1; k = NPCOL - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->col_ip2m1 = ip2; GRID->col_mask = mask; +/* + * All communicator, leave if I am not part of this grid. Creation of the + * row- and column communicators. + */ + ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 
0 : MPI_UNDEFINED ), + rank, &(GRID->all_comm) ); + if( GRID->all_comm == MPI_COMM_NULL ) return( ierr ); + + ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + return( hplerr ); +/* + * End of HPL_grid_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c new file mode 100644 index 000000000..002aabe01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_max.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_max +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_max +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_max combines (max) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. 
+ * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } +/* + * End of HPL_max + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c new file mode 100644 index 000000000..a99e5e58a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_min.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_min +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_min +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_min combines (min) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. 
+ * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } +/* + * End of HPL_min + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c new file mode 100644 index 000000000..c80885b9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_pnum.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pnum +( + const HPL_T_grid * GRID, + const int MYROW, + const int MYCOL +) +#else +int HPL_pnum +( GRID, MYROW, MYCOL ) + const HPL_T_grid * GRID; + const int MYROW; + const int MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pnum determines the rank of a process as a function of its + * coordinates in the grid. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. 
+ * + * MYROW (local input) const int + * On entry, MYROW specifies the row coordinate of the process + * whose rank is to be determined. MYROW must be greater than or + * equal to zero and less than NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies the column coordinate of the + * process whose rank is to be determined. MYCOL must be greater + * than or equal to zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + if( GRID->order == HPL_ROW_MAJOR ) + return( MYROW * GRID->npcol + MYCOL ); + else + return( MYCOL * GRID->nprow + MYROW ); +/* + * End of HPL_pnum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c new file mode 100644 index 000000000..417c21163 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_reduce.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_reduce +( BUFFER, COUNT, DTYPE, OP, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_reduce performs a global reduce operation across all processes of + * a group. 
Note that the input buffer is used as workarray and in all + * processes but the accumulating process corrupting the original data. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be reduced. On + * exit, and in process of rank ROOT this array contains the + * reduced data. This buffer is also used as workspace during + * the operation in the other processes of the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the accumulating process. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; + void * buffer = NULL; + int hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; +/* .. + * .. Executable Statements .. 
+ */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); + if( size == 1 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_rank( COMM, &rank ); + i = size - 1; while( i > 1 ) { i >>= 1; d++; } + + if( DTYPE == HPL_INT ) + buffer = (void *)( (int *) malloc( (size_t)(COUNT) * + sizeof( int ) ) ); + else + buffer = (void *)( (double *)malloc( (size_t)(COUNT) * + sizeof( double ) ) ); + + if( !( buffer ) ) + { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); } + + if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 ) + { + do + { + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + MModAdd( ROOT, ip2, size ), tag, COMM, + &status ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + OP( COUNT, buffer, BUFFER, DTYPE ); + ip2 <<= 1; d--; + } while( d ); + } + else + { + do + { + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + OP( COUNT, buffer, BUFFER, DTYPE ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + mask ^= ip2; ip2 <<= 1; d--; + } while( d ); + } + if( buffer ) free( buffer ); + + return( hplerr ); +/* + * End of HPL_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c new file mode 100644 index 000000000..34cf87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/HPL_sum.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_sum +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_sum +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sum combines (sum) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. 
+ */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } +/* + * End of HPL_sum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_all_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_all_reduce.o new file mode 100644 index 000000000..ac0f38d00 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_all_reduce.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_barrier.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_barrier.o new file mode 100644 index 000000000..b842da4f7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_barrier.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_broadcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_broadcast.o new file mode 100644 index 000000000..8a9fc0a30 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_broadcast.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_exit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_exit.o new file mode 100644 index 000000000..9be5641c9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_exit.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_info.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_info.o new file mode 100644 index 000000000..2d6495818 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_info.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_init.o new file mode 100644 index 000000000..596e96b7c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_grid_init.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_max.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_max.o new file mode 100644 index 000000000..0e92eb194 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_max.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_min.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_min.o new file mode 100644 index 000000000..8c64b221a Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_min.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_pnum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_pnum.o new file mode 100644 index 000000000..8da27eae3 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_pnum.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_reduce.o new file mode 100644 index 000000000..a758f26e7 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_reduce.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_sum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_sum.o new file mode 100644 index 000000000..e4fafa0e1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/HPL_sum.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Makefile new file mode 100644 index 000000000..51549d817 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/Makefile @@ -0,0 +1,103 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h +# +## Object files ######################################################## +# +HPL_griobj = \ + HPL_grid_init.o HPL_pnum.o HPL_grid_info.o \ + HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o \ + HPL_all_reduce.o HPL_barrier.o HPL_min.o \ + HPL_max.o HPL_sum.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_griobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_grid_init.o : ../HPL_grid_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_init.c +HPL_pnum.o : ../HPL_pnum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pnum.c +HPL_grid_info.o : ../HPL_grid_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_info.c +HPL_grid_exit.o : ../HPL_grid_exit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_exit.c +HPL_broadcast.o : ../HPL_broadcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_broadcast.c +HPL_reduce.o : ../HPL_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_reduce.c +HPL_all_reduce.o : ../HPL_all_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_all_reduce.c +HPL_barrier.o : ../HPL_barrier.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_barrier.c +HPL_min.o : ../HPL_min.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_min.c +HPL_max.o : ../HPL_max.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_max.c +HPL_sum.o : ../HPL_sum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sum.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/grid/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c new file mode 100644 index 000000000..757dad242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_disp.c @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_disp +( + HPL_T_panel * * PANEL +) +#else +int HPL_pdpanel_disp +( PANEL ) + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_disp deallocates the panel structure and resources and + * stores the error code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to be deallocated. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int mpierr; +/* .. + * .. Executable Statements .. 
+ */ +/* + * Deallocate the panel resources and panel structure + */ + mpierr = HPL_pdpanel_free( *PANEL ); + if( *PANEL ) free( *PANEL ); + *PANEL = NULL; + + return( mpierr ); +/* + * End of HPL_pdpanel_disp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c new file mode 100644 index 000000000..38b5b0d97 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_free.c @@ -0,0 +1,104 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_free +( + HPL_T_panel * PANEL +) +#else +int HPL_pdpanel_free +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_free deallocates the panel resources and stores the error + * code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the panel data structure from + * which the resources should be deallocated. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->pmat->info == 0 ) PANEL->pmat->info = *(PANEL->DINFO); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE ); + (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE ); + if( PANEL->grid->nprow > 1 ) + (void) vsip_blockrelease_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Destroy blocks + */ + vsip_blockdestroy_d( PANEL->L1block ); + vsip_blockdestroy_d( PANEL->L2block ); + if( PANEL->grid->nprow > 1 ) + vsip_blockdestroy_d( PANEL->Ublock ); +#endif + + if( PANEL->WORK ) free( PANEL->WORK ); + if( PANEL->IWORK ) free( PANEL->IWORK ); + + return( MPI_SUCCESS ); +/* + * End of HPL_pdpanel_free + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c new file mode 100644 index 000000000..9e35c7fb4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_init.c @@ -0,0 +1,348 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... 
*/ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +void HPL_pdpanel_init +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * PANEL +) +#else +void HPL_pdpanel_init +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_init initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id.
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + size_t dalign; + int icurcol, icurrow, ii, itmp1, jj, lwork, + ml2, mp, mycol, myrow, nb, npcol, nprow, + nq, nu; +/* .. + * .. Executable Statements .. + */ + PANEL->grid = GRID; /* ptr to the process grid */ + PANEL->algo = ALGO; /* ptr to the algo parameters */ + PANEL->pmat = A; /* ptr to the local array info */ + + myrow = GRID->myrow; mycol = GRID->mycol; + nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; + + HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, + nprow, npcol, &ii, &jj, &icurrow, &icurcol ); + mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow ); + nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol ); + /* ptr to trailing part of A */ + PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld ); +/* + * Workspace pointers are initialized to NULL. 
+ */ + PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; + PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; + PANEL->IWORK = NULL; +/* + * Local lengths, indexes, process coordinates + */ + PANEL->nb = nb; /* distribution blocking factor */ + PANEL->jb = JB; /* panel width */ + PANEL->m = M; /* global # of rows of trailing part of A */ + PANEL->n = N; /* global # of cols of trailing part of A */ + PANEL->ia = IA; /* global row index of trailing part of A */ + PANEL->ja = JA; /* global col index of trailing part of A */ + PANEL->mp = mp; /* local # of rows of trailing part of A */ + PANEL->nq = nq; /* local # of cols of trailing part of A */ + PANEL->ii = ii; /* local row index of trailing part of A */ + PANEL->jj = jj; /* local col index of trailing part of A */ + PANEL->lda = A->ld; /* local leading dim of array A */ + PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */ + PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */ + PANEL->msgid = TAG; /* message id to be used for panel bcast */ +/* + * Initialize ldl2 and len to temporary dummy values and Update tag for + * next panel + */ + PANEL->ldl2 = 0; /* local leading dim of array L2 */ + PANEL->len = 0; /* length of the buffer to broadcast */ +/* + * Figure out the exact amount of workspace needed by the factorization + * and the update - Allocate that space - Finish the panel data structu- + * re initialization. + * + * L1: JB x JB in all processes + * DPIV: JB in all processes + * DINFO: 1 in all processes + * + * We make sure that those three arrays are contiguous in memory for the + * later panel broadcast. We also choose to put this amount of space + * right after L2 (when it exists) so that one can receive a contiguous + * buffer.
+ */ + dalign = ALGO->align * sizeof( double ); + + if( npcol == 1 ) /* P x 1 process grid */ + { /* space for L1, DPIV, DINFO */ + lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 ); + if( nprow > 1 ) /* space for U */ + { nu = nq - JB; lwork += JB * Mmax( 0, nu ); } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Always re-use A in + * the only process column + */ + PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); + } + else + { /* space for L2, L1, DPIV */ + ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 ); + PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 ); +#ifdef HPL_COPY_L + lwork = ALGO->align + PANEL->len; +#else + lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); +#endif + if( nprow > 1 ) /* space for U */ + { + nu = ( mycol == icurcol ? nq - JB : nq ); + lwork += JB * Mmax( 0, nu ); + } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Re-use A in the cur- + * rent process column when HPL_COPY_L is not defined. + */ +#ifdef HPL_COPY_L + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; +#else + if( mycol == icurcol ) + { + PANEL->L2 = PANEL->A + ( myrow == icurrow ? 
JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + } + else + { + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; + } +#endif + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); + } +#ifdef HPL_CALL_VSIPL + PANEL->Ablock = A->block; +/* + * Create blocks and bind them to the data pointers + */ + PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), + (vsip_length)(JB*JB), VSIP_MEM_NONE ); + PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), + (vsip_length)(PANEL->ldl2*JB), + VSIP_MEM_NONE ); + if( nprow > 1 ) + { + nu = ( mycol == icurcol ? nq - JB : nq ); + PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), + (vsip_length)(JB * Mmax( 0, nu )), + VSIP_MEM_NONE ); + } + else { PANEL->Ublock = A->block; } +#endif +/* + * If nprow is 1, we just allocate an array of JB integers for the swap. + * When nprow > 1, we allocate the space for the index arrays immediate- + * ly. The exact size of this array depends on the swapping routine that + * will be used, so we allocate the maximum: + * + * IWORK[0] is of size at most 1 + + * IPL is of size at most 1 + + * IPID is of size at most 4 * JB + + * + * For HPL_pdlaswp00: + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * llen is of size at most NPROW + + * llen_sv is of size at most NPROW. + * + * For HPL_pdlaswp01: + * ipA is of size at most 1 + + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * iplen is of size at most NPROW + 1 + + * ipmap is of size at most NPROW + + * ipmapm1 is of size at most NPROW + + * permU is of size at most JB + + * iwork is of size at most MAX( 2*JB, NPROW+1 ). + * + * that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1)) + * = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
+ * + * We use the first entry of this work array to indicate whether + * the local index arrays have already been computed, and if yes, by + * which function: + * IWORK[0] = -1: no index arrays have been computed so far; + * IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays; + * IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays; + * This allows us to skip some redundant and useless computations. + */ + if( nprow == 1 ) { lwork = JB; } + else + { + itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); + lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; + } + + PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + + if( PANEL->IWORK == NULL ) + { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } + /* Initialize the first entry of the work array */ + *(PANEL->IWORK) = -1; +/* + * End of HPL_pdpanel_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c new file mode 100644 index 000000000..1dbd8a18f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/HPL_pdpanel_new.c @@ -0,0 +1,152 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanel_new +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * * PANEL +) +#else +void HPL_pdpanel_new +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_new creates and initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id.
+ * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to create and initialize. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * p = NULL; +/* .. + * .. Executable Statements .. + */ +/* + * Allocate the panel structure - Check for enough memory + */ + if( !( p = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" ); + } + + HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, p ); + *PANEL = p; +/* + * End of HPL_pdpanel_new + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_disp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_disp.o new file mode 100644 index 000000000..22d8bd1b5 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_disp.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_free.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_free.o new file mode 100644 index 000000000..ea345e7fc Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_free.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_init.o new file mode 100644 index 000000000..2eca8470c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_init.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_new.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_new.o new file mode 100644 index 000000000..41f746d1f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/HPL_pdpanel_new.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Makefile new file mode 100644 index 000000000..804749cc2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/Makefile @@ -0,0 +1,90 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_panobj = \ + HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o \ + HPL_pdpanel_free.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_panobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_new.c +HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_init.c +HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_disp.c +HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_free.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/panel/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c new file mode 100644 index 000000000..7ad5a1a99 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp00N.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP00N_DEPTH +#define HPL_LASWP00N_DEPTH 32 +#define HPL_LASWP00N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp00N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp00N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp00N performs a series of local row interchanges on a matrix + * A. One row interchange is initiated for rows 0 through M-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A to be + * interchanged. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. + * N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N) to which + * the row interchanges will be applied. On exit, the permuted + * matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * IPIV (local input) const int * + * On entry, IPIV is an array of size M that contains the + * pivoting information. For k in [0..M), IPIV[k]=IROFF + l + * implies that local rows k and l are to be interchanged. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register double r; + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP00N_LOG2_DEPTH ); + int ip, nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH ) + << HPL_LASWP00N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#if ( HPL_LASWP00N_DEPTH > 1 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 2 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 4 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 8 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 16 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + 
r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA ) + { r = *a0; *a0 = *a1; *a1 = r; } + } + } + } +/* + * End of HPL_dlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c new file mode 100644 index 000000000..786d1eff4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01N.c @@ -0,0 +1,209 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01N_DEPTH +#define HPL_LASWP01N_DEPTH 32 +#define HPL_LASWP01N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01N copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destination of those rows are specified by LINDXAU. A + * positive value of LINDXAU indicates that the array destination is U, + * and A otherwise. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). The rows + * of A specified by LINDXA are be copied within this array U at + * the positions indicated by positive values of LINDXAU. 
+ * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A or + * or copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U where the rows of A should be + * copied at. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A positive + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U at the position LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved at the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP01N_LOG2_DEPTH ); + int lda1, nu, nr; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) << + HPL_LASWP01N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } + + *a1 = *a0; a1 += lda1; a0 += LDA; +#if ( HPL_LASWP01N_DEPTH > 1 ) + *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 2 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 4 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 8 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 16 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } + for( j = 0; j < nr; j++, a1 += lda1, a0 
+= LDA ) { *a1 = *a0; } + } + } +/* + * End of HPL_dlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c new file mode 100644 index 000000000..429cfb6f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp01T.c @@ -0,0 +1,252 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01T_DEPTH +#define HPL_LASWP01T_DEPTH 32 +#define HPL_LASWP01T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01T copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destination of those rows are specified by LINDXAU. A + * positive value of LINDXAU indicates that the array destination is U, + * and A otherwise. Rows of A are stored as columns in U. 
+ * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,M). The rows + * of A specified by LINDXA are copied within this array U at + * the positions indicated by positive values of LINDXAU. The + * rows of A are stored as columns in U. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A or + * or copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U where the rows of A should be + * copied at. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A positive + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U at the position LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved at the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH ); + int nu, nr; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) << + HPL_LASWP01T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + a1[ 0] = *a0; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + a1[ 1] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA; + a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA; + a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA; + a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA; + a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA; + a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA; + a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA; + a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA; + a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA; + a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA; + a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA; + a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA; +#endif + } + else + { + a1 = A - (size_t)(LINDXAU[i]); + + *a1 = *a0; a1 += LDA; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; 
+ *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; } + } + else + { + a1 = A - (size_t)(LINDXAU[i]); + for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; } + } + } + } +/* + * End of HPL_dlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c new file mode 100644 index 000000000..45c2f5f1f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp02N.c @@ -0,0 +1,205 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP02N_DEPTH +#define HPL_LASWP02N_DEPTH 32 +#define HPL_LASWP02N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp02N +( + const int M, + const int N, + const double * A, + const int LDA, + double * W0, + double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp02N +( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M; + const int N; + const double * A; + const int LDA; + double * W0; + double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp02N packs scattered rows of an array A into workspace W. + * The row offsets in A are specified by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * copied into W. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * copied into W. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be copied into W. 
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * W0 (local input/output) double * + * On exit, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local output) double * + * On entry, W is an array of size (LDW,M). On exit, W contains + * the rows LINDXA[i] for i in [0..M) of A stored contiguously + * in W(:,i). + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied into W. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U that should be copied into A and + * replaced by the rows of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * A0 = A, * a0; + double * w0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP02N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + for( i = 0; i < M; i++ ) + *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]); + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) << + HPL_LASWP02N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW); + + w0[ 0] = *a0; a0 += LDA; +#if ( HPL_LASWP02N_DEPTH > 1 ) + w0[ 1] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 2 ) + w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 4 ) + w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA; + w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 8 ) + w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA; + w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA; + w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA; + w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 16 ) + w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA; + w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA; + w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA; + w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA; + w0[24] = *a0; a0 += LDA; w0[25] = *a0; a0 += LDA; + w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA; + w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA; + w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; } + } + } +/* + * End of HPL_dlaswp02N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c new file mode 100644 index 000000000..760732a8d --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03N.c @@ -0,0 +1,194 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP03N_DEPTH +#define HPL_LASWP03N_DEPTH 32 +#define HPL_LASWP03N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp03N +( + const int M, + const int N, + double * U, + const int LDU, + const double * W0, + const double * W, + const int LDW +) +#else +void HPL_dlaswp03N +( M, N, U, LDU, W0, W, LDW ) + const int M; + const int N; + double * U; + const int LDU; + const double * W0; + const double * W; + const int LDW; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp03N copies columns of W into rows of an array U. The + * destination in U of these columns contained in W is stored within W0. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of W stored + * contiguously that should be copied into U. M must be at least + * zero. + * + * N (local input) const int + * On entry, N specifies the length of columns of W stored + * contiguously that should be copied into U. N must be at least + * zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). Columns + * of W are copied as rows within this array U at the positions + * specified in W0. 
+ * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M), that contains data + * to be copied into U. For i in [0..M), entries W(:,i) should + * be copied into the row or column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * u0; + const int incU = (int)( (unsigned int)(LDU) << + HPL_LASWP03N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) << + HPL_LASWP03N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *u0 = w0[ 0]; u0 += LDU; +#if ( HPL_LASWP03N_DEPTH > 1 ) + *u0 = w0[ 1]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 2 ) + *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 4 ) + *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU; + *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 8 ) + *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU; + *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU; + *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU; + *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 16 ) + *u0 = w0[16]; 
u0 += LDU; *u0 = w0[17]; u0 += LDU; + *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU; + *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU; + *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU; + *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU; + *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU; + *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU; + *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; } + } + } +/* + * End of HPL_dlaswp03N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c new file mode 100644 index 000000000..fece692ce --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp03T.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP03T_DEPTH +#define HPL_LASWP03T_DEPTH 32 +#define HPL_LASWP03T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp03T +( + const int M, + const int N, + double * U, + const int LDU, + const double * W0, + const double * W, + const int LDW +) +#else +void HPL_dlaswp03T +( M, N, U, LDU, W0, W, LDW ) + const int M; + const int N; + double * U; + const int LDU; + const double * W0; + const double * W; + const int LDW; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp03T copies columns of W into an array U. The destination + * in U of these columns contained in W is stored within W0. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of W stored + * contiguously that should be copied into U. M must be at least + * zero. + * + * N (local input) const int + * On entry, N specifies the length of columns of W stored + * contiguously that should be copied into U. N must be at least + * zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,M). Columns + * of W are copied within the array U at the positions specified + * in W0. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M), that contains data + * to be copied into U. For i in [0..M), entries W(:,i) should + * be copied into the row or column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. 
+ * LDW must be at least MAX(1,N+1). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * u0; + const int incU = ( 1 << HPL_LASWP03T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) << + HPL_LASWP03T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP03T_DEPTH, U += incU, w += HPL_LASWP03T_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); + w0 = w + (size_t)(i) * (size_t)(LDW); + + u0[ 0] = w0[ 0]; +#if ( HPL_LASWP03T_DEPTH > 1 ) + u0[ 1] = w0[ 1]; +#endif +#if ( HPL_LASWP03T_DEPTH > 2 ) + u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3]; +#endif +#if ( HPL_LASWP03T_DEPTH > 4 ) + u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7]; +#endif +#if ( HPL_LASWP03T_DEPTH > 8 ) + u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11]; + u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15]; +#endif +#if ( HPL_LASWP03T_DEPTH > 16 ) + u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19]; + u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23]; + u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27]; + u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31]; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; } + } + } +/* + * End of HPL_dlaswp03T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c new file mode 100644 index 000000000..4f9c490a5 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04N.c @@ -0,0 +1,285 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP04N_DEPTH +#define HPL_LASWP04N_DEPTH 32 +#define HPL_LASWP04N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp04N +( + const int M0, + const int M1, + const int N, + double * U, + const int LDU, + double * A, + const int LDA, + const double * W0, + const double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp04N +( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M0; + const int M1; + const int N; + double * U; + const int LDU; + double * A; + const int LDA; + const double * W0; + const double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U + * with columns of W. In addition M1 - M0 columns of W are copied into + * rows of U. + * + * Arguments + * ========= + * + * M0 (local input) const int + * On entry, M0 specifies the number of rows of U that should be + * copied into A and replaced by columns of W. M0 must be at + * least zero. + * + * M1 (local input) const int + * On entry, M1 specifies the number of columns of W that should + * be copied into rows of U. M1 must be at least zero. 
+ * + * N (local input) const int + * On entry, N specifies the length of the rows of U that should + * be copied into A. N must be at least zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows that are to be copied into A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M1). + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M0). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M0+M1), that contains + * data to be copied into U. For i in [M0..M0+M1), the entries + * W(:,i) are copied into the row W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M0 containing the + * local row indexes A into which rows of U are copied. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M0 that contains + * the local row indexes of U that should be copied into A and + * replaced by the columns of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + const double * w = W, * w0; + double * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP04N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP04N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) << + HPL_LASWP04N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH, A += incA, U += incU, + w += HPL_LASWP04N_DEPTH ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U + (size_t)(LINDXAU[i]); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *a0 = *u0; *u0 = w0[ 0]; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP04N_DEPTH > 1 ) + *a0 = *u0; *u0 = w0[ 1]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 2 ) + *a0 = *u0; *u0 = w0[ 2]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 3]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 4 ) + *a0 = *u0; *u0 = w0[ 4]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 5]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 6]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 7]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 8 ) + *a0 = *u0; *u0 = w0[ 8]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 9]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[10]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[11]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[12]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[13]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[14]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[15]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 16 ) + *a0 = *u0; *u0 = w0[16]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[17]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[18]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[19]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[20]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[21]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[22]; a0 += 
LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[23]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[24]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[25]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[26]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[27]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[28]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[29]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[30]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[31]; a0 += LDA; u0 += LDU; +#endif + } + + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *u0 = w0[ 0]; u0 += LDU; +#if ( HPL_LASWP04N_DEPTH > 1 ) + *u0 = w0[ 1]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 2 ) + *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 4 ) + *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU; + *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 8 ) + *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU; + *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU; + *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU; + *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 16 ) + *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU; + *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU; + *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU; + *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU; + *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU; + *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU; + *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU; + *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U + (size_t)(LINDXAU[i]); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { *a0 = *u0; *u0 = w0[j]; } + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))); + w0 = w + (size_t)(i) * 
(size_t)(LDW); + for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; } + } + } +/* + * End of HPL_dlaswp04N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c new file mode 100644 index 000000000..9cbb4c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp04T.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP04T_DEPTH +#define HPL_LASWP04T_DEPTH 32 +#define HPL_LASWP04T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp04T +( + const int M0, + const int M1, + const int N, + double * U, + const int LDU, + double * A, + const int LDA, + const double * W0, + const double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp04T +( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M0; + const int M1; + const int N; + double * U; + const int LDU; + double * A; + const int LDA; + const double * W0; + const double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those + * columns of U with columns of W. In addition M1 - M0 columns of W are + * copied into U. 
+ * + * Arguments + * ========= + * + * M0 (local input) const int + * On entry, M0 specifies the number of columns of U that should + * be copied into A and replaced by columns of W. M0 must be at + * least zero. + * + * M1 (local input) const int + * On entry, M1 specifies the number of columnns of W that will + * be copied into U. M1 must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that + * will be copied into rows of A. N must be at least zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M0). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M0+M1), that contains + * data to be copied into U. For i in [M0..M0+M1), the entries + * W(:,i) are copied into the column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M0 containing the + * local row indexes A into which columns of U are copied. 
+ * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M0 that contains + * the local column indexes of U that should be copied into A + * and replaced by the columns of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP04T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP04T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) << + HPL_LASWP04T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU, + w += HPL_LASWP04T_DEPTH ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + LINDXA[i]; u0 = U + LINDXAU[i] * LDU; w0 = w + i * LDW; + + *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA; +#if ( HPL_LASWP04T_DEPTH > 1 ) + *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 2 ) + *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA; + *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 4 ) + *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA; + *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA; + *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA; + *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 8 ) + *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA; + *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA; + *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA; + *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA; + *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA; + *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA; + *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA; + *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 16 ) + *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA; + *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA; + *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA; + *a0 = u0[19]; 
u0[19] = w0[19]; a0 += LDA; + *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA; + *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA; + *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA; + *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA; + *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA; + *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA; + *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA; + *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA; + *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA; + *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA; + *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA; + *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA; +#endif + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (int)(*(W0+i*LDW)) * LDU; w0 = w + i * LDW; + + u0[ 0] = w0[ 0]; +#if ( HPL_LASWP04T_DEPTH > 1 ) + u0[ 1] = w0[ 1]; +#endif +#if ( HPL_LASWP04T_DEPTH > 2 ) + u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3]; +#endif +#if ( HPL_LASWP04T_DEPTH > 4 ) + u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7]; +#endif +#if ( HPL_LASWP04T_DEPTH > 8 ) + u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11]; + u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15]; +#endif +#if ( HPL_LASWP04T_DEPTH > 16 ) + u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19]; + u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23]; + u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27]; + u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31]; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + LINDXA[i]; u0 = U + LINDXAU[i] * LDU; w0 = w + i * LDW; + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; } + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (int)(*(W0+i*LDW)) * LDU; w0 = w + i * LDW; + for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; } + } + } +/* + * End of HPL_dlaswp04T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c new 
file mode 100644 index 000000000..3edcf91a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05N.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP05N_DEPTH +#define HPL_LASWP05N_DEPTH 32 +#define HPL_LASWP05N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05N +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05N copies rows of U of global offset LINDXAU into rows of + * A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of U that should be + * copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of U that should + * be copied into A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. 
+ * LDA must be at least MAX(1,M). + * + * U (local input/output) const double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows that are to be copied into A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied from U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U that should be copied in A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP05N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) << + HPL_LASWP05N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + + *a0 = *u0; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP05N_DEPTH > 1 ) + *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 2 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 4 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 8 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 16 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; } + } + } +/* + * End of HPL_dlaswp05N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c new file mode 100644 index 000000000..0adaa102d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp05T.c @@ -0,0 +1,196 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP05T_DEPTH +#define HPL_LASWP05T_DEPTH 32 +#define HPL_LASWP05T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05T +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05T copies columns of U of global offset LINDXAU into rows + * of A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of U that shouldbe copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that will + * be copied into rows of A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU. 
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) const double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied from U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local column indexes of U that should be copied in A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) << + HPL_LASWP05T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + *a0 = u0[ 0]; a0 += LDA; +#if ( HPL_LASWP05T_DEPTH > 1 ) + *a0 = u0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 2 ) + *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 4 ) + *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA; + *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 8 ) + *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA; + *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA; + *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA; + *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 16 ) + *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA; + *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA; + *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA; + *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA; + *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA; + *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA; + *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA; + *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; } + } + } +/* + * End of HPL_dlaswp05T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c new file mode 100644 index 000000000..a74bae75c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06N.c @@ -0,0 +1,206 @@ +/* + * -- 
High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06N_DEPTH +#define HPL_LASWP06N_DEPTH 32 +#define HPL_LASWP06N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06N +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06N swaps rows of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with rows of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with rows of U. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows or columns of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows of U that are to be swapped with rows + * of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP06N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) << + HPL_LASWP06N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP06N_DEPTH > 1 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 2 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 4 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 8 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = 
r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 16 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { r = *a0; *a0 = *u0; *u0 = r; } + } + } +/* + * End of HPL_dlaswp06N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c new file mode 100644 index 000000000..fb53c2a31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp06T.c @@ -0,0 +1,207 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06T_DEPTH +#define HPL_LASWP06T_DEPTH 32 +#define HPL_LASWP06T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06T +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06T swaps columns of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with columns of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with columns of U. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns of U that are to be swapped with + * rows of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) << + HPL_LASWP06T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + + r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA; +#if ( HPL_LASWP06T_DEPTH > 1 ) + r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 2 ) + r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA; + r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 4 ) + r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA; + r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA; + r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA; + r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 8 ) + r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA; + r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA; + r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA; + r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA; + r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA; + r = *a0; *a0 = u0[13]; u0[13] = r; a0 += 
LDA; + r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA; + r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 16 ) + r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA; + r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA; + r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA; + r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA; + r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA; + r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA; + r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA; + r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA; + r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA; + r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA; + r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA; + r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA; + r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA; + r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA; + r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA; + r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) + { r = *a0; *a0 = u0[j]; u0[j] = r; } + } + } +/* + * End of HPL_dlaswp06T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c new file mode 100644 index 000000000..7dbf934f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_dlaswp10N.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP10N_DEPTH +#define HPL_LASWP10N_DEPTH 32 +#define HPL_LASWP10N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp10N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp10N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp10N performs a sequence of local column interchanges on a + * matrix A. One column interchange is initiated for columns 0 through + * N-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. N + * must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N) containing + * the columns to be interchanged. On exit, A is the permuted matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A, + * i.e. the distance in elements between consecutive columns. + * LDA must be at least MAX(1,M). + * + * IPIV (local input) const int * + * On entry, IPIV is an array of dimension N. For each j in 0..N-1 + * with IPIV[j] != j, columns j and IPIV[j] of A are interchanged. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * a0, * a1; + const int incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH ); + int jp, mr, mu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH ) + << HPL_LASWP10N_LOG2_DEPTH ) ); + + for( j = 0; j < N; j++ ) + { + if( j != ( jp = IPIV[j] ) ) + { + a0 = A + j * LDA; a1 = A + jp * LDA; + + for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA ) + { + r = *a0; *a0 = *a1; *a1 = r; +#if ( HPL_LASWP10N_DEPTH > 1 ) + r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 2 ) + r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r; + r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 4 ) + r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r; + r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r; + r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r; + r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 8 ) + r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r; + r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r; + r = a0[10]; a0[10] = a1[10]; a1[10] = r; + r = a0[11]; a0[11] = a1[11]; a1[11] = r; + r = a0[12]; a0[12] = a1[12]; a1[12] = r; + r = a0[13]; a0[13] = a1[13]; a1[13] = r; + r = a0[14]; a0[14] = a1[14]; a1[14] = r; + r = a0[15]; a0[15] = a1[15]; a1[15] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 16 ) + r = a0[16]; a0[16] = a1[16]; a1[16] = r; + r = a0[17]; a0[17] = a1[17]; a1[17] = r; + r = a0[18]; a0[18] = a1[18]; a1[18] = r; + r = a0[19]; a0[19] = a1[19]; a1[19] = r; + r = a0[20]; a0[20] = a1[20]; a1[20] = r; + r = a0[21]; a0[21] = a1[21]; a1[21] = r; + r = a0[22]; a0[22] = a1[22]; a1[22] = r; + r = a0[23]; a0[23] = a1[23]; a1[23] = r; + r = a0[24]; a0[24] = a1[24]; a1[24] = r; + r = a0[25]; a0[25] = a1[25]; a1[25] = r; + r = a0[26]; a0[26] = a1[26]; a1[26] = r; + r = a0[27]; a0[27] = a1[27]; a1[27] = r; + r = a0[28]; a0[28] = a1[28]; a1[28] = r; + r = a0[29]; a0[29] = a1[29]; a1[29] = r; + r = a0[30]; a0[30] = a1[30]; a1[30] = r; + r = a0[31]; a0[31] = a1[31]; a1[31] = r; +#endif + } + + for( i = 0; i < mr; i++ ) + { r = a0[i]; a0[i] = a1[i]; a1[i] = r; } + } + } +/* + * End of 
HPL_dlaswp10N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c new file mode 100644 index 000000000..e1b5bbfac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2l.c @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2l +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2l +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2l computes the local index of a matrix entry pointed to by + * the global index IG. This local returned index is the same in all + * processes. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated, in which case this routine returns IG in all + * processes. Otherwise, the value of SRCPROC is ignored. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( IG ); +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + return( NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? 
IG - INB : IG ) ); +/* + * End of HPL_indxg2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c new file mode 100644 index 000000000..74662f9d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2lp.c @@ -0,0 +1,176 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_indxg2lp +( + int * IL, + int * PROC, + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +void HPL_indxg2lp +( IL, PROC, IG, INB, NB, SRCPROC, NPROCS ) + int * IL; + int * PROC; + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2lp computes the local index of a matrix entry pointed to by + * the global index IG as well as the process coordinate which possesses + * this entry. The local returned index is the same in all processes. + * + * Arguments + * ========= + * + * IL (output) int * + * On exit, IL specifies the local index corresponding to IG. IL + * is at least zero. + * + * PROC (output) int * + * On exit, PROC is the coordinate of the process owning the + * entry specified by the global index IG. PROC is at least zero + * and less than NPROCS. + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A.
NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated and IL is set to IG. Otherwise, SRCPROC is the + * coordinate of the process owning the first block. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + *IL = IG; + *PROC = SRCPROC; + } + else + { +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC and take + * the NPROCS modulo (definition of the block-cyclic data distribution).
+ */ + *PROC = SRCPROC + 1 + i; + *PROC = MPosMod( *PROC, NPROCS ); +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + *IL = NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ); + } +/* + * End of HPL_indxg2lp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c new file mode 100644 index 000000000..d0e75f516 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxg2p.c @@ -0,0 +1,128 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2p +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2p +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2p computes the process coordinate which possesses the entry + * of a matrix specified by a global index IG. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int proc; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( SRCPROC ); +/* + * Otherwise, IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC + * and take the NPROCS modulo (definition of the block-cyclic data dis- + * tribution).
+ */ + proc = SRCPROC + 1 + ( IG - INB ) / NB; + return( MPosMod( proc, NPROCS ) ); +/* + * End of HPL_indxg2p + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c new file mode 100644 index 000000000..7f139425a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_indxl2g.c @@ -0,0 +1,164 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxl2g +( + const int IL, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxl2g +( IL, INB, NB, PROC, SRCPROC, NPROCS ) + const int IL; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxl2g computes the global index of a matrix entry pointed to + * by the local index IL of the process indicated by PROC. + * + * Arguments + * ========= + * + * IL (input) const int + * On entry, IL specifies the local index of the matrix entry. + * IL must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. 
+ * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local array row or column is to be determined. PROC must be + * at least zero and strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( IL ); + } + else if( PROC == SRCPROC ) + { +/* + * If I am SRCPROC, my first block is of size INB + */ + if( IL < INB ) +/* + * If IL belongs to the first block, the local and global indexes are + * equal. + */ + return ( IL ); +/* + * The number of entire blocks before the one IL belongs to is + * ( IL - INB ) / NB + 1. In the other NPROCS-1 processes, there are + * thus NB*( ( IL-INB )/NB + 1 ) entries, that are globally before the + * global entry corresponding to IL. + */ + return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL ); + } + else if( PROC < SRCPROC ) + { +/* + * Otherwise, the process of coordinate MOD(SRCPROC+1, NPROCS) owns the + * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro- + * cesses between this process and PROC not included when going from + * left to right on the process line with possible wrap around. These + * IPROC processes have one more NB block than the other processes, who + * own IL / NB blocks of size NB. 
+ */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB ); + } + else + { +/* + * Same reasoning as above with IPROC = PROC - SRCPROC - 1. + */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1 )+IL+INB ); + } +/* + * End of HPL_indxl2g + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c new file mode 100644 index 000000000..2580f2ad4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_infog2l.c @@ -0,0 +1,382 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_infog2l +( + int I, + int J, + const int IMB, + const int MB, + const int INB, + const int NB, + const int RSRC, + const int CSRC, + const int MYROW, + const int MYCOL, + const int NPROW, + const int NPCOL, + int * II, + int * JJ, + int * PROW, + int * PCOL +) +#else +void HPL_infog2l +( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL ) + int I; + int J; + const int IMB; + const int MB; + const int INB; + const int NB; + const int RSRC; + const int CSRC; + const int MYROW; + const int MYCOL; + const int NPROW; + const int NPCOL; + int * II; + int * JJ; + int * PROW; + int * PCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_infog2l computes the starting local index II, JJ corresponding to + * the submatrix starting globally at the entry pointed by I, J. This + * routine returns the coordinates in the grid of the process owning the + * matrix entry of global indexes I, J, namely PROW and PCOL. 
+ * + * Arguments + * ========= + * + * I (global input) int + * On entry, I specifies the global row index of the matrix + * entry. I must be at least zero. + * + * J (global input) int + * On entry, J specifies the global column index of the matrix + * entry. J must be at least zero. + * + * IMB (global input) const int + * On entry, IMB specifies the size of the first row block of + * the global matrix. IMB must be at least one. + * + * MB (global input) const int + * On entry, MB specifies the blocking factor used to partition + * and distribute the rows of the matrix A. MB must be larger + * than one. + * + * INB (global input) const int + * On entry, INB specifies the size of the first column block of + * the global matrix. INB must be at least one. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the columns of the matrix A. NB must be larger + * than one. + * + * RSRC (global input) const int + * On entry, RSRC specifies the row coordinate of the process + * that possesses the row I. RSRC must be at least zero and + * strictly less than NPROW. + * + * CSRC (global input) const int + * On entry, CSRC specifies the column coordinate of the process + * that possesses the column J. CSRC must be at least zero and + * strictly less than NPCOL. + * + * MYROW (local input) const int + * On entry, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. 
+ * + * II (local output) int * + * On exit, II specifies the local starting row index of the + * submatrix. On exit, II is at least 0. + * + * JJ (local output) int * + * On exit, JJ specifies the local starting column index of the + * submatrix. On exit, JJ is at least 0. + * + * PROW (global output) int * + * On exit, PROW is the row coordinate of the process owning the + * entry specified by the global index I. PROW is at least zero + * and less than NPROW. + * + * PCOL (global output) int * + * On exit, PCOL is the column coordinate of the process owning + * the entry specified by the global index J. PCOL is at least + * zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc; +/* .. + * .. Executable Statements .. + */ + imb = IMB; + *PROW = RSRC; + + if( ( *PROW == -1 ) || ( NPROW == 1 ) ) + { +/* + * The data is not distributed, or there is just one process row in the + * grid. + */ + *II = I; + } + else if( I < imb ) + { +/* + * I refers to an entry in the first block of rows + */ + *II = ( MYROW == *PROW ? I : 0 ); + } + else + { + mb = MB; + rsrc = *PROW; +/* + * The discussion goes as follows: compute my distance from the source + * process so that within this process coordinate system, the source + * process is the process such that mydist = 0, or equivalently + * MYROW == rsrc. + * + * Find out the global coordinate of the block I belongs to (nblocks), + * as well as the minimum local number of blocks that every process has. + * + * when mydist < nblocks-ilocblk*NPROCS, I own ilocblk + 1 full blocks, + * when mydist > nblocks-ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks-ilocblk*NPROCS, I own ilocblk full blocks + * but not I, or I own ilocblk + 1 blocks and the entry I refers to. 
+ */ + if( MYROW == rsrc ) + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I - imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only + * three possible cases: + * + * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I do not own + * I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this + * case cannot happen when ilocblk is zero, since nblocks is at + * least one. + * + * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in + * which case I and II can respectively be written as IMB + + * (nblocks-1)*MB + IL and IMB + (ilocblk-1) * MB + IL. That is + * II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen + * when ilocblk is zero, since nblocks is at least one. + * + * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns + * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. + * Note that when ilocblk is zero, II is just IMB. + */ + if( nblocks < NPROW ) + { + *II = imb; + } + else + { + ilocblk = nblocks / NPROW; + if( ilocblk * NPROW >= nblocks ) + { + *II = ( ( MYROW == *PROW ) ? + I + ( ilocblk - nblocks ) * mb : + imb + ( ilocblk - 1 ) * mb ); + } + else + { + *II = imb + ilocblk * mb; + } + } + } + else + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I -= imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; +/* + * When mydist < nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks + * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I, I own ilocblk + * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk + * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB. + */ + if( nblocks < NPROW ) + { + mydist -= nblocks; + *II = ( ( mydist < 0 ) ? mb : + ( ( MYROW == *PROW ) ? + I + ( 1 - nblocks ) * mb : 0 ) ); + } + else + { + ilocblk = nblocks / NPROW; + mydist -= nblocks - ilocblk * NPROW; + *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : + ( ( MYROW == *PROW ) ? + ( ilocblk - nblocks + 1 ) * mb + I : + ilocblk * mb ) ); + } + } + } +/* + * Idem for the columns + */ + inb = INB; + *PCOL = CSRC; + + if( ( *PCOL == -1 ) || ( NPCOL == 1 ) ) + { + *JJ = J; + } + else if( J < inb ) + { + *JJ = ( MYCOL == *PCOL ? J : 0 ); + } + else + { + nb = NB; + csrc = *PCOL; + + if( MYCOL == csrc ) + { + nblocks = ( J - inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( nblocks < NPCOL ) + { + *JJ = inb; + } + else + { + ilocblk = nblocks / NPCOL; + if( ilocblk * NPCOL >= nblocks ) + { + *JJ = ( ( MYCOL == *PCOL ) ? + J + ( ilocblk - nblocks ) * nb : + inb + ( ilocblk - 1 ) * nb ); + } + else + { + *JJ = inb + ilocblk * nb; + } + } + } + else + { + nblocks = ( J -= inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; + + if( nblocks < NPCOL ) + { + mydist -= nblocks; + *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? + J + ( 1 - nblocks )*nb : 0 ) ); + } + else + { + ilocblk = nblocks / NPCOL; + mydist -= nblocks - ilocblk * NPCOL; + *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : + ( ( MYCOL == *PCOL ) ? 
+ ( ilocblk - nblocks + 1 ) * nb + J : + ilocblk * nb ) ); + } + } + } +/* + * End of HPL_infog2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c new file mode 100644 index 000000000..39cd736d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numroc.c @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numroc +( + const int N, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numroc +( N, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numroc returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index 0. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. 
+ * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) ); +/* + * End of HPL_numroc + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c new file mode 100644 index 000000000..70f3497de --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_numrocI.c @@ -0,0 +1,243 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numrocI +( + const int N, + const int I, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numrocI +( N, I, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int I; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numrocI returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index I. 
+ * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * I (input) const int + * On entry, I specifies the global index of the matrix entry. + * I must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, inb, mydist, nblocks, srcproc; +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( N ); +/* + * Compute coordinate of process owning I and corresponding INB + */ + srcproc = SRCPROC; + + if( ( inb = INB - I ) <= 0 ) + { +/* + * I is not in the first block, find out which process has it and update + * the size of first block + */ + srcproc += ( nblocks = (-inb) / NB + 1 ); + srcproc -= ( srcproc / NPROCS ) * NPROCS; + inb += nblocks * NB; + } +/* + * Now everything is just like N, I=0, INB, NB, srcproc, NPROCS.
The + * discussion goes as follows: compute my distance from the source pro- + * cess so that within this process coordinate system, the source pro- + * cess is the process such that mydist = 0, or PROC == srcproc. + * + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. Then remark that + * + * when mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks, + * when mydist > nblocks - ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks - ilocblk*NPROCS, either the last block is not + * full and I own it, or the last block is full and I am the first pro- + * cess owning only ilocblk full blocks. + */ + if( PROC == srcproc ) + { +/* + * I am the source process, i.e. I own I (mydist=0). When N <= INB, the + * answer is simply N. + */ + if( N <= inb ) return( N ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only + * two possible cases: + * + * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di- + * vides the global number of full blocks, then the source process + * srcproc owns one more block than the other processes; and N can + * be rewritten as N = INB + (nblocks-1) * NB + LNB with LNB >= 0 + * size of the last block. Similarly, the local value Np correspon- + * ding to N can be written as Np = INB + (ilocblk-1) * NB + LNB = + * N + ( ilocblk-1 - (nblocks-1) )*NB. Note that this case cannot + * happen when ilocblk is zero, since nblocks is at least one. + * + * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only + * owns full blocks, and therefore Np = INB + ilocblk * NB. Note + * that when ilocblk is zero, Np is just INB. + */ + if( nblocks < NPROCS ) return( inb ); + + ilocblk = nblocks / NPROCS; + return( ( nblocks - ilocblk * NPROCS ) ? 
inb + ilocblk * NB : + N + ( ilocblk - nblocks ) * NB ); + } + else + { +/* + * I am not the source process. When N <= INB, the answer is simply 0. + */ + if( N <= inb ) return( 0 ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS; +/* + * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks + * of size NB since I am not the source process, + * + * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks + * of size NB since I am not the source process, + * + * when mydist = nblocks - ilocblk*NPROCS, + * either the last block is not full and I own it, in which case + * N = INB + (nblocks - 1)*NB + LNB with LNB the size of the last + * block such that NB > LNB > 0; the local value Np corresponding to + * N is given by Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB; + * or the last block is full and I am the first process owning only + * ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and + * Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB. + */ + if( nblocks < NPROCS ) + return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 : + N - inb + NB * ( 1 - nblocks ) ) ); + + ilocblk = nblocks / NPROCS; + mydist -= nblocks - ilocblk * NPROCS; + return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : + ( ( mydist > 0 ) ? 
ilocblk * NB : + N - inb + NB * ( ilocblk - nblocks + 1 ) ) ); + } +/* + * End of HPL_numrocI + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c new file mode 100644 index 000000000..268975fc1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pabort.c @@ -0,0 +1,137 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pabort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pabort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pabort displays an error message on stderr and halts execution. + * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( stderr, + "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); + + MPI_Abort( MPI_COMM_WORLD, -1 ); + exit( -1 ); +/* + * End of HPL_pabort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c new file mode 100644 index 000000000..73cf649da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlamch.c @@ -0,0 +1,143 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlamch +( + MPI_Comm COMM, + const HPL_T_MACH CMACH +) +#else +double HPL_pdlamch +( COMM, CMACH ) + MPI_Comm COMM; + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum(sfmin) such that + * 1/sfmin does not overflow, the base of the machine (base), the precision + * (prec), the number of (base) digits in the mantissa (t), whether + * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum + * exponent before (gradual) underflow (emin), the underflow threshold + * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the + * overflow threshold (rmax) - (base**emax)*(1-eps). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * CMACH (global input) const HPL_T_MACH + * Specifies the value to be returned by HPL_pdlamch + * = HPL_MACH_EPS, HPL_pdlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + * = HPL_MACH_BASE, HPL_pdlamch := base + * = HPL_MACH_PREC, HPL_pdlamch := eps*base + * = HPL_MACH_MLEN, HPL_pdlamch := t + * = HPL_MACH_RND, HPL_pdlamch := rnd + * = HPL_MACH_EMIN, HPL_pdlamch := emin + * = HPL_MACH_RMIN, HPL_pdlamch := rmin + * = HPL_MACH_EMAX, HPL_pdlamch := emax + * = HPL_MACH_RMAX, HPL_pdlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double param; +/* .. + * .. Executable Statements .. + */ + param = HPL_dlamch( CMACH ); +/* Combine over COMM: max for EPS, SFMIN, EMIN, RMIN; min for EMAX, RMAX; other values stay local. */ + switch( CMACH ) + { + case HPL_MACH_EPS : + case HPL_MACH_SFMIN : + case HPL_MACH_EMIN : + case HPL_MACH_RMIN : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_max, COMM ); + break; + case HPL_MACH_EMAX : + case HPL_MACH_RMAX : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_min, COMM ); + break; + default : + break; + } + + return( param ); +/* + * End of HPL_pdlamch + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c new file mode 100644 index 000000000..40bdcc36b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlange.c @@ -0,0 +1,242 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlange +( + const HPL_T_grid * GRID, + const HPL_T_NORM NORM, + const int M, + const int N, + const int NB, + const double * A, + const int LDA +) +#else +double HPL_pdlange +( GRID, NORM, M, N, NB, A, LDA ) + const HPL_T_grid * GRID; + const HPL_T_NORM NORM; + const int M; + const int N; + const int NB; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a distributed matrix A: + * + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. HPL_rzero is returned immediately when min(M,N) is zero. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NORM (global input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,LocQ(N)), + * that contains the local pieces of the distributed matrix A. 
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + MPI_Comm Acomm, Ccomm, Rcomm; + int ii, jj, mp, mycol, myrow, npcol, nprow, + nq; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Ccomm = GRID->col_comm; + Acomm = GRID->all_comm; + + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( Mmin( M, N ) == 0 ) { return( v0 ); } + else if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ): scan the local mp x nq piece, then max-reduce to rank 0 of Acomm + */ + if( ( nq > 0 ) && ( mp > 0 ) ) + { + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - mp; + } + } + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Acomm ); + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ): work[jj] receives the local sum of abs values of column jj. 
+ */ + if( nq > 0 ) + { + work = (double*)malloc( (size_t)(nq) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( jj = 0; jj < nq; jj++ ) + { + s = HPL_rzero; + for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; } + work[jj] = s; A += LDA - mp; + } +/* + * Find sum of global matrix columns, store on row 0 of process grid + */ + (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum, + 0, Ccomm ); +/* + * Find maximum sum of columns for 1-norm (only done where myrow is 0) + */ + if( myrow == 0 ) + { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in row 0, store result in process (0,0) + */ + if( myrow == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Rcomm ); + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ): work[ii] accumulates the local sum of abs values of row ii + */ + if( mp > 0 ) + { + work = (double*)malloc( (size_t)(mp) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; } + + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { work[ii] += Mabs( *A ); A++; } + A += LDA - mp; + } +/* + * Find sum of global matrix rows, store on column 0 of process grid + */ + (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum, + 0, Rcomm ); +/* + * Find maximum sum of rows for inf-norm (only done where mycol is 0) + */ + if( mycol == 0 ) + { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in column 0, store result in process (0,0) + */ + if( mycol == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, + 0, Ccomm ); + } +/* + * Broadcast answer from rank 0 of Acomm to every process in the grid + */ + (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm ); + + return( v0 ); +/* + * End of 
HPL_pdlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c new file mode 100644 index 000000000..20f11129a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pdlaprnt.c @@ -0,0 +1,236 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaprnt +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int IAROW, + const int IACOL, + const char * CMATNM +) +#else +void HPL_pdlaprnt +( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int IAROW; + const int IACOL; + const char * CMATNM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaprnt prints to standard error a distributed matrix A. The + * local pieces of A are sent to the process of coordinates (0,0) in + * the grid and then printed. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the coefficient + * matrix A. M must be at least zero. 
+ * + * N (global input) const int + * On entry, N specifies the number of columns of the + * coefficient matrix A. N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * A (local input) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). + * This array contains the coefficient matrix to be printed. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * IAROW (global input) const int + * On entry, IAROW specifies the row process coordinate owning + * the first row of A. IAROW must be larger than or equal to + * zero and less than NPROW. + * + * IACOL (global input) const int + * On entry, IACOL specifies the column process coordinate + * owning the first column of A. IACOL must be larger than or + * equal to zero and less than NPCOL. + * + * CMATNM (global input) const char * + * On entry, CMATNM is the name of the matrix to be printed. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Acomm; + double * buf = NULL; + int h, i, ib, icurcol=IACOL, icurrow=IAROW, + ii=0, j, jb, jj=0, mycol, myrow, npcol, + nprow, src; +/* .. + * .. Executable Statements .. 
+ */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Acomm = GRID->all_comm; + if( ( myrow == 0 ) && ( mycol == 0 ) ) + buf = (double*)malloc( (size_t)(NB) * sizeof( double ) ); + + for( j = 0; j < N; j += NB ) + { + jb = N-j; jb = Mmin( jb, NB ); + for( h = 0; h < jb; h++ ) + { + (void) HPL_barrier( Acomm ); + + for( i = 0; i < M; i += NB ) + { + ib = M-i; ib = Mmin( ib, NB ); + if( ( icurrow == 0 ) && ( icurcol == 0 ) ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1, + j+h+1, LDA, CMATNM ); + } + else + { + if( ( myrow == icurrow ) && ( mycol == icurcol ) ) + { + (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0, + 9000+(j+h)*M+i, Acomm ); + } + else if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + src = HPL_pnum( GRID, icurrow, icurcol ); + (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i, + Acomm ); + if (buf != NULL) + HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM ); + } + } + if( myrow == icurrow ) ii += ib; + icurrow = MModAdd1( icurrow, nprow ); + (void) HPL_barrier( Acomm ); + } + ii = 0; icurrow = IAROW; + } + if( mycol == icurcol ) jj += jb; + icurcol = MModAdd1( icurcol, npcol ); + (void) HPL_barrier( Acomm ); + } + if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf ); +/* + * End of HPL_pdlaprnt + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c new file mode 100644 index 000000000..a9f666f89 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/HPL_pwarn.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pwarn +( + FILE * STREAM, + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pwarn( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pwarn displays an error message. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + FILE * STREAM; + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. 
+ */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); +/* + * End of HPL_pwarn + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp00N.o new file mode 100644 index 000000000..d84ffda98 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp00N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01N.o new file mode 100644 index 000000000..3108f50af Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01T.o new file mode 100644 index 000000000..be595015b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp01T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp02N.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp02N.o new file mode 100644 index 000000000..93be20fff Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp02N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03N.o new file mode 100644 index 000000000..590fd4d1d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03T.o new file mode 100644 index 000000000..0a3bb8457 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp03T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04N.o new file mode 100644 index 000000000..087a0c1e5 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04T.o new file mode 100644 index 000000000..9dd386467 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp04T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05N.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05N.o new file mode 100644 index 000000000..06f5cfb2f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05T.o new file mode 100644 index 000000000..39f157054 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp05T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06N.o new file mode 100644 index 000000000..4eb581ad9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06T.o new file mode 100644 index 000000000..695696633 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp06T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp10N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp10N.o new file mode 100644 index 000000000..9e61ce691 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_dlaswp10N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2l.o 
new file mode 100644 index 000000000..e01b53375 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2l.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2lp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2lp.o new file mode 100644 index 000000000..47d7464fe Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2lp.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2p.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2p.o new file mode 100644 index 000000000..c7fde1dcc Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxg2p.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxl2g.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxl2g.o new file mode 100644 index 000000000..3fc06a373 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_indxl2g.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_infog2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_infog2l.o new file mode 100644 index 000000000..d1e9791b3 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_infog2l.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numroc.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numroc.o new file mode 100644 index 000000000..5c9ee9fd6 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numroc.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numrocI.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numrocI.o new file mode 100644 index 000000000..89b5cfa00 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_numrocI.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pabort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pabort.o new file mode 100644 index 000000000..a59aca124 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pabort.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlamch.o new file mode 100644 index 000000000..c7731580e Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlamch.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlange.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlange.o new file mode 100644 index 000000000..3dc00e880 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlange.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlaprnt.o new file mode 100644 index 000000000..ede794de4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pdlaprnt.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pwarn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pwarn.o new file mode 100644 index 000000000..1313518bd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/HPL_pwarn.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Makefile new file mode 100644 index 000000000..ea93cd150 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/Makefile @@ -0,0 +1,139 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3.
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_pauxil.h +# +## Object files ######################################################## +# +HPL_pauobj = \ + HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o \ + HPL_indxl2g.o HPL_infog2l.o HPL_numroc.o \ + HPL_numrocI.o HPL_dlaswp00N.o HPL_dlaswp10N.o \ + HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o \ + HPL_dlaswp03N.o HPL_dlaswp03T.o HPL_dlaswp04N.o \ + HPL_dlaswp04T.o HPL_dlaswp05N.o HPL_dlaswp05T.o \ + HPL_dlaswp06N.o HPL_dlaswp06T.o HPL_pwarn.o \ + HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o \ + HPL_pdlange.o +# +## Targets ############################################################# +# +# all, lib and clean name actions, not files on disk. +.PHONY : all lib clean +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pauobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2l.c +HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2lp.c +HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2p.c +HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxl2g.c +HPL_infog2l.o : ../HPL_infog2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_infog2l.c +HPL_numroc.o : ../HPL_numroc.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numroc.c +HPL_numrocI.o : ../HPL_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numrocI.c +HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp00N.c +HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp10N.c +HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep) +
$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01N.c +HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01T.c +HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp02N.c +HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03N.c +HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03T.c +HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04N.c +HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04T.c +HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05N.c +HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05T.c +HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06N.c +HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06T.c +HPL_pwarn.o : ../HPL_pwarn.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pwarn.c +HPL_pabort.o : ../HPL_pabort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pabort.c +HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaprnt.c +HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlamch.c +HPL_pdlange.o : ../HPL_pdlange.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlange.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pauxil/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c new file mode 100644 index 000000000..644641412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocmax.c @@ -0,0 +1,149 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dlocmax +( + HPL_T_panel * PANEL, + const int N, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocmax +( PANEL, N, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int N; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocmax finds the maximum entry in the current column and packs + * the useful information in WORK[0:3]. On exit, WORK[0] contains the + * local maximum absolute value scalar, WORK[1] is the corresponding + * local row index, WORK[2] is the corresponding global row index, and + * WORK[3] is the coordinate of the process owning this max. When N is + * less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set + * to the total number of process rows. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of the column + * of A on which we operate. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. 
+ * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 4. On exit, + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A; + int kk, igindx, ilindx, myrow, nb, nprow; +/* .. + * .. Executable Statements .. + */ + if( N > 0 ) + { + A = Mptr( PANEL->A, II, JJ, PANEL->lda ); + myrow = PANEL->grid->myrow; + nprow = PANEL->grid->nprow; + nb = PANEL->nb; + kk = PANEL->ii + II + ( ilindx = HPL_idamax( N, A, 1 ) ); + Mindxl2g( igindx, kk, nb, nb, myrow, 0, nprow ); +/* + * WORK[0] := local maximum absolute value scalar, + * WORK[1] := corresponding local row index, + * WORK[2] := corresponding global row index, + * WORK[3] := coordinate of process owning this max. + */ + WORK[0] = A[ilindx]; WORK[1] = (double)(ilindx); + WORK[2] = (double)(igindx); WORK[3] = (double)(myrow); + } + else + { +/* + * If I do not have any row of A, then set the coordinate of the process + * (WORK[3]) owning this "ghost" row, such that it will never be used, + * even if there are only zeros in the current column of A. 
+ */ + WORK[0] = WORK[1] = WORK[2] = HPL_rzero; + WORK[3] = (double)(PANEL->grid->nprow); + } +/* + * End of HPL_dlocmax + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c new file mode 100644 index 000000000..a3919500a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpN.c @@ -0,0 +1,436 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpN +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpN +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpN performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. 
+ * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, JJ, 0, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda; + 
*L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda ) + { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current of A into L1. + */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = *A1 = Wmx[ 0]; L += n0; A1 += lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = *A1 = Wmx[ 1]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = *A1 = Wmx[ 2]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 3]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = *A1 = Wmx[ 4]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 5]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 6]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 7]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = *A1 = Wmx[ 8]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 9]; L += n0; A1 += lda; + *L = *A1 = Wmx[10]; L += n0; A1 += lda; + *L = *A1 = Wmx[11]; L += n0; A1 += lda; + *L = *A1 = Wmx[12]; L += n0; A1 += lda; + *L = *A1 = Wmx[13]; L += n0; A1 += lda; + *L = *A1 = Wmx[14]; L += n0; A1 += lda; + *L = *A1 = Wmx[15]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = *A1 = Wmx[16]; L += n0; A1 += lda; + *L = *A1 = Wmx[17]; L += n0; A1 += lda; + *L = *A1 = Wmx[18]; L += n0; A1 += lda; + *L = *A1 = Wmx[19]; L += n0; A1 += lda; + *L = *A1 = Wmx[20]; L += n0; A1 += lda; + *L = *A1 = Wmx[21]; L += n0; A1 += lda; + *L = *A1 = Wmx[22]; L += n0; A1 += lda; + *L = *A1 = Wmx[23]; L += n0; A1 += lda; + *L = *A1 = Wmx[24]; L += n0; A1 += lda; + *L = *A1 = Wmx[25]; L += n0; A1 += lda; + *L = *A1 = Wmx[26]; L += n0; A1 += lda; + *L = *A1 = Wmx[27]; L += n0; A1 += lda; + *L = *A1 = Wmx[28]; L += n0; A1 += lda; + *L = *A1 = Wmx[29]; L += n0; A1 += lda; + *L = *A1 = Wmx[30]; L += n0; A1 += lda; + *L = *A1 = Wmx[31]; L += n0; A1 += lda; +#endif + } + + for( i = 0; i < nr; i++, L += n0, A1 += lda ) + { *L = *A1 = Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1. 
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. 
+ */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. 
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wr0[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wr0[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0; + *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0; + *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0; + *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0; + *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0; + *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0; + *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0; + *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0; + *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0; + *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0; + *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0; + *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0; +#endif + } + + for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; } +/* + * set INFO. + */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c new file mode 100644 index 000000000..89b86e35a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_dlocswpT.c @@ -0,0 +1,406 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor (HPL_LOCSWP_DEPTH must equal 2^HPL_LOCSWP_LOG2_DEPTH: nu below is rounded with the shift while the unrolled loops step by the depth) + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpT +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpT +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpT performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace; WORK[4+N0:4+2*N0-1] (Wr0 below) is read as the current row of A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements ..
+ */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; /* nu: n0 rounded down to a multiple of 2^HPL_LOCSWP_LOG2_DEPTH (handled by the unrolled loops), nr: remaining entries (handled by the scalar clean-up loops) */ +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, 0, JJ, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to be swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A. + */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH, + L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda; + L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda; + L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda; + L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda; + L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda; + L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda; + L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda; + L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda; + L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda; + L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda; + L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda; + L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda; + L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda; + L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda; + L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda; + L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda; + L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda; + L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda; + L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda; + L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda; + L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda; + L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda; + L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda; + L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda; + L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda; + L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda; + L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda, A2 += lda ) + { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current row of A into L1.
+ */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; + L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[12]=Wmx[12]; + L[ 9]=Wmx[ 9]; L[13]=Wmx[13]; + L[10]=Wmx[10]; L[14]=Wmx[14]; + L[11]=Wmx[11]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[20]=Wmx[20]; + L[17]=Wmx[17]; L[21]=Wmx[21]; + L[18]=Wmx[18]; L[22]=Wmx[22]; + L[19]=Wmx[19]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[28]=Wmx[28]; + L[25]=Wmx[25]; L[29]=Wmx[29]; + L[26]=Wmx[26]; L[30]=Wmx[30]; + L[27]=Wmx[27]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A.
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; A1+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda; + L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda; + L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda; + L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda; + L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda; + L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda; + L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda; + L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda; + L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda; + L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda; + L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda; + L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1.
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11]; + L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19]; + L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27]; + L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. + */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28];
A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. + */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wr0[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wr0[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13]; + L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21]; + L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23]; + L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29]; + L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; } +/* + * Set INFO: record PANEL->ia + JJ + 1 for this zero pivot, unless DINFO is already non-zero. + */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c new file mode 100644 index 000000000..1d99c6e14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdfact.c @@ -0,0 +1,141 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdfact +( + HPL_T_panel * PANEL +) +#else +void HPL_pdfact +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdfact recursively factorizes a 1-dimensional panel of columns. + * The RPFACT function pointer specifies the recursive algorithm to be + * used, either Crout, Left- or Right looking. NBMIN allows to vary the + * recursive stopping criterion in terms of the number of columns in the + * panel, and NDIV allows to specify the number of subpanels each panel + * should be divided into. Usually a value of 2 will be chosen. Finally + * PFACT is a function pointer specifying the non-recursive algorithm + * to be used on at most NBMIN columns. One can also choose here between + * Crout, Left- or Right looking. Empirical tests seem to indicate that + * values of 4 or 8 for NBMIN give the best results. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual.
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + void * vptr = NULL; + int align, jb; +/* .. + * .. Executable Statements ..
+ */ + jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb; + + if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return; /* nothing to factor unless this process is in column PANEL->pcol and jb > 0; note that PANEL->n and PANEL->ja were already updated above */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif + align = PANEL->algo->align; + vptr = (void *)malloc( ( (size_t)(align) + + (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) * + sizeof(double) ); /* room for align + 2*(4+2*jb) doubles */ + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); } +/* + * Factor the panel with the recursive routine PANEL->algo->rffun, passing the workspace pointer returned by HPL_PTR, free the workspace - Update the panel pointers + */ + PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr, + ((size_t)(align) * sizeof(double) ) ) ); + if( vptr ) free( vptr ); + + PANEL->A = Mptr( PANEL->A, 0, jb, PANEL->lda ); + PANEL->nq -= jb; PANEL->jj += jb; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif +/* + * End of HPL_pdfact + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c new file mode 100644 index 000000000..b14452197 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdmxswp.c @@ -0,0 +1,311 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmxswp +( + HPL_T_panel * PANEL, + const int M, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_pdmxswp +( PANEL, M, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int M; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmxswp swaps and broadcasts the absolute value max row using + * bi-directional exchange. The buffer is partially set by HPL_dlocmax.
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by + * + * log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + * + * where lat and bdwth are the latency and bandwidth of the network for + * double precision real elements. Communication only occurs in one + * process column. Mono-directional links will cause the communication + * cost to double. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of the matrix + * column on which this function operates. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * It is assumed that HPL_dlocmax was called prior to this + * routine to initialize the first four entries of this array. + * On exit, the N0 length max row is stored in WORK[4:4+N0-1]; + * Note that this is also the JJth row (or column) of L1. The + * remaining part is used as a temporary array. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax, tmp1; + double * A0, * Wmx, * Wwork; + HPL_T_grid * grid; + MPI_Comm comm; + unsigned int hdim, ip2, ip2_, ipow, k, mask; + int Np2, cnt_, cnt0, i, icurrow, lda, mydist, + mydis_, myrow, n0, nprow, partner, rcnt, + root, scnt, size_; +/* ..
+ * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif + grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow; +/* + * ip2 : the largest power of two less than or equal to nprow (read from grid->row_ip2); + * hdim : dimension of the hypercube made of those ip2 processes; + * Np2 : logical flag, non-zero when nprow is not a power of 2 (nprow != ip2); + */ + comm = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2); + hdim = (unsigned int)(grid->row_hdim); n0 = PANEL->jb; + icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 ); + mydist = MModSub( myrow, icurrow, nprow ); +/* + * Set up pointers in workspace: WORK and Wwork point to the beginning + * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row + * owning the local (before combine) and global (after combine) absolute + * value max. A0 points to the copy of the current row of the matrix. + */ + cnt0 = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0; + Wwork = WORK + cnt0; +/* + * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is (int)(WORK[1]) (row + * with max in current column). If I am the current process row, pack in + * addition the current row of A in A0[0:N0-1]. If I do not own any row + * of A, then zero out Wmx[0:N0-1]. + */ + if( M > 0 ) + { + lda = PANEL->lda; + HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda, + Wmx, 1 ); + if( myrow == icurrow ) + { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); } + } + else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; } +/* + * Combine the results (bi-directional exchange): the process coordina- + * tes are relative to icurrow, this allows to reduce the communication + * volume when nprow is not a power of 2. + * + * When nprow is not a power of 2: proc[i-ip2] receives local data from + * proc[i] for all i in [ip2..nprow). In addition, proc[0] (icurrow) + * sends to proc[ip2] the current row of A for later broadcast in procs + * [ip2..nprow).
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + if( mydist == (int)(ip2) ) + (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + if( mydist == 0 ) + (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); } + } + } + + if( mydist < (int)(ip2) ) + { +/* + * power of 2 part of the processes collection: processes [0..ip2) are + * combining (binary exchange); proc[0] has two rows to send, but one to + * receive. At every step k in [0..hdim) of the algorithm, a process + * pair exchanging 2 rows is such that mydist >> k+1 is 0. Among those + * processes the ones that are sending one more row than what they are + * receiving are such that mydist >> k is equal to 0. + */ + k = 0; ipow = 1; + + while( k < hdim ) + { + if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 ) + { + if( ( (unsigned int)(mydist) >> k ) == 0 ) + { scnt = cnt0; rcnt = cnt_; } + else + { scnt = cnt_; rcnt = cnt0; } + } + else { scnt = rcnt = cnt_; } + + partner = (int)( (unsigned int)(mydist) ^ ipow ); + (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt, + MSGID_BEGIN_PFACT, MModAdd( partner, icurrow, + nprow ), comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { + HPL_dcopy( ( rcnt == cnt0 ?
cnt0 : cnt_ ), Wwork, 1, + WORK, 1 ); + } + else if( rcnt == cnt0 ) + { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); } + + ipow <<= 1; k++; + } + } + else if( size_ > 1 ) + { +/* + * proc[ip2] broadcast current row of A to procs [ip2+1..nprow). + */ + k = (unsigned int)(size_) - 1; ip2_ = mask = 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else if( partner < size_ ) + { + (void) HPL_send( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } + ip2_ >>= 1; + } while( ip2_ > 0 ); + } +/* + * If nprow is not a power of 2, for all i in [ip2..nprow), proc[i-ip2] + * sends the pivot row to proc[i] along with the first four entries of + * the WORK array.
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } +/* + * Save the global pivot index in pivot array + */ + (PANEL->DPIV)[JJ] = WORK[2]; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif +/* + * End of HPL_pdmxswp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c new file mode 100644 index 000000000..4ea170b73 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrN.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in no-transpose form (i.e. just like the input + * matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm indeed allows one to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0).
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj, jj+1, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, Nm1 ); + Xv1 = vsip_msubview_d( Xv0, jj, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Xv0, jj, jj+1, 1, Nm1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone, + Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj, + ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 ); +#endif + 
if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update dia- + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk+1, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + vsip_mdestroy_d( Yv1 ); + vsip_mdestroy_d( Xv1 ); + vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF, + jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ), + 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of 
HPL_pdpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c new file mode 100644 index 000000000..50ed300aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpancrT.c @@ -0,0 +1,267 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm indeed allows one to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj+1, jj, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, jj+1, ICOFF, Nm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj, kk, 1 ); + Yv1 = vsip_msubview_d( Xv0, jj+1, jj, Nm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone, + Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF, + jj, n0 ), 1, HPL_rone, L1ptr, 1 ); +#endif + if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update dia- + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each 
current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk+1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF, + n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c new file mode 100644 index 000000000..fa471198d --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllN.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm indeed allows one to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, 1 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, 1, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c new file mode 100644 index 000000000..a6e1b67bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanllT.c @@ -0,0 +1,244 @@ +/* + * -- 
High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm indeed allows one to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c new file mode 100644 index 000000000..0a3b9a542 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlN.c @@ -0,0 +1,250 @@ +/* + * 
-- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). In the code below, WORK[0] is the divisor used to scale the current column and the update factors are read starting at WORK[4+jj+1]. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* non-zero when this process row (grid->myrow) equals PANEL->prow */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); +#ifdef HPL_CALL_VSIPL + if( Nm1 > 1 ) + { +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); + } +#else + if( Nm1 > 1 ) + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda ); +#endif +/* + * Same thing as above but with worse data access on y (A += x * y^T); kept for reference only, since L1 and n0 are not declared in this routine + * + * if( Nm1 > 1 ) + * HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + * Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ), + * lda ); + */ + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* when curr is set, step the local row offsets forward and shrink both local row counts by one */ + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlN + 
*/ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c new file mode 100644 index 000000000..68c1afc02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdpanrlT.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). In the code below, WORK[0] is the divisor used to scale the current column; the update factors are read from PANEL->L1. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt, * L1; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M, + n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* non-zero when this process row (grid->myrow) equals PANEL->prow */ + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 ); /* the update factor is read through Mptr( L1, jj+1, jj, n0 ) rather than from WORK */ + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + + if( Nm1 > 1 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ), + lda ); +#endif + } + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } /* when curr is set, step the local row offsets forward and shrink both local row counts by one */ + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c new file mode 
100644 index 000000000..348d7ebe6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrN.c @@ -0,0 +1,282 @@ +/* + * -- High
Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrN recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). It is not read here directly; it is passed on to PANEL->algo->pffun and to the recursive calls. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* N <= nbmin: hand the panel to PANEL->algo->pffun and stop recursing */ +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. 
For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN.
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* non-zero when this process row (grid->myrow) equals PANEL->prow */ + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( 
vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrN( PANEL, m, jb, ioff, WORK ); /* recursive call on the current block of jb columns at offset ioff */ + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + Av2 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n, + jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0, + Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go to the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c new file mode 100644 index 000000000..a1ecfac2c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpancrT.c @@ -0,0 +1,282 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrT recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. 
On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_TRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the 
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrT( PANEL, m, jb, ioff, WORK ); + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj ); + Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_NTRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb, + jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c new file mode 100644 index 000000000..4dbc13b44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllN recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, + jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ), + n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( 
vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllN( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c new file mode 100644 index 000000000..887caeb87 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanllT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllT recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr, + jj, 0, n0 ), n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Av2 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllT( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb 
); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c new file mode 100644 index 000000000..22f105cf4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlN recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) 
vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c new file mode 100644 index 000000000..a77301b9b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/HPL_pdrpanrlT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlT recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews (Av2 is the m-by-n trailing block: n, not N) + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0,
ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocmax.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocmax.o new file mode 100644 index 000000000..80c7da494 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocmax.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpN.o new file mode 100644 index 000000000..6402eb6b7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpT.o new file mode 100644 index 000000000..d1d72ab3d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_dlocswpT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdfact.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdfact.o new file mode 100644 index 000000000..defc0a050 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdfact.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdmxswp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdmxswp.o new file mode 100644 index 000000000..ff0ce4cec Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdmxswp.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrN.o new file mode 100644 index 000000000..2ed4cbf13 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrT.o new file mode 100644 index 000000000..f461a1bca Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpancrT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllN.o new file mode 100644 index 000000000..1f5cd25a8 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllT.o new file mode 100644 index 000000000..d2422b8ed Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanllT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlN.o new file mode 100644 index 000000000..21641a08b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlT.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlT.o new file mode 100644 index 000000000..4b5c0fbad Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdpanrlT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrN.o new file mode 100644 index 000000000..e74bf6712 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrT.o new file mode 100644 index 000000000..c6fc53453 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpancrT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllN.o new file mode 100644 index 000000000..9581736c6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllT.o new file mode 100644 index 000000000..83de419e2 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanllT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlN.o new file mode 
100644 index 000000000..1d1f5c17a Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlT.o new file mode 100644 index 000000000..cda3fd920 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/HPL_pdrpanrlT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Makefile new file mode 100644 index 000000000..bf4634d31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/Makefile @@ -0,0 +1,118 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h +# +## Object files ######################################################## +# +HPL_pfaobj = \ + HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o \ + HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o \ + HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o \ + HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o \ + HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o \ + HPL_pdrpanrlT.o HPL_pdfact.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pfaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocmax.c +HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpN.c +HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpT.c +HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmxswp.c +HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrN.c +HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrT.c +HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllN.c +HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllT.c +HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlN.c +HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlT.c +HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) 
../HPL_pdrpanllN.c +HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllT.c +HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrN.c +HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrT.c +HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlN.c +HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlT.c +HPL_pdfact.o : ../HPL_pdfact.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdfact.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pfact/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c new file mode 100644 index 000000000..b917a6525 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_equil.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_equil +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_TRANS TRANS, + const int N, + double * U, + const int LDU, + int * IPLEN, + const int * IPMAP, + const int * IPMAPM1, + int * IWORK +) +#else +void HPL_equil +( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_TRANS TRANS; + const int N; + double * U; + const int LDU; + int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_equil equilibrates the local pieces of U, so that on exit to + * this function, pieces of U contained in every process row are of the + * same size. This phase makes the rolling phase optimal. In addition, + * this function probes for the column panel L and forwards it when + * possible. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be equilibrated) information. + * + * TRANS (global input) const enum HPL_TRANS + * On entry, TRANS specifies whether U is stored in transposed + * or non-transposed form. + * + * N (local input) const int + * On entry, N specifies the number of rows or columns of U. N + * must be at least 0. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. 
+ * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]) when U is stored in + * non-transposed form, and MAX(1,N) otherwise. + * + * IPLEN (global input) int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension NPROW+1. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, ip, ipU, ipcur, iprow, iptgt, lastrow, + left, npm1, nprow, ll, llU, llcur, lltgt, + right, slen, smax, smin; +/* .. + * .. Executable Statements .. + */ + if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; +/* + * If the current distribution of the pieces of U is already optimal for + * the rolling phase, then return imediately. The optimal distribution + * is such that ip processes have smax items and the remaining processes + * only have smin items. Another way to check this is to verify that all + * differences IPLEN[i+1] - IPLEN[i] are either smin or smax. 
+ */ + smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow; + ip = slen - nprow * ( smin = slen / nprow ); + + iprow = 0; + do + { + ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++; + } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) ); + + if( iprow == nprow ) return; +/* + * Now, we are sure the distribution of the pieces of U is not optimal + * with respect to the rolling phase, thus perform equilibration. Go + * through the list of processes: Processes that have rows that do not + * belong to them with respect to the optimal mapping spread them in a + * logarithmic fashion. To simplify a little bit the implementation, and + * mainly the packing, a source process row spreads its data to its left + * first, and then to its right. + */ + IWORK[nprow] = slen; + + for( iprow = 0; iprow < nprow; iprow++ ) + { + llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] ); + if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax; } + else { lltgt = smin; iptgt = iprow * smin + ip; } + + left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU ); +/* + * If I have something to spread to either the left or the right + */ + if( ( llU > 0 ) && ( left || right ) ) + { /* Figure out how much every other process should have */ + + ipcur = ipU; llcur = llU; + + for( i = 0; i < nprow; i++ ) + { + if( i < ip ) { lltgt = smax; iptgt = i * smax; } + else { lltgt = smin; iptgt = i * smin + ip; } + lastrow = iptgt + lltgt - 1; + + if( ( lastrow >= ipcur ) && ( llcur > 0 ) ) + { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; } + else { ll = 0; } + + IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur; + } +/* + * Equilibration phase + */ + if( TRANS == HplNoTrans ) + { + if( left ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + else + { + if( left ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft, N, 
U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + } + } +/* + * Finally update IPLEN with the indexes corresponding to the new dis- + * tribution of U - IPLEN[nprow] remained unchanged. + */ + for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip ); +/* + * End of HPL_equil + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c new file mode 100644 index 000000000..0715159bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_logsort.c @@ -0,0 +1,185 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
#ifdef STDC_HEADERS
void HPL_logsort
(
   const int                        NPROCS,
   const int                        ICURROC,
   int *                            IPLEN,
   int *                            IPMAP,
   int *                            IPMAPM1
)
#else
void HPL_logsort
( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 )
   const int                        NPROCS;
   const int                        ICURROC;
   int *                            IPLEN;
   int *                            IPMAP;
   int *                            IPMAPM1;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_logsort builds a process permutation IPMAP, its inverse IPMAPM1,
 * and turns the per-process row counts held in IPLEN into offsets
 * (running sums) that follow that permutation.
 *
 * Every position p in [0..NPROCS) is given a "level" equal to the number
 * of bits set in p.  Positions are then visited level by level; a
 * position of level L takes over the largest row count found among the
 * positions q >= 2 whose level is greater than L.  Position 0 always
 * stays attached to the source process row.  According to the routine's
 * original description, the intent is that the largest pieces of U get
 * sent a minimal number of times by the logarithmic spreading of U.
 *
 * Arguments
 * =========
 *
 * NPROCS  (global input)          const int
 *         Number of process rows; documented as being at least one.
 *
 * ICURROC (global input)          const int
 *         The source process row.
 *
 * IPLEN   (global input/output)   int *
 *         Array of NPROCS+1 entries.  On entry IPLEN[i], i >= 1, is the
 *         number of rows of U owned by process i-1.  On exit IPLEN[0]
 *         is 0 and IPLEN[i+1]-IPLEN[i] is the row count associated with
 *         process IPMAP[i], so IPLEN[NPROCS] is the total.
 *
 * IPMAP   (global output)         int *
 *         Array of NPROCS entries receiving the sorted process mapping.
 *
 * IPMAPM1 (global output)         int *
 *         Array of NPROCS entries.  Used as scratch for the levels while
 *         sorting; on exit IPMAPM1[ IPMAP[i] ] = i for i in [0..NPROCS).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   int                        bits, head, level, maxlevel = 0, p, q, rot,
                              swap, word;
/* ..
 * .. Executable Statements ..
 */
/*
 * Start from the identity mapping rotated by ICURROC and store in
 * IPMAPM1[p] the number of bits set in p; remember the largest count.
 */
   for( p = 0; p < NPROCS; p++ )
   {
      IPMAP[p] = MModAdd( p, ICURROC, NPROCS );

      bits = 0;
      for( word = p; word != 0; word >>= 1 ) bits += ( word & 1 );

      IPMAPM1[p] = bits;
      if( maxlevel < bits ) maxlevel = bits;
   }
/*
 * Rotate IPLEN[1..NPROCS] left by one position, ICURROC times, so that
 * IPLEN[1] ends up holding what was IPLEN[ICURROC+1].
 */
   for( rot = 0; rot < ICURROC; rot++ )
   {
      head = IPLEN[1];
      for( q = 1; q < NPROCS; q++ ) IPLEN[q] = IPLEN[q+1];
      IPLEN[NPROCS] = head;
   }
/*
 * Level-by-level selection: a position p of the current level swaps
 * (length and mapping) with any deeper position q that holds a strictly
 * larger length.  The levels in IPMAPM1 are not modified by the swaps.
 */
   for( level = 1; level <= maxlevel; level++ )
   {
      for( p = 1; p < NPROCS; p++ )
      {
         if( IPMAPM1[p] != level ) continue;

         for( q = 2; q < NPROCS; q++ )
         {
            if( IPMAPM1[q] <= level ) continue;
            if( IPLEN[p+1] >= IPLEN[q+1] ) continue;

            swap = IPLEN[p+1]; IPLEN[p+1] = IPLEN[q+1]; IPLEN[q+1] = swap;
            swap = IPMAP[p];   IPMAP[p]   = IPMAP[q];   IPMAP[q]   = swap;
         }
      }
   }
/*
 * Invert the mapping and convert the lengths into running sums.
 */
   IPLEN[0] = 0;

   for( p = 0; p < NPROCS; p++ )
   {
      IPMAPM1[ IPMAP[p] ] = p;
      IPLEN[p+1]         += IPLEN[p];
   }
/*
 * End of HPL_logsort
 */
}
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with or without look-ahead. The lower triangular factor is left + * unpivoted and the pivots are not returned. The right hand side is the + * N+1 column of the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( A->n <= 0 ) return; + + A->info = 0; + + if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) ) + { + HPL_pdgesv0( GRID, ALGO, A ); + } + else + { + HPL_pdgesvK2( GRID, ALGO, A ); + } +/* + * Solve upper triangular system + */ + if( A->info == 0 ) HPL_pdtrsv( GRID, A ); +/* + * End of HPL_pdgesv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c new file mode 100644 index 000000000..d79b6fa55 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesv0.c @@ -0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv0 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv0 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv0 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * without look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, j, jb, n, nb, tag=MSGID_BEGIN_FACT, + test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( N = A->n ) <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + + HPL_pdupdate = ALGO->upfun; nb = A->nb; +/* + * Allocate a panel list of length 1 - Allocate panel[0] resources + */ + panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); } + + HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag, + &panel[0] ); +/* + * Loop over the columns of A + */ + for( j = 0; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Release panel resources - re-initialize panel data structure + */ + (void) HPL_pdpanel_free( panel[0] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] ); +/* + * Factor and broadcast current panel - update + */ + HPL_pdfact( panel[0] ); + (void) HPL_binit( panel[0] ); + do + { (void) HPL_bcast( panel[0], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[0] ); + HPL_pdupdate( NULL, NULL, panel[0], -1 ); +/* + * Update message id for next factorization + */ + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Release panel resources and panel list + */ + (void) HPL_pdpanel_disp( &panel[0] ); + + if( panel ) free( panel ); +/* + * End of HPL_pdgesv0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c new file mode 100644 index 000000000..ff1958cfc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK1.c @@ -0,0 +1,222 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK1 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK1 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK1 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. 
The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. 
+ */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel - use long topology for those + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-1-k panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Allocate current panel resources - Finish latest update - Factor and + * broadcast current panel + */ + HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Release latest panel resources - circular of the panel pointers + * Go to the next process row and column - update the message ids for + * broadcast + */ + (void) HPL_pdpanel_disp( &panel[0] ); + for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + + if( panel ) free( panel ); +/* + * End of HPL_pdgesvK1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c new file mode 100644 index 000000000..dec506ab9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdgesvK2.c @@ -0,0 +1,231 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK2 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK2 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK2 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + HPL_T_panel * p, * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Create last depth+1 panel + */ + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart, + jstart, tag, &panel[depth] ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-k-1 panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, 
panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Initialize current panel - Finish latest update, Factor and broadcast + * current panel + */ + (void) HPL_pdpanel_free( panel[depth] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Circular of the panel pointers: + * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp; + * + * Go to next process row and column - update the message ids for broadcast + */ + p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + panel[depth] = p; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + (void) HPL_pdpanel_disp( &panel[depth] ); + + if( panel ) free( panel ); +/* + * End of 
HPL_pdgesvK2 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c new file mode 100644 index 000000000..b4433e1be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c @@ -0,0 +1,432 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU jb +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb) + */ + vptr = (void*)malloc( + ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necesary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise. 
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this later case, and I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcast U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c new file mode 100644 index 000000000..7a9764c09 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c @@ -0,0 +1,433 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU n +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb) + */ + vptr = (void*)malloc( ( (size_t)(align) + + ((size_t)(jb) * (size_t)(ldW))) * + sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necesary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise. 
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this later case, and I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcast U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c new file mode 100644 index 000000000..31f219840 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU jb +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). 
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01N was call before only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k], + 0, LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen, + ipmap, ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * 
Permute U in every process row + */ + HPL_dlaswp00N( jb, n, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c new file mode 100644 index 000000000..0c4de2669 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. 
K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU n +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). 
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01T was called before: only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0, + iplen[k], LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap, + ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * 
Permute U in every process row + */ + HPL_dlaswp10N( n, jb, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c new file mode 100644 index 000000000..d2135130a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdtrsv.c @@ -0,0 +1,296 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtrsv +( + HPL_T_grid * GRID, + HPL_T_pmat * AMAT +) +#else +void HPL_pdtrsv +( GRID, AMAT ) + HPL_T_grid * GRID; + HPL_T_pmat * AMAT; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtrsv solves an upper triangular system of linear equations. + * + * The rhs is the last column of the N by N+1 matrix A. The solve starts + * in the process column owning the Nth column of A, so the rhs b may + * need to be moved one process column to the left at the beginning. The + * routine therefore needs a column vector in every process column but + * the one owning b. The result is replicated in all process rows, and + * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + * + * The algorithm uses decreasing one-ring broadcast in process rows and + * columns implemented in terms of synchronous communication point to + * point primitives. The lookahead of depth 1 is used to minimize the + * critical path. 
This entire operation is essentially ``latency'' bound + * and an estimate of its running time is given by: + * + * (move rhs) lat + N / ( P bdwth ) + + * (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + * gam2 N^2 / ( P Q ), + * + * where gam2 is an estimate of the Level 2 BLAS rate of execution. + * There are N / NB diagonal blocks. One must exchange 2 messages of + * length NB to compute the next NB entries of the vector solution, as + * well as performing a total of N^2 floating point operations. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * AMAT (local input/output) HPL_T_pmat * + * On entry, AMAT points to the data structure containing the + * local array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Ccomm, Rcomm; + double * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL, + * XR=NULL, * Xd=NULL, * Xdprev=NULL, + * W=NULL; + int Alcol, Alrow, Anpprev, Anp, Anq, Bcol, + Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid, + Wfr=0, colprev, kb, kbprev, lda, mycol, + myrow, n, n1, n1p, n1pprev=0, nb, npcol, + nprow, rowprev, tmp1, tmp2; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif + if( ( n = AMAT->n ) <= 0 ) return; + nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X; + + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV; + Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1; + GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 ); +/* + * Move the rhs in the process column owning the last column of A. 
+ */ + Mnumroc( Anp, n, nb, nb, myrow, 0, nprow ); + Mnumroc( Anq, n, nb, nb, mycol, 0, npcol ); + + tmp1 = ( n - 1 ) / nb; + Alrow = tmp1 - ( tmp1 / nprow ) * nprow; + Alcol = tmp1 - ( tmp1 / npcol ) * npcol; + kb = n - tmp1 * nb; + + Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda ); + Mindxg2p( n, nb, nb, Bcol, 0, npcol ); + + if( ( Anp > 0 ) && ( Alcol != Bcol ) ) + { + if( mycol == Bcol ) + { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); } + else if( mycol == Alcol ) + { (void) HPL_recv( XC, Anp, Bcol, Rmsgid, Rcomm ); } + } + Rmsgid = ( Rmsgid + 2 > + MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 ); + if( mycol != Alcol ) + { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; } +/* + * Set up lookahead + */ + n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb ); + if( Anp > 0 ) + { + W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) ); + if( W == NULL ) + { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); } + Wfr = 1; + } + + Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda ); + tmp1 = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); } + if( mycol == Alcol ) + { + Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq ); + if( myrow == Alrow ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, Xd, 1 ); + } + } + + rowprev = Alrow; Alrow = MModSub1( Alrow, nprow ); + colprev = Alcol; Alcol = MModSub1( Alcol, npcol ); + kbprev = kb; n -= kb; + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); +/* + * Start the operations + */ + while( n > 0 ) + { + if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; } + if( myrow == Alrow ) { Anp -= kb; } +/* + * Broadcast (decreasing-ring) of previous solution block in 
previous + * process column, compute partial update of current block and send it + * to current process column. + */ + if( mycol == colprev ) + { +/* + * Send previous solution block in process row above + */ + if( myrow == rowprev ) + { + if( GridIsNot1xQ ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else + { + (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ), + Cmsgid, Ccomm ); + } +/* + * Compute partial update of previous solution block and send it to cur- + * rent column + */ + if( n1pprev > 0 ) + { + tmp1 = Anpprev - n1pprev; + HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev, + -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone, + XC+tmp1, 1 ); + if( GridIsNotPx1 ) + (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm ); + } +/* + * Finish the (decreasing-ring) broadcast of the solution block in pre- + * vious process column + */ + if( ( myrow != rowprev ) && + ( myrow != MModAdd1( rowprev, nprow ) ) ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else if( mycol == Alcol ) + { +/* + * Current column receives and accumulates partial update of previous + * solution block + */ + if( n1pprev > 0 ) + { + (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm ); + HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 ); + } + } +/* + * Solve current diagonal block + */ + if( ( mycol == Alcol ) && ( myrow == Alrow ) ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 ); + } +/* +* Finish previous update +*/ + if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) + HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone, + Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 ); +/* +* Save info of current step and update info for the next step +*/ + if( mycol == Alcol ) { Xdprev = Xd; Aprev = Aptr; } + if( myrow == Alrow ) { Anpprev -= kb; } + rowprev = Alrow; colprev = 
Alcol; + n1pprev = n1p; kbprev = kb; n -= kb; + Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol ); + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV : Rmsgid+2 ); + Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV+1 : Cmsgid+2 ); + } +/* + * Replicate last solution block + */ + if( mycol == colprev ) + (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev, + Ccomm ); + + if( Wfr ) free( W ); +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif +/* + * End of HPL_pdtrsv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c new file mode 100644 index 000000000..7e31ddcd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNN.c @@ -0,0 +1,442 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, when PBCST is not NULL on entry, IFLAG indicates + * whether or not the broadcast has been completed. When PBCST + * is NULL, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. 
+ */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, 
Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq 
-= n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c new file mode 100644 index 000000000..faa3ef207 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateNT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNT broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. If PBCST is + * NULL, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN should be at least zero; if negative, PANEL->nq is used. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far 
we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. The choice is read from PANEL->algo while fswap is still HPL_NO_SWP, then kept in the static fswap/tswap. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix (curr is 1 when this process row is PANEL->prow) + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU ); 
+ Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c new file mode 100644 index 000000000..a16aa26a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTN.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTN broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. If PBCST is + * NULL, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN should be at least zero; if negative, PANEL->nq is used. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So 
far we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. The choice is read from PANEL->algo while fswap is still HPL_NO_SWP, then kept in the static fswap/tswap. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix (curr is 1 when this process row is PANEL->prow) + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, 
LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= 
n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c new file mode 100644 index 000000000..81e6cc4b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pdupdateTT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTT broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. If PBCST is + * NULL, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN should be at least zero; if negative, PANEL->nq is used. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far 
we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. The choice is read from PANEL->algo while fswap is still HPL_NO_SWP, then kept in the static fswap/tswap. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix (curr is 1 when this process row is PANEL->prow) + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c new file mode 100644 index 000000000..bf7cc4503 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_perm.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_perm
+(
+   const int                        N,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IWORK
+)
+#else
+void HPL_perm
+( N, LINDXA, LINDXAU, IWORK )
+   const int                        N;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IWORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_perm combines two index arrays and generates the corresponding
+ * permutation. First, this function computes the inverse of LINDXA, and
+ * then combines it with LINDXAU. Second, in order to be able to perform
+ * the permutation in place, LINDXAU is overwritten by the sequence of
+ * swaps producing the same result. What we ultimately want to
+ * achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the
+ * call to this function, this in place permutation can be performed by
+ * for i in [0..N) swap U[i] with U[LINDXAU[i]].
+ *
+ * Arguments
+ * =========
+ *
+ * N       (global input)                const int
+ *         On entry, N specifies the length of the arrays LINDXA and
+ *         LINDXAU. N should be at least zero.
+ *
+ * LINDXA  (global input/output)         int *
+ *         On entry, LINDXA is an array of dimension N containing the
+ *         source indexes. On exit, LINDXA contains the combined index
+ *         array (LINDXA[i] = LINDXAU[ inverse of LINDXA at i ]).
+ *
+ * LINDXAU (global input/output)         int *
+ *         On entry, LINDXAU is an array of dimension N containing the
+ *         target indexes. On exit, LINDXAU contains the sequence of
+ *         swaps that should be applied in increasing order of i to
+ *         permute the underlying array U in place.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension N (indexed by LINDXA values).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        i, j, k, fndd;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Invert LINDXA into IWORK - combine with LINDXAU - reset IWORK to identity
+ */
+   for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; }
+   for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; }
+
+   for( i = 0; i < N; i++ )
+   {
+      /* search LINDXA such that LINDXA[j] == i (unbounded scan: i must occur in LINDXA) */
+      j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--;
+      /* search IWORK such that IWORK[k] == j (unbounded scan: j must occur in IWORK) */
+      k = 0; do { fndd = ( IWORK[k] == j ); k++; } while( !fndd ); k--;
+      /* swap IWORK[i] and IWORK[k] (j is reused as the temporary); LINDXAU[i] = k */
+      j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j;
+      LINDXAU[i] = k;
+   }
+/*
+ * End of HPL_perm
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c
new file mode 100644
index 000000000..ab5ef949f
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_pipid.c
@@ -0,0 +1,187 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pipid
+(
+   HPL_T_panel *                    PANEL,
+   int *                            K,
+   int *                            IPID
+)
+#else
+void HPL_pipid
+( PANEL, K, IPID )
+   HPL_T_panel *                    PANEL;
+   int *                            K;
+   int *                            IPID;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pipid computes an array IPID that contains the source and final
+ * destination of matrix rows resulting from the application of N
+ * interchanges as computed by the LU factorization with row partial
+ * pivoting. The array IPID is such that the row of global index IPID(i)
+ * should be mapped onto the row of global index IPID(i+1). Note that we
+ * cannot really know the length of IPID a priori. However, we know that
+ * this array is at least 2*N long, since there are N rows to swap and
+ * broadcast. The length of this array must be smaller than or equal to
+ * 4*N, since every row is swapped with at most a single distinct remote
+ * row. The algorithm constructing IPID goes as follows: Let IA be the
+ * global index of the first row to be swapped.
+ *
+ * For every row src IA + i with i in [0..N) to be swapped with row dst
+ * such that dst is given by DPIV[i]:
+ *
+ * Is row src the destination of a previous row of the current block,
+ * that is, is there k odd such that IPID(k) is equal to src ?
+ *    Yes:  update this destination with dst. For example, if the
+ * pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5,
+ * we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it
+ * was thought so far ...
+ *    No :  add the pair (src,dst) at the end of IPID; row src has not
+ * been moved yet.
+ *
+ * Is row dst different from src the destination of a previous row of
+ * the current block, i.e., is there k odd such that IPID(k) is equal to
+ * dst ?
+ *    Yes:  update IPID(k) with src. For example, if the pivot array
+ * is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in
+ * fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought
+ * so far ...
+ *    No :  add the pair (dst,src) at the end of IPID; row dst has not
+ * been moved yet.
+ *
+ * Note that when src is equal to dst, the pair (dst,src) should not be
+ * added to IPID in order to avoid duplicated entries in this array.
+ * During the construction of the array IPID, we make sure that the
+ * first N pairs are such that IPID(k) with k odd is equal to IA+k/2.
+ * For k in [0..K/2), the row of global index IPID(2*k) should be
+ * mapped onto the row of global index IPID(2*k+1).
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry, PANEL points to the data structure containing the
+ *         panel information; DPIV, jb (= N) and ia (= IA) are read here.
+ *
+ * K       (global output)               int *
+ *         On exit, K specifies the number of entries in IPID. K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global output)               int *
+ *         On entry, IPID is an array of length 4*N. On exit, the first
+ *         K entries of that array contain the src and final destination
+ *         resulting from the application of the N interchanges as
+ *         specified by DPIV. The pairs (src,dst) are contiguously
+ *         stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+ *         in [0..N)
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, fndd, fnds, ia, i, j, jb, lst, off,
+                              src;
+   double                     * dpiv;
+/* ..
+ * .. Executable Statements ..
+ */
+   dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia;
+   dst = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2;
+   if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; }
+
+   for( i = 1; i < jb; i++ )
+   {
+      fnds = 0; j = 1;
+
+      if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) )
+      {
+         do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } }
+         while( !( fnds ) && ( j < *K ) );
+         if( !fnds ) { lst = *K; off = 2; IPID[lst] = src; }
+         else { lst = fnds-1; off = 0; }
+         IPID[lst+1] = dst;
+      }
+      else
+      {
+         fndd = 0;
+         do
+         {
+            if ( src == IPID[j] ) { fnds = j; }
+            else if( dst == IPID[j] ) { fndd = j; }
+            j += 2;
+         }
+         while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) );
+         if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off = 2; }
+         else { IPID[fnds] = dst; off = 0; }
+         if( !fndd ) { lst = *K+off; IPID[lst ] = dst; off += 2; }
+         else { lst = fndd-1; }
+         IPID[lst+1] = src;
+      }
+/*
+ * Enforce IPID(1,i) equal to src = ia + i: swap pair lst with pair 2*i
+ */
+      if( lst != ( j = ( i << 1 ) ) )
+      {
+         src = IPID[j ]; IPID[j ] = IPID[lst ]; IPID[lst ] = src;
+         dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst;
+      }
+      *K += off;
+   }
+/*
+ * End of HPL_pipid
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c
new file mode 100644
index 000000000..be12639d0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx0.c
@@ -0,0 +1,281 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx0
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   int *                            IPID,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            LLEN
+)
+#else
+void HPL_plindx0
+( PANEL, K, IPID, LINDXA, LINDXAU, LLEN )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   int *                            IPID;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            LLEN;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing
+ * the local source and final destination position resulting from the
+ * application of row interchanges.
+ *
+ * On entry, the array IPID of length K is such that the row of global
+ * index IPID(i) should be mapped onto row of global index IPID(i+1).
+ * Let IA be the global index of the first row to be swapped. For k in
+ * [0..K/2), the row of global index IPID(2*k) should be mapped onto the
+ * row of global index IPID(2*k+1). The question then, is to determine
+ * which rows should ultimately be part of U.
+ *
+ * First, some rows of the process ICURROW may be swapped locally. One
+ * of these rows belongs to U, the other one belongs to my local piece of
+ * A. The other rows of the current block are swapped with remote rows
+ * and are thus not part of U. These rows however should be sent along,
+ * and grabbed by the other processes as we progress in the exchange
+ * phase.
+ *
+ * So, assume that I am ICURROW and consider a row of index IPID(2*i)
+ * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
+ * than N, this row is locally swapped and should be copied into U at
+ * the position IPID(2*i+1) - IA. No row will be exchanged for this one.
+ * If IPID(2*i+1)-IA is greater than or equal to N, then the row IPID(2*i) should be
+ * locally copied into my local piece of A at the position corresponding
+ * to the row of global index IPID(2*i+1).
+ *
+ * If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i)
+ * is to be swapped away and strictly speaking does not belong to U, but
+ * to A remotely. Since this process will however send this array U,
+ * this row is copied into U, exactly where the row IPID(2*i+1) should
+ * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
+ * IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position
+ * IPID(2*k1+1)-IA.
+ *
+ * It is thus important to put the rows that go into U, i.e., such that
+ * IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By
+ * doing so, U is formed, and the local copy is performed in just one
+ * sweep.
+ *
+ * Two lists LINDXA and LINDXAU are built. LINDXA contains the local
+ * index of the rows I have that should be copied. LINDXAU contains the
+ * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
+ * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ * of A should be locally copied into A(-LINDXAU(k),:). In the process
+ * ICURROW, the initial packing algorithm proceeds as follows.
+ *
+ *   for all entries in IPID,
+ *      if IPID(2*i) is in ICURROW,
+ *         if IPID(2*i+1) is in ICURROW,
+ *            if( IPID(2*i+1) - IA < N )
+ *             save corresponding local position
+ *             of this row (LINDXA);
+ *             save local position (LINDXAU) in U
+ *             where this row goes;
+ *             [copy row IPID(2*i) in U at position
+ *             IPID(2*i+1)-IA; ];
+ *            else
+ *             save corresponding local position of
+ *             this row (LINDXA);
+ *             save local position (-LINDXAU) in A
+ *             where this row goes;
+ *             [copy row IPID(2*i) in my piece of A
+ *             at IPID(2*i+1);]
+ *            end if
+ *         else
+ *            find k1 such that IPID(2*k1) = IPID(2*i+1);
+ *            copy row IPID(2*i) in U at position
+ *            IPID(2*k1+1)-IA;
+ *            save corresponding local position of this
+ *            row (LINDXA);
+ *            save local position (LINDXAU) in U where
+ *            this row goes;
+ *         end if
+ *      end if
+ *   end for
+ *
+ * Second, if I am not the current row process ICURROW, all source rows
+ * in IPID that I own are part of U. Indeed, they are swapped with one
+ * row of the current block of rows, and the main factorization
+ * algorithm proceeds one row after each other. The processes different
+ * from ICURROW, should exchange and accumulate those rows until they
+ * receive some data previously owned by the process ICURROW.
+ *
+ * In processes different from ICURROW, the initial packing algorithm
+ * proceeds as follows. Consider a row of global index IPID(2*i) that I
+ * own. When I will be receiving data previously owned by ICURROW, i.e.,
+ * U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA,
+ * and this particular row of U should be first copied into my piece of
+ * A, at A(il,:), where il is the local row index corresponding to
+ * IPID(2*i). Now, initially, this row will be packed into workspace, say
+ * as the kth row of that work array. The following algorithm sets
+ * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
+ * should be copied. LINDXA(k) stores the local index in A where this
+ * row of U should be copied, i.e., il.
+ *
+ *   for all entries in IPID,
+ *      if IPID(2*i) is a row that I own,
+ *         copy row IPID(2*i) in work array;
+ *         save corresponding local position
+ *         of this row (LINDXA);
+ *         save position (LINDXAU) in U where
+ *         this row should be copied;
+ *      end if
+ *   end for
+ *
+ * Since we are at it, we also globally figure out how many rows every
+ * process has. That is necessary, because it would rather be cumbersome
+ * to figure it on the fly during the bi-directional exchange phase.
+ * This information is kept in the array LLEN of size NPROW. Also note
+ * that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry, PANEL points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID. K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                int *
+ *         On entry, IPID is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * LINDXA  (local output)                int *
+ *         On entry, LINDXA is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied (into U or locally into A), relative to PANEL->ii.
+ *
+ * LINDXAU (local output)                int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains the local destination information encoded as
+ *         follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be
+ *         copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:).
+ *
+ * LLEN    (global output)               int *
+ *         On entry, LLEN is an array of length NPROW. On exit, it
+ *         contains how many source rows IPID(2*k) every process row owns.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip=0, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Compute the local arrays LINDXA and LINDXAU containing the local
+ * source and final destination position resulting from the application
+ * of N interchanges.
+ */
+   myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
+   icurrow = PANEL->prow; jb = PANEL->jb;
+   nb = PANEL->nb; ia = PANEL->ia;
+   iroff = PANEL->ii;
+
+   for( i = 0; i < nprow; i++ ) LLEN[i] = 0;
+
+   for( i = 0; i < K; i += 2 )
+   {
+      src = IPID[i];
+      Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++;
+
+      if( myrow == srcrow )
+      {
+         Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+         LINDXA[ip] = il - iroff; dst = IPID[i+1];
+
+         if( myrow == icurrow )
+         {
+            Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+            if( dstrow == icurrow )
+            {
+               if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; }
+               else
+               {
+                  Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+                  LINDXAU[ip] = iroff - il;
+               }
+            }
+            else
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               LINDXAU[ip] = IPID[j-1] - ia;
+            }
+         }
+         else { LINDXAU[ip] = dst - ia; }
+
+         ip++;
+      }
+   }
+/*
+ * End of HPL_plindx0
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c
new file mode 100644
index 000000000..a24fd4c56
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx1.c
@@ -0,0 +1,275 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_plindx1
+(
+   HPL_T_panel *                    PANEL,
+   const int                        K,
+   const int *                      IPID,
+   int *                            IPA,
+   int *                            LINDXA,
+   int *                            LINDXAU,
+   int *                            IPLEN,
+   int *                            IPMAP,
+   int *                            IPMAPM1,
+   int *                            PERMU,
+   int *                            IWORK
+)
+#else
+void HPL_plindx1
+( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
+   HPL_T_panel *                    PANEL;
+   const int                        K;
+   const int *                      IPID;
+   int *                            IPA;
+   int *                            LINDXA;
+   int *                            LINDXAU;
+   int *                            IPLEN;
+   int *                            IPMAP;
+   int *                            IPMAPM1;
+   int *                            PERMU;
+   int *                            IWORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing
+ * the local source and final destination position resulting from the
+ * application of row interchanges. In addition, this function computes
+ * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic
+ * mapping information for the spreading phase.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry, PANEL points to the data structure containing the
+ *         panel information.
+ *
+ * K       (global input)                const int
+ *         On entry, K specifies the number of entries in IPID. K is at
+ *         least 2*N, and at most 4*N.
+ *
+ * IPID    (global input)                const int *
+ *         On entry, IPID is an array of length K. The first K entries
+ *         of that array contain the src and final destination resulting
+ *         from the application of the interchanges.
+ *
+ * IPA     (global output)               int *
+ *         On exit, IPA specifies the number of rows that the current
+ *         process row has that either belong to U or should be swapped
+ *         with remote rows of A; it is set to zero in all other process rows.
+ *
+ * LINDXA  (global output)               int *
+ *         On entry, LINDXA is an array of dimension 2*N. On exit, this
+ *         array contains the local indexes of the rows of A I have that
+ *         should be copied into U.
+ *
+ * LINDXAU (global output)               int *
+ *         On entry, LINDXAU is an array of dimension 2*N. On exit, this
+ *         array contains the local destination information encoded as
+ *         follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be
+ *         copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
+ *         of A should be locally copied into A(-LINDXAU(k),:). Only written in process row PANEL->prow.
+ *
+ * IPLEN   (global output)               int *
+ *         On entry, IPLEN is an array of dimension NPROW + 1. On exit,
+ *         this array is such that IPLEN[i] is the number of rows of A
+ *         in the processes before process IPMAP[i] after the sort
+ *         with the convention that IPLEN[nprow] is the total number of
+ *         rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the
+ *         local number of rows of A that should be moved to the process
+ *         IPMAP[i]. IPLEN is such that the number of rows of the source
+ *         process row can be computed as IPLEN[1] - IPLEN[0], and the
+ *         remaining entries of this array are sorted so that the
+ *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+ *
+ * IPMAP   (global output)               int *
+ *         On entry, IPMAP is an array of dimension NPROW. On exit, this
+ *         array contains the logarithmic mapping of the processes. In
+ *         other words, IPMAP[myrow] is the corresponding sorted process
+ *         coordinate.
+ *
+ * IPMAPM1 (global output)               int *
+ *         On entry, IPMAPM1 is an array of dimension NPROW. On exit,
+ *         this array contains the inverse of the logarithmic mapping
+ *         contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in
+ *         [0.. NPROW)
+ *
+ * PERMU   (global output)               int *
+ *         On entry, PERMU is an array of dimension JB. On exit, PERMU
+ *         contains a sequence of permutations, that should be applied
+ *         in increasing order to permute in place the row panel U.
+ *
+ * IWORK   (workspace)                   int *
+ *         On entry, IWORK is a workarray of dimension 2*JB (first JB: HPL_perm scratch; last JB: current positions in U).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        * iwork;
+   int                        dst, dstrow, fndd, i, ia, icurrow, il,
+                              ip, ipU, iroff, j, jb, myrow, nb, nprow,
+                              src, srcrow;
+/* ..
+ * .. Executable Statements ..
+ */
+/*
+ * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
+ */
+   HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
+/*
+ * Compute the local arrays LINDXA and LINDXAU containing the local
+ * source and final destination position resulting from the application
+ * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA
+ * elsewhere and PERMU in every process.
+ */
+   myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
+   jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia;
+   iroff = PANEL->ii; icurrow = PANEL->prow;
+
+   iwork = IWORK + jb;
+
+   if( myrow == icurrow )
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+
+         if( srcrow == icurrow )
+         {
+            dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+
+            Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff;
+
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow];
+               j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+
+               PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
+               j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
+               IPLEN[il]++; ipU++;
+            }
+            else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
+            {
+               Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+               LINDXAU[ip] = iroff - il;
+            }
+            ip++;
+         }
+      }
+      *IPA = ip;
+   }
+   else
+   {
+      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+      {
+         src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
+         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
+/*
+ * LINDXA[ip] is the local index (relative to PANEL->ii) of my row dst of A
+ */
+         if( myrow == dstrow )
+         {
+            Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
+            LINDXA[ip] = il - iroff; ip++;
+         }
+/*
+ * iwork[ipU] is the local (current) position index in U
+ * PERMU[ipU] is the local (final) destination index in U
+ */
+         if( srcrow == icurrow )
+         {
+            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+            {
+               PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+            else if( dstrow != icurrow )
+            {
+               j = 0;
+               do { fndd = ( dst == IPID[j] ); j+=2; }
+               while( !fndd && ( j < K ) );
+               PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
+               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+            }
+         }
+      }
+      *IPA = 0;
+   }
+/*
+ * Simplify iwork and PERMU, return in PERMU the sequence of permutations
+ * that needs to be applied to U after it has been broadcast.
+ */
+   HPL_perm( jb, iwork, PERMU, IWORK );
+/*
+ * Reset IPLEN to its correct value (shift right by one, IPLEN[0] = 0)
+ */
+   for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
+   IPLEN[0] = 0;
+/*
+ * End of HPL_plindx1
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c
new file mode 100644
index 000000000..fa460fd35
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_plindx10.c
@@ -0,0 +1,155 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgement:
+ * This product includes software developed at the University of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the University, the name of the Laboratory, or the
+ * names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific written
+ * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx10 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_plindx10 +( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx10 computes three arrays IPLEN, IPMAP and IPMAPM1 that + * contain the logarithmic mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. 
+ * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IMAP[i] after the sort, with + * the convention that IPLEN[nprow] is the total number of rows. + * In other words, IPLEN[i+1] - IPLEN[i] is the local number of + * rows of A that should be moved for each process. IPLEN is + * such that the number of rows of the source process row can be + * computed as IPLEN[1] - IPLEN[0], and the remaining entries of + * this array are sorted so that the quantities IPLEN[i+1] - + * IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROW) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dst, dstrow, i, ia, icurrow, jb, nb, + nprow, src, srcrow; +/* .. + * .. Executable Statements .. + */ + nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; + ia = PANEL->ia; icurrow = PANEL->prow; +/* + * Compute redundantly the local number of rows that each process has + * and that belong to U in IPLEN[1 .. 
nprow+1] + */ + for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++; + } + } +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + * (the inverse of IPMAP) + */ + HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 ); +/* + * End of HPL_plindx10 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c new file mode 100644 index 000000000..e68590a01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollN.c @@ -0,0 +1,225 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollN +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollN rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the number of columns of U. N must be + * at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[NPROW]). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type[2]; + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. 
Executable Statements .. + */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE, + &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); + } + + if( lengthS > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE, + &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); } +/* + * End of HPL_rollN + */ +} diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c new file mode 100644 index 000000000..0160c9412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_rollT.c @@ -0,0 +1,259 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollT +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollT rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type[2]; +#endif + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE, + &type[I_RECV] ); + else + ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE, + &type[I_RECV] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. 
+ */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU, + MPI_DOUBLE, partner, Cmsgid, comm, &request ); +#endif + } + + if( lengthS > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE, + &type[I_SEND] ); + else + ierr = MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE, + &type[I_SEND] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU, + MPI_DOUBLE, partner, Cmsgid, comm ); +#endif + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#if 0 + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); +#endif + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); } +/* + * End of HPL_rollT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c new file mode 100644 index 000000000..202611e7f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadN.c @@ -0,0 +1,303 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadN +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadN spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of rows of U, that + * should be spread on any given process row. This function also probes + * for the presence of the column panel PBCST. In case of success, this + * panel will be forwarded. If PBCST is NULL on input, this probing + * mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of columns of U. N + * must be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. 
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type; + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U to the left + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) 
HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U to the right - offset the IPLEN, and IPMAP arrays + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); } +/* + * End of HPL_spreadN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c new file mode 100644 index 000000000..1adf93507 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/HPL_spreadT.c @@ -0,0 +1,372 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadT +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadT spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of columns of U, + * that should be spread on any given process row. This function also + * probes for the presence of the column panel PBCST. If available, + * this panel will be forwarded. If PBCST is NULL on input, this + * probing mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. 
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type; +#endif + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; +/* + * Spread to the right - offset the IPLEN and IPMAP arrays + */ + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? 
lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); } +/* + * End of HPL_spreadT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_equil.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_equil.o new file mode 100644 index 000000000..8f1c51fed Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_equil.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_logsort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_logsort.o new file mode 100644 index 000000000..bcdbb6bc8 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_logsort.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv.o new file mode 100644 index 000000000..eebf1d2bd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv0.o new file mode 100644 index 000000000..7f9f518d0 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesv0.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK1.o new file mode 100644 index 000000000..2e9264e1b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK1.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK2.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK2.o new file mode 100644 index 000000000..a60d80722 
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdgesvK2.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00N.o new file mode 100644 index 000000000..bee048eab Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00T.o new file mode 100644 index 000000000..503cc4120 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp00T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01N.o new file mode 100644 index 000000000..40903b766 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01T.o new file mode 100644 index 000000000..a9d2aa518 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdlaswp01T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdtrsv.o new file mode 100644 index 000000000..90ba3bb3c Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdtrsv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNN.o new file mode 100644 index 000000000..c824b6c3d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNT.o new file mode 100644 index 000000000..e29f83012 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateNT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTN.o new file mode 100644 index 000000000..0c246fbd0 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTT.o new file mode 100644 index 000000000..363c2ffc4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pdupdateTT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_perm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_perm.o new file mode 100644 index 000000000..edfda7f58 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_perm.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pipid.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pipid.o new file mode 100644 index 000000000..76fa9d64f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_pipid.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx0.o new file mode 100644 index 000000000..9e136a0dc Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx0.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx1.o new file mode 100644 index 000000000..b4871bfcc Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx1.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx10.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx10.o new file mode 100644 index 000000000..4fbb806ae Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_plindx10.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollN.o new file mode 100644 index 000000000..04dcb0fa6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollT.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollT.o new file mode 100644 index 000000000..26c36d981 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_rollT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadN.o new file mode 100644 index 000000000..513ee6fe1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadT.o new file mode 100644 index 000000000..a4ecb7f87 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/HPL_spreadT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Makefile new file mode 100644 index 000000000..7898665f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/Makefile @@ -0,0 +1,136 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_pgeobj = \ + HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o \ + HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o \ + HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o \ + HPL_spreadT.o HPL_rollN.o HPL_rollT.o \ + HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o \ + HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o \ + HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o \ + HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pgeobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pipid.o : ../HPL_pipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pipid.c +HPL_plindx0.o : ../HPL_plindx0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx0.c +HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00N.c +HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00T.c +HPL_perm.o : ../HPL_perm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_perm.c +HPL_logsort.o : ../HPL_logsort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_logsort.c +HPL_plindx10.o : ../HPL_plindx10.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx10.c +HPL_plindx1.o : ../HPL_plindx1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx1.c +HPL_spreadN.o : ../HPL_spreadN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) 
../HPL_spreadN.c +HPL_spreadT.o : ../HPL_spreadT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadT.c +HPL_rollN.o : ../HPL_rollN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollN.c +HPL_rollT.o : ../HPL_rollT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollT.c +HPL_equil.o : ../HPL_equil.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_equil.c +HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01N.c +HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01T.c +HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNN.c +HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNT.c +HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTN.c +HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTT.c +HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtrsv.c +HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv0.c +HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK1.c +HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK2.c +HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/pgesv/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/MainSourceFiles.yaml b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/MainSourceFiles.yaml new file 
mode 100644 index 000000000..19e73e079 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/MainSourceFiles.yaml @@ -0,0 +1,1000 @@ +--- +MainSourceFile: MainSrcFiles_placehold +Replacements: + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6545 + Length: 0 + ReplacementText: "#include \n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6822 + Length: 0 + ReplacementText: "\n#include \n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6825 + Length: 18 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6843 + Length: 26 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6869 + Length: 20 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 6956 + Length: 9 + ReplacementText: 'dpct::err0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7044 + Length: 197 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 
+ InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7334 + Length: 0 + ReplacementText: ' try ' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7364 + Length: 0 + ReplacementText: " /*\n DPCT1010:1: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code.\n */\n" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7372 + Length: 9 + ReplacementText: 'dpct::err0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7388 + Length: 18 + ReplacementText: '0' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7416 + Length: 199 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7739 + Length: 23 + ReplacementText: 'DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 7772 + Length: 208 + ReplacementText: '' 
+ ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 8006 + Length: 0 + ReplacementText: "\ncatch (sycl::exception const &exc) {\n std::cerr << exc.what() << \"Exception caught at file:\" << __FILE__ << \", line:\" << __LINE__ << std::endl;\n std::exit(1);\n}" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 8954 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9513 + Length: 54 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device(K * LDA, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9587 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9637 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9662 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9687 + Length: 55 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device(N * LDB, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9762 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9813 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9838 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9863 + Length: 54 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrC = sycl::malloc_device(N * LDC, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9937 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 9987 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + 
BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10012 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10025 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10054 + Length: 85 + ReplacementText: 'oneapi::mkl::blas::column_major::gemm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M, N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC).wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10145 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10187 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10237 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' 
+ Offset: 10262 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10269 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10299 + Length: 17 + ReplacementText: 'sycl::free(devPtrA, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10322 + Length: 17 + ReplacementText: 'sycl::free(devPtrB, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10345 + Length: 17 + ReplacementText: 'sycl::free(devPtrC, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 10637 + Length: 0 + ReplacementText: "\n dpct::device_ext &dev_ct1 = dpct::get_current_device();\n sycl::queue &q_ct1 = dev_ct1.in_order_queue();" + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11149 + Length: 55 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrA = sycl::malloc_device(M * LDA, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - 
FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11224 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11271 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11296 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11334 + Length: 55 + ReplacementText: 'DPCT_CHECK_ERROR(devPtrB = sycl::malloc_device(N * LDB, q_ct1))' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11409 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11456 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11481 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: 
'/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11488 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11522 + Length: 62 + ReplacementText: 'oneapi::mkl::blas::column_major::trsm(*dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left, oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, oneapi::mkl::diag::unit, M, N, ALPHA, devPtrA, LDA, devPtrB, LDB).wait()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11595 + Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11642 + Length: 10 + ReplacementText: 'DPCT_CHECK_ERROR(q_ct1.memcpy' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11689 + Length: 24 + ReplacementText: '' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11714 + Length: 0 + ReplacementText: '.wait())' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11726 + 
Length: 23 + ReplacementText: 'dev_ct1.queues_wait_and_throw()' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11755 + Length: 17 + ReplacementText: 'sycl::free(devPtrA, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false + - FilePath: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Offset: 11778 + Length: 17 + ReplacementText: 'sycl::free(devPtrB, q_ct1)' + ConstantFlag: '' + ConstantOffset: 0 + InitStr: '' + NewHostVarName: '' + BlockLevelFormatFlag: false +MainSourceFilesDigest: + - MainSourceFile: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/src/cuda/cuda_dgemm.cpp' + Digest: c9ea63d69505b8c70080ff9792b77dd8 +DpctVersion: 18.0.0 +MainHelperFileName: '' +USMLevel: '' +FeatureMap: {} +CompileTargets: + /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl: + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pddriver.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdinfo.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptest/HPL_pdtest.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a: + - MigratedFileName: './src/auxil/HPL_dlacpy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlatcpy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_fprintf.c' 
+ CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_warn.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_abort.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlaprnt.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlange.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/auxil/HPL_dlamch.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dcopy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_daxpy.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dscal.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_idamax.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dgemv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dtrsv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dger.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dgemm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/blas/HPL_dtrsm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_1ring.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_1rinM.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_2ring.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_2rinM.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_blong.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_blonM.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: 
cc + - MigratedFileName: './src/comm/HPL_packL.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_copyL.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_binit.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_bcast.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_bwait.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_send.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_recv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/comm/HPL_sdrv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_grid_init.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_pnum.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_grid_info.c' + CompileOptions: 
'-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_grid_exit.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_broadcast.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_reduce.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_all_reduce.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_barrier.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_min.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_max.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/grid/HPL_sum.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_new.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_init.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_disp.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/panel/HPL_pdpanel_free.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxg2l.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxg2lp.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxg2p.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_indxl2g.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_infog2l.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_numroc.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_numrocI.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp00N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include 
-I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp10N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp01N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp01T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp02N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp03N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp03T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp04N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp04T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp05N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp05T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 
-O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp06N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_dlaswp06T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pwarn.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pabort.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pdlaprnt.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pdlamch.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pauxil/HPL_pdlange.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_dlocmax.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_dlocswpN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_dlocswpT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - 
MigratedFileName: './src/pfact/HPL_pdmxswp.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpancrN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpancrT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpanllN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpanllT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpanrlN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdpanrlT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanllN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanllT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpancrN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: 
'./src/pfact/HPL_pdrpancrT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanrlN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdrpanrlT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pfact/HPL_pdfact.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pipid.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_plindx0.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp00N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp00T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_perm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_logsort.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_plindx10.c' + 
CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_plindx1.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_spreadN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_spreadT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_rollN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_rollT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_equil.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp01N.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdlaswp01T.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateNN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateNT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateTN.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdupdateTT.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdtrsv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesv0.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesvK1.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesvK2.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './src/pgesv/HPL_pdgesv.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_dmatgen.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_ladd.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_lmul.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS 
-I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_xjumpm.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_jumpit.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_rand.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/matgen/HPL_setran.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/timer/HPL_timer.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/timer/HPL_timer_cputime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/timer/HPL_timer_walltime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/pmatgen/HPL_pdmatgen.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptimer/HPL_ptimer.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptimer/HPL_ptimer_cputime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + - MigratedFileName: './testing/ptimer/HPL_ptimer_walltime.c' + CompileOptions: '-DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ' + Compiler: cc + libdgemm.so.1.0.1: + - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp' + CompileOptions: '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ' + Compiler: cc + - MigratedFileName: './src/cuda/cuda_dgemm.cpp.dp.cpp' + CompileOptions: '-O0 -DMPI -I ./include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ' + Compiler: cc +OptionMap: + AnalysisScopePath: + Value: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3' + Specified: false + AsyncHandler: + Value: 'false' + Specified: false + CommentsEnabled: + Value: 'false' + Specified: false + CompilationsDir: + Value: '/home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3' + Specified: true + CtadEnabled: + Value: 'false' + Specified: false + EnablepProfiling: + Value: 'false' + Specified: false + ExperimentalFlag: + Value: '0' + Specified: false + ExplicitClNamespace: + Value: 'false' + Specified: false + ExplicitNamespace: + Value: '20' + Specified: false + ExtensionDDFlag: + Value: '0' + Specified: false + ExtensionDEFlag: + Value: '4294967295' + Specified: false + HelperFuncPreferenceFlag: + Value: '0' + Specified: false + NDRangeDim: + Value: '3' + Specified: false + NoDRYPattern: + Value: 'false' + Specified: false + NoUseGenericSpace: + Value: '' + Specified: true + OptimizeMigration: + Value: 'false' + Specified: false + ProcessAll: + Value: 'false' + Specified: false + RuleFile: + Value: '' + Specified: false + SyclNamedLambda: + Value: 'false' + Specified: false + UsmLevel: + Value: '1' + Specified: false +... 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/Makefile.dpct b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/Makefile.dpct new file mode 100644 index 000000000..15b4e8109 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/Makefile.dpct @@ -0,0 +1,1018 @@ +CC := icpx + +LD := $(CC) + +#DPCT2001:4: You can link with more library by add them here. +LIB := -lmpi + +FLAGS := -fPIC + +ifeq ($(shell which $(CC)),) + $(error ERROR - $(CC) compiler not found) +endif + +ROOT_DIR := $(shell dirname $(shell which $(CC))) +INCLUDE_SYCL := $(ROOT_DIR)/../include +INCLUDE_CL := $(ROOT_DIR)/../include/sycl + +TARGET_0_SRC_0 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_0 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_1 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_1 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_2 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_2 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_3 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_3 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_4 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_4 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_5 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_5 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_5 = -DAdd__ -DF77_INTEGER=int 
-DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_6 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_6 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_7 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_7 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_8 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_8 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_9 = ./testing/ptest/HPL_pddriver.c +TARGET_0_OBJ_9 = ./testing/ptest/HPL_pddriver.o +TARGET_0_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_10 = ./testing/ptest/HPL_pdinfo.c +TARGET_0_OBJ_10 = ./testing/ptest/HPL_pdinfo.o +TARGET_0_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_0_SRC_11 = ./testing/ptest/HPL_pdtest.c +TARGET_0_OBJ_11 = ./testing/ptest/HPL_pdtest.o +TARGET_0_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_0 = ./src/auxil/HPL_dlacpy.c +TARGET_1_OBJ_0 = ./src/auxil/HPL_dlacpy.o +TARGET_1_FLAG_0 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_1 = ./src/auxil/HPL_dlatcpy.c +TARGET_1_OBJ_1 = ./src/auxil/HPL_dlatcpy.o +TARGET_1_FLAG_1 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_2 = 
./src/auxil/HPL_fprintf.c +TARGET_1_OBJ_2 = ./src/auxil/HPL_fprintf.o +TARGET_1_FLAG_2 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_3 = ./src/auxil/HPL_warn.c +TARGET_1_OBJ_3 = ./src/auxil/HPL_warn.o +TARGET_1_FLAG_3 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_4 = ./src/auxil/HPL_abort.c +TARGET_1_OBJ_4 = ./src/auxil/HPL_abort.o +TARGET_1_FLAG_4 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_5 = ./src/auxil/HPL_dlaprnt.c +TARGET_1_OBJ_5 = ./src/auxil/HPL_dlaprnt.o +TARGET_1_FLAG_5 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_6 = ./src/auxil/HPL_dlange.c +TARGET_1_OBJ_6 = ./src/auxil/HPL_dlange.o +TARGET_1_FLAG_6 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_7 = ./src/auxil/HPL_dlamch.c +TARGET_1_OBJ_7 = ./src/auxil/HPL_dlamch.o +TARGET_1_FLAG_7 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -I ./include ${FLAGS} + +TARGET_1_SRC_8 = ./src/blas/HPL_dcopy.c +TARGET_1_OBJ_8 = ./src/blas/HPL_dcopy.o +TARGET_1_FLAG_8 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_9 = ./src/blas/HPL_daxpy.c +TARGET_1_OBJ_9 = ./src/blas/HPL_daxpy.o +TARGET_1_FLAG_9 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_10 = ./src/blas/HPL_dscal.c +TARGET_1_OBJ_10 = ./src/blas/HPL_dscal.o +TARGET_1_FLAG_10 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + 
+TARGET_1_SRC_11 = ./src/blas/HPL_idamax.c +TARGET_1_OBJ_11 = ./src/blas/HPL_idamax.o +TARGET_1_FLAG_11 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_12 = ./src/blas/HPL_dgemv.c +TARGET_1_OBJ_12 = ./src/blas/HPL_dgemv.o +TARGET_1_FLAG_12 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_13 = ./src/blas/HPL_dtrsv.c +TARGET_1_OBJ_13 = ./src/blas/HPL_dtrsv.o +TARGET_1_FLAG_13 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_14 = ./src/blas/HPL_dger.c +TARGET_1_OBJ_14 = ./src/blas/HPL_dger.o +TARGET_1_FLAG_14 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_15 = ./src/blas/HPL_dgemm.c +TARGET_1_OBJ_15 = ./src/blas/HPL_dgemm.o +TARGET_1_FLAG_15 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_16 = ./src/blas/HPL_dtrsm.c +TARGET_1_OBJ_16 = ./src/blas/HPL_dtrsm.o +TARGET_1_FLAG_16 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_17 = ./src/comm/HPL_1ring.c +TARGET_1_OBJ_17 = ./src/comm/HPL_1ring.o +TARGET_1_FLAG_17 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_18 = ./src/comm/HPL_1rinM.c +TARGET_1_OBJ_18 = ./src/comm/HPL_1rinM.o +TARGET_1_FLAG_18 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_19 = ./src/comm/HPL_2ring.c +TARGET_1_OBJ_19 = ./src/comm/HPL_2ring.o +TARGET_1_FLAG_19 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I 
./include ${FLAGS} + +TARGET_1_SRC_20 = ./src/comm/HPL_2rinM.c +TARGET_1_OBJ_20 = ./src/comm/HPL_2rinM.o +TARGET_1_FLAG_20 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_21 = ./src/comm/HPL_blong.c +TARGET_1_OBJ_21 = ./src/comm/HPL_blong.o +TARGET_1_FLAG_21 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_22 = ./src/comm/HPL_blonM.c +TARGET_1_OBJ_22 = ./src/comm/HPL_blonM.o +TARGET_1_FLAG_22 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_23 = ./src/comm/HPL_packL.c +TARGET_1_OBJ_23 = ./src/comm/HPL_packL.o +TARGET_1_FLAG_23 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_24 = ./src/comm/HPL_copyL.c +TARGET_1_OBJ_24 = ./src/comm/HPL_copyL.o +TARGET_1_FLAG_24 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_25 = ./src/comm/HPL_binit.c +TARGET_1_OBJ_25 = ./src/comm/HPL_binit.o +TARGET_1_FLAG_25 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_26 = ./src/comm/HPL_bcast.c +TARGET_1_OBJ_26 = ./src/comm/HPL_bcast.o +TARGET_1_FLAG_26 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_27 = ./src/comm/HPL_bwait.c +TARGET_1_OBJ_27 = ./src/comm/HPL_bwait.o +TARGET_1_FLAG_27 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_28 = ./src/comm/HPL_send.c +TARGET_1_OBJ_28 = ./src/comm/HPL_send.o +TARGET_1_FLAG_28 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include 
-I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_29 = ./src/comm/HPL_recv.c +TARGET_1_OBJ_29 = ./src/comm/HPL_recv.o +TARGET_1_FLAG_29 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_30 = ./src/comm/HPL_sdrv.c +TARGET_1_OBJ_30 = ./src/comm/HPL_sdrv.o +TARGET_1_FLAG_30 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_31 = ./src/grid/HPL_grid_init.c +TARGET_1_OBJ_31 = ./src/grid/HPL_grid_init.o +TARGET_1_FLAG_31 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_32 = ./src/grid/HPL_pnum.c +TARGET_1_OBJ_32 = ./src/grid/HPL_pnum.o +TARGET_1_FLAG_32 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_33 = ./src/grid/HPL_grid_info.c +TARGET_1_OBJ_33 = ./src/grid/HPL_grid_info.o +TARGET_1_FLAG_33 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_34 = ./src/grid/HPL_grid_exit.c +TARGET_1_OBJ_34 = ./src/grid/HPL_grid_exit.o +TARGET_1_FLAG_34 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_35 = ./src/grid/HPL_broadcast.c +TARGET_1_OBJ_35 = ./src/grid/HPL_broadcast.o +TARGET_1_FLAG_35 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_36 = ./src/grid/HPL_reduce.c +TARGET_1_OBJ_36 = ./src/grid/HPL_reduce.o +TARGET_1_FLAG_36 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_37 = ./src/grid/HPL_all_reduce.c +TARGET_1_OBJ_37 = ./src/grid/HPL_all_reduce.o +TARGET_1_FLAG_37 = -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_38 = ./src/grid/HPL_barrier.c +TARGET_1_OBJ_38 = ./src/grid/HPL_barrier.o +TARGET_1_FLAG_38 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_39 = ./src/grid/HPL_min.c +TARGET_1_OBJ_39 = ./src/grid/HPL_min.o +TARGET_1_FLAG_39 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_40 = ./src/grid/HPL_max.c +TARGET_1_OBJ_40 = ./src/grid/HPL_max.o +TARGET_1_FLAG_40 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_41 = ./src/grid/HPL_sum.c +TARGET_1_OBJ_41 = ./src/grid/HPL_sum.o +TARGET_1_FLAG_41 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_42 = ./src/panel/HPL_pdpanel_new.c +TARGET_1_OBJ_42 = ./src/panel/HPL_pdpanel_new.o +TARGET_1_FLAG_42 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_43 = ./src/panel/HPL_pdpanel_init.c +TARGET_1_OBJ_43 = ./src/panel/HPL_pdpanel_init.o +TARGET_1_FLAG_43 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_44 = ./src/panel/HPL_pdpanel_disp.c +TARGET_1_OBJ_44 = ./src/panel/HPL_pdpanel_disp.o +TARGET_1_FLAG_44 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_45 = ./src/panel/HPL_pdpanel_free.c +TARGET_1_OBJ_45 = ./src/panel/HPL_pdpanel_free.o +TARGET_1_FLAG_45 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_46 = 
./src/pauxil/HPL_indxg2l.c +TARGET_1_OBJ_46 = ./src/pauxil/HPL_indxg2l.o +TARGET_1_FLAG_46 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_47 = ./src/pauxil/HPL_indxg2lp.c +TARGET_1_OBJ_47 = ./src/pauxil/HPL_indxg2lp.o +TARGET_1_FLAG_47 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_48 = ./src/pauxil/HPL_indxg2p.c +TARGET_1_OBJ_48 = ./src/pauxil/HPL_indxg2p.o +TARGET_1_FLAG_48 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_49 = ./src/pauxil/HPL_indxl2g.c +TARGET_1_OBJ_49 = ./src/pauxil/HPL_indxl2g.o +TARGET_1_FLAG_49 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_50 = ./src/pauxil/HPL_infog2l.c +TARGET_1_OBJ_50 = ./src/pauxil/HPL_infog2l.o +TARGET_1_FLAG_50 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_51 = ./src/pauxil/HPL_numroc.c +TARGET_1_OBJ_51 = ./src/pauxil/HPL_numroc.o +TARGET_1_FLAG_51 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_52 = ./src/pauxil/HPL_numrocI.c +TARGET_1_OBJ_52 = ./src/pauxil/HPL_numrocI.o +TARGET_1_FLAG_52 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_53 = ./src/pauxil/HPL_dlaswp00N.c +TARGET_1_OBJ_53 = ./src/pauxil/HPL_dlaswp00N.o +TARGET_1_FLAG_53 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_54 = ./src/pauxil/HPL_dlaswp10N.c +TARGET_1_OBJ_54 = ./src/pauxil/HPL_dlaswp10N.o +TARGET_1_FLAG_54 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_55 = ./src/pauxil/HPL_dlaswp01N.c +TARGET_1_OBJ_55 = ./src/pauxil/HPL_dlaswp01N.o +TARGET_1_FLAG_55 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_56 = ./src/pauxil/HPL_dlaswp01T.c +TARGET_1_OBJ_56 = ./src/pauxil/HPL_dlaswp01T.o +TARGET_1_FLAG_56 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_57 = ./src/pauxil/HPL_dlaswp02N.c +TARGET_1_OBJ_57 = ./src/pauxil/HPL_dlaswp02N.o +TARGET_1_FLAG_57 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_58 = ./src/pauxil/HPL_dlaswp03N.c +TARGET_1_OBJ_58 = ./src/pauxil/HPL_dlaswp03N.o +TARGET_1_FLAG_58 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_59 = ./src/pauxil/HPL_dlaswp03T.c +TARGET_1_OBJ_59 = ./src/pauxil/HPL_dlaswp03T.o +TARGET_1_FLAG_59 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_60 = ./src/pauxil/HPL_dlaswp04N.c +TARGET_1_OBJ_60 = ./src/pauxil/HPL_dlaswp04N.o +TARGET_1_FLAG_60 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_61 = ./src/pauxil/HPL_dlaswp04T.c +TARGET_1_OBJ_61 = ./src/pauxil/HPL_dlaswp04T.o +TARGET_1_FLAG_61 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_62 = ./src/pauxil/HPL_dlaswp05N.c +TARGET_1_OBJ_62 = ./src/pauxil/HPL_dlaswp05N.o +TARGET_1_FLAG_62 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_63 = 
./src/pauxil/HPL_dlaswp05T.c +TARGET_1_OBJ_63 = ./src/pauxil/HPL_dlaswp05T.o +TARGET_1_FLAG_63 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_64 = ./src/pauxil/HPL_dlaswp06N.c +TARGET_1_OBJ_64 = ./src/pauxil/HPL_dlaswp06N.o +TARGET_1_FLAG_64 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_65 = ./src/pauxil/HPL_dlaswp06T.c +TARGET_1_OBJ_65 = ./src/pauxil/HPL_dlaswp06T.o +TARGET_1_FLAG_65 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_66 = ./src/pauxil/HPL_pwarn.c +TARGET_1_OBJ_66 = ./src/pauxil/HPL_pwarn.o +TARGET_1_FLAG_66 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_67 = ./src/pauxil/HPL_pabort.c +TARGET_1_OBJ_67 = ./src/pauxil/HPL_pabort.o +TARGET_1_FLAG_67 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_68 = ./src/pauxil/HPL_pdlaprnt.c +TARGET_1_OBJ_68 = ./src/pauxil/HPL_pdlaprnt.o +TARGET_1_FLAG_68 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_69 = ./src/pauxil/HPL_pdlamch.c +TARGET_1_OBJ_69 = ./src/pauxil/HPL_pdlamch.o +TARGET_1_FLAG_69 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_70 = ./src/pauxil/HPL_pdlange.c +TARGET_1_OBJ_70 = ./src/pauxil/HPL_pdlange.o +TARGET_1_FLAG_70 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_71 = ./src/pfact/HPL_dlocmax.c +TARGET_1_OBJ_71 = ./src/pfact/HPL_dlocmax.o +TARGET_1_FLAG_71 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_72 = ./src/pfact/HPL_dlocswpN.c +TARGET_1_OBJ_72 = ./src/pfact/HPL_dlocswpN.o +TARGET_1_FLAG_72 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_73 = ./src/pfact/HPL_dlocswpT.c +TARGET_1_OBJ_73 = ./src/pfact/HPL_dlocswpT.o +TARGET_1_FLAG_73 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_74 = ./src/pfact/HPL_pdmxswp.c +TARGET_1_OBJ_74 = ./src/pfact/HPL_pdmxswp.o +TARGET_1_FLAG_74 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_75 = ./src/pfact/HPL_pdpancrN.c +TARGET_1_OBJ_75 = ./src/pfact/HPL_pdpancrN.o +TARGET_1_FLAG_75 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_76 = ./src/pfact/HPL_pdpancrT.c +TARGET_1_OBJ_76 = ./src/pfact/HPL_pdpancrT.o +TARGET_1_FLAG_76 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_77 = ./src/pfact/HPL_pdpanllN.c +TARGET_1_OBJ_77 = ./src/pfact/HPL_pdpanllN.o +TARGET_1_FLAG_77 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_78 = ./src/pfact/HPL_pdpanllT.c +TARGET_1_OBJ_78 = ./src/pfact/HPL_pdpanllT.o +TARGET_1_FLAG_78 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_79 = ./src/pfact/HPL_pdpanrlN.c +TARGET_1_OBJ_79 = ./src/pfact/HPL_pdpanrlN.o +TARGET_1_FLAG_79 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_80 = ./src/pfact/HPL_pdpanrlT.c +TARGET_1_OBJ_80 = 
./src/pfact/HPL_pdpanrlT.o +TARGET_1_FLAG_80 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_81 = ./src/pfact/HPL_pdrpanllN.c +TARGET_1_OBJ_81 = ./src/pfact/HPL_pdrpanllN.o +TARGET_1_FLAG_81 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_82 = ./src/pfact/HPL_pdrpanllT.c +TARGET_1_OBJ_82 = ./src/pfact/HPL_pdrpanllT.o +TARGET_1_FLAG_82 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_83 = ./src/pfact/HPL_pdrpancrN.c +TARGET_1_OBJ_83 = ./src/pfact/HPL_pdrpancrN.o +TARGET_1_FLAG_83 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_84 = ./src/pfact/HPL_pdrpancrT.c +TARGET_1_OBJ_84 = ./src/pfact/HPL_pdrpancrT.o +TARGET_1_FLAG_84 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_85 = ./src/pfact/HPL_pdrpanrlN.c +TARGET_1_OBJ_85 = ./src/pfact/HPL_pdrpanrlN.o +TARGET_1_FLAG_85 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_86 = ./src/pfact/HPL_pdrpanrlT.c +TARGET_1_OBJ_86 = ./src/pfact/HPL_pdrpanrlT.o +TARGET_1_FLAG_86 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_87 = ./src/pfact/HPL_pdfact.c +TARGET_1_OBJ_87 = ./src/pfact/HPL_pdfact.o +TARGET_1_FLAG_87 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_88 = ./src/pgesv/HPL_pipid.c +TARGET_1_OBJ_88 = ./src/pgesv/HPL_pipid.o +TARGET_1_FLAG_88 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 
-O3 -I ./include ${FLAGS} + +TARGET_1_SRC_89 = ./src/pgesv/HPL_plindx0.c +TARGET_1_OBJ_89 = ./src/pgesv/HPL_plindx0.o +TARGET_1_FLAG_89 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_90 = ./src/pgesv/HPL_pdlaswp00N.c +TARGET_1_OBJ_90 = ./src/pgesv/HPL_pdlaswp00N.o +TARGET_1_FLAG_90 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_91 = ./src/pgesv/HPL_pdlaswp00T.c +TARGET_1_OBJ_91 = ./src/pgesv/HPL_pdlaswp00T.o +TARGET_1_FLAG_91 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_92 = ./src/pgesv/HPL_perm.c +TARGET_1_OBJ_92 = ./src/pgesv/HPL_perm.o +TARGET_1_FLAG_92 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_93 = ./src/pgesv/HPL_logsort.c +TARGET_1_OBJ_93 = ./src/pgesv/HPL_logsort.o +TARGET_1_FLAG_93 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_94 = ./src/pgesv/HPL_plindx10.c +TARGET_1_OBJ_94 = ./src/pgesv/HPL_plindx10.o +TARGET_1_FLAG_94 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_95 = ./src/pgesv/HPL_plindx1.c +TARGET_1_OBJ_95 = ./src/pgesv/HPL_plindx1.o +TARGET_1_FLAG_95 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_96 = ./src/pgesv/HPL_spreadN.c +TARGET_1_OBJ_96 = ./src/pgesv/HPL_spreadN.o +TARGET_1_FLAG_96 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_97 = ./src/pgesv/HPL_spreadT.c +TARGET_1_OBJ_97 = ./src/pgesv/HPL_spreadT.o +TARGET_1_FLAG_97 = -DAdd__ 
-DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_98 = ./src/pgesv/HPL_rollN.c +TARGET_1_OBJ_98 = ./src/pgesv/HPL_rollN.o +TARGET_1_FLAG_98 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_99 = ./src/pgesv/HPL_rollT.c +TARGET_1_OBJ_99 = ./src/pgesv/HPL_rollT.o +TARGET_1_FLAG_99 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_100 = ./src/pgesv/HPL_equil.c +TARGET_1_OBJ_100 = ./src/pgesv/HPL_equil.o +TARGET_1_FLAG_100 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_101 = ./src/pgesv/HPL_pdlaswp01N.c +TARGET_1_OBJ_101 = ./src/pgesv/HPL_pdlaswp01N.o +TARGET_1_FLAG_101 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_102 = ./src/pgesv/HPL_pdlaswp01T.c +TARGET_1_OBJ_102 = ./src/pgesv/HPL_pdlaswp01T.o +TARGET_1_FLAG_102 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_103 = ./src/pgesv/HPL_pdupdateNN.c +TARGET_1_OBJ_103 = ./src/pgesv/HPL_pdupdateNN.o +TARGET_1_FLAG_103 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_104 = ./src/pgesv/HPL_pdupdateNT.c +TARGET_1_OBJ_104 = ./src/pgesv/HPL_pdupdateNT.o +TARGET_1_FLAG_104 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_105 = ./src/pgesv/HPL_pdupdateTN.c +TARGET_1_OBJ_105 = ./src/pgesv/HPL_pdupdateTN.o +TARGET_1_FLAG_105 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + 
+TARGET_1_SRC_106 = ./src/pgesv/HPL_pdupdateTT.c +TARGET_1_OBJ_106 = ./src/pgesv/HPL_pdupdateTT.o +TARGET_1_FLAG_106 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_107 = ./src/pgesv/HPL_pdtrsv.c +TARGET_1_OBJ_107 = ./src/pgesv/HPL_pdtrsv.o +TARGET_1_FLAG_107 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_108 = ./src/pgesv/HPL_pdgesv0.c +TARGET_1_OBJ_108 = ./src/pgesv/HPL_pdgesv0.o +TARGET_1_FLAG_108 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_109 = ./src/pgesv/HPL_pdgesvK1.c +TARGET_1_OBJ_109 = ./src/pgesv/HPL_pdgesvK1.o +TARGET_1_FLAG_109 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_110 = ./src/pgesv/HPL_pdgesvK2.c +TARGET_1_OBJ_110 = ./src/pgesv/HPL_pdgesvK2.o +TARGET_1_FLAG_110 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_111 = ./src/pgesv/HPL_pdgesv.c +TARGET_1_OBJ_111 = ./src/pgesv/HPL_pdgesv.o +TARGET_1_FLAG_111 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_112 = ./testing/matgen/HPL_dmatgen.c +TARGET_1_OBJ_112 = ./testing/matgen/HPL_dmatgen.o +TARGET_1_FLAG_112 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_113 = ./testing/matgen/HPL_ladd.c +TARGET_1_OBJ_113 = ./testing/matgen/HPL_ladd.o +TARGET_1_FLAG_113 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_114 = ./testing/matgen/HPL_lmul.c +TARGET_1_OBJ_114 = ./testing/matgen/HPL_lmul.o +TARGET_1_FLAG_114 
= -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_115 = ./testing/matgen/HPL_xjumpm.c +TARGET_1_OBJ_115 = ./testing/matgen/HPL_xjumpm.o +TARGET_1_FLAG_115 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_116 = ./testing/matgen/HPL_jumpit.c +TARGET_1_OBJ_116 = ./testing/matgen/HPL_jumpit.o +TARGET_1_FLAG_116 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_117 = ./testing/matgen/HPL_rand.c +TARGET_1_OBJ_117 = ./testing/matgen/HPL_rand.o +TARGET_1_FLAG_117 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_118 = ./testing/matgen/HPL_setran.c +TARGET_1_OBJ_118 = ./testing/matgen/HPL_setran.o +TARGET_1_FLAG_118 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_119 = ./testing/timer/HPL_timer.c +TARGET_1_OBJ_119 = ./testing/timer/HPL_timer.o +TARGET_1_FLAG_119 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_120 = ./testing/timer/HPL_timer_cputime.c +TARGET_1_OBJ_120 = ./testing/timer/HPL_timer_cputime.o +TARGET_1_FLAG_120 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_121 = ./testing/timer/HPL_timer_walltime.c +TARGET_1_OBJ_121 = ./testing/timer/HPL_timer_walltime.o +TARGET_1_FLAG_121 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_122 = ./testing/pmatgen/HPL_pdmatgen.c +TARGET_1_OBJ_122 = ./testing/pmatgen/HPL_pdmatgen.o +TARGET_1_FLAG_122 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle 
-DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_123 = ./testing/ptimer/HPL_ptimer.c +TARGET_1_OBJ_123 = ./testing/ptimer/HPL_ptimer.o +TARGET_1_FLAG_123 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_124 = ./testing/ptimer/HPL_ptimer_cputime.c +TARGET_1_OBJ_124 = ./testing/ptimer/HPL_ptimer_cputime.o +TARGET_1_FLAG_124 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_1_SRC_125 = ./testing/ptimer/HPL_ptimer_walltime.c +TARGET_1_OBJ_125 = ./testing/ptimer/HPL_ptimer_walltime.o +TARGET_1_FLAG_125 = -DAdd__ -DF77_INTEGER=int -DStringSunStyle -DHPL_CALL_CBLAS -I./include -I./include/intel64 -O3 -I ./include ${FLAGS} + +TARGET_2_SRC_0 = ./src/cuda/cuda_dgemm.cpp.dp.cpp +TARGET_2_OBJ_0 = ./src/cuda/cuda_dgemm.cpp.dp.o +TARGET_2_FLAG_0 = -O0 -DMPI -I ./include -I ${MKLROOT}/include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS} + +TARGET_2_SRC_1 = ./src/cuda/cuda_dgemm.cpp.dp.cpp +TARGET_2_OBJ_1 = ./src/cuda/cuda_dgemm.cpp.dp.o +TARGET_2_FLAG_1 = -O0 -DMPI -I ./include -I ${MKLROOT}/include -I $(INCLUDE_SYCL) -I $(INCLUDE_CL) ${FLAGS} + +TARGET_0 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/bin/intel64/xhpl +TARGET_1 := /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a +TARGET_2 := libdgemm.so.1.0.1 + +TARGET := ${TARGET_1} ${TARGET_2} ${TARGET_0} +.PHONY:all clean +OBJS_0 := ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} ${TARGET_0_OBJ_6} ${TARGET_0_OBJ_7} ${TARGET_0_OBJ_8} ${TARGET_0_OBJ_9} ${TARGET_0_OBJ_10} ${TARGET_0_OBJ_11} +OBJS_1 := ${TARGET_1_OBJ_0} ${TARGET_1_OBJ_1} ${TARGET_1_OBJ_2} ${TARGET_1_OBJ_3} ${TARGET_1_OBJ_4} ${TARGET_1_OBJ_5} ${TARGET_1_OBJ_6} ${TARGET_1_OBJ_7} ${TARGET_1_OBJ_8} ${TARGET_1_OBJ_9} ${TARGET_1_OBJ_10} 
${TARGET_1_OBJ_11} ${TARGET_1_OBJ_12} ${TARGET_1_OBJ_13} ${TARGET_1_OBJ_14} ${TARGET_1_OBJ_15} ${TARGET_1_OBJ_16} ${TARGET_1_OBJ_17} ${TARGET_1_OBJ_18} ${TARGET_1_OBJ_19} ${TARGET_1_OBJ_20} ${TARGET_1_OBJ_21} ${TARGET_1_OBJ_22} ${TARGET_1_OBJ_23} ${TARGET_1_OBJ_24} ${TARGET_1_OBJ_25} ${TARGET_1_OBJ_26} ${TARGET_1_OBJ_27} ${TARGET_1_OBJ_28} ${TARGET_1_OBJ_29} ${TARGET_1_OBJ_30} ${TARGET_1_OBJ_31} ${TARGET_1_OBJ_32} ${TARGET_1_OBJ_33} ${TARGET_1_OBJ_34} ${TARGET_1_OBJ_35} ${TARGET_1_OBJ_36} ${TARGET_1_OBJ_37} ${TARGET_1_OBJ_38} ${TARGET_1_OBJ_39} ${TARGET_1_OBJ_40} ${TARGET_1_OBJ_41} ${TARGET_1_OBJ_42} ${TARGET_1_OBJ_43} ${TARGET_1_OBJ_44} ${TARGET_1_OBJ_45} ${TARGET_1_OBJ_46} ${TARGET_1_OBJ_47} ${TARGET_1_OBJ_48} ${TARGET_1_OBJ_49} ${TARGET_1_OBJ_50} ${TARGET_1_OBJ_51} ${TARGET_1_OBJ_52} ${TARGET_1_OBJ_53} ${TARGET_1_OBJ_54} ${TARGET_1_OBJ_55} ${TARGET_1_OBJ_56} ${TARGET_1_OBJ_57} ${TARGET_1_OBJ_58} ${TARGET_1_OBJ_59} ${TARGET_1_OBJ_60} ${TARGET_1_OBJ_61} ${TARGET_1_OBJ_62} ${TARGET_1_OBJ_63} ${TARGET_1_OBJ_64} ${TARGET_1_OBJ_65} ${TARGET_1_OBJ_66} ${TARGET_1_OBJ_67} ${TARGET_1_OBJ_68} ${TARGET_1_OBJ_69} ${TARGET_1_OBJ_70} ${TARGET_1_OBJ_71} ${TARGET_1_OBJ_72} ${TARGET_1_OBJ_73} ${TARGET_1_OBJ_74} ${TARGET_1_OBJ_75} ${TARGET_1_OBJ_76} ${TARGET_1_OBJ_77} ${TARGET_1_OBJ_78} ${TARGET_1_OBJ_79} ${TARGET_1_OBJ_80} ${TARGET_1_OBJ_81} ${TARGET_1_OBJ_82} ${TARGET_1_OBJ_83} ${TARGET_1_OBJ_84} ${TARGET_1_OBJ_85} ${TARGET_1_OBJ_86} ${TARGET_1_OBJ_87} ${TARGET_1_OBJ_88} ${TARGET_1_OBJ_89} ${TARGET_1_OBJ_90} ${TARGET_1_OBJ_91} ${TARGET_1_OBJ_92} ${TARGET_1_OBJ_93} ${TARGET_1_OBJ_94} ${TARGET_1_OBJ_95} ${TARGET_1_OBJ_96} ${TARGET_1_OBJ_97} ${TARGET_1_OBJ_98} ${TARGET_1_OBJ_99} ${TARGET_1_OBJ_100} ${TARGET_1_OBJ_101} ${TARGET_1_OBJ_102} ${TARGET_1_OBJ_103} ${TARGET_1_OBJ_104} ${TARGET_1_OBJ_105} ${TARGET_1_OBJ_106} ${TARGET_1_OBJ_107} ${TARGET_1_OBJ_108} ${TARGET_1_OBJ_109} ${TARGET_1_OBJ_110} ${TARGET_1_OBJ_111} ${TARGET_1_OBJ_112} ${TARGET_1_OBJ_113} ${TARGET_1_OBJ_114} 
${TARGET_1_OBJ_115} ${TARGET_1_OBJ_116} ${TARGET_1_OBJ_117} ${TARGET_1_OBJ_118} ${TARGET_1_OBJ_119} ${TARGET_1_OBJ_120} ${TARGET_1_OBJ_121} ${TARGET_1_OBJ_122} ${TARGET_1_OBJ_123} ${TARGET_1_OBJ_124} ${TARGET_1_OBJ_125} +OBJS_2 := ${TARGET_2_OBJ_0} ${TARGET_2_OBJ_1} +all: $(TARGET) +$(TARGET_0): $(OBJS_0) + $(CC) -fsycl -o $@ $^ $(LIB) -qmkl /home/local_user/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/lib/intel64/libhpl.a libdgemm.so.1.0.1 + +$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0) + cc -c ${TARGET_0_SRC_0} -o ${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0) + +$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1) + cc -c ${TARGET_0_SRC_1} -o ${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1) + +$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2) + cc -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2) + +$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3) + cc -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3) + +$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4) + cc -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4) + +$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5) + cc -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5) + +$(TARGET_0_OBJ_6):$(TARGET_0_SRC_6) + cc -c ${TARGET_0_SRC_6} -o ${TARGET_0_OBJ_6} $(TARGET_0_FLAG_6) + +$(TARGET_0_OBJ_7):$(TARGET_0_SRC_7) + cc -c ${TARGET_0_SRC_7} -o ${TARGET_0_OBJ_7} $(TARGET_0_FLAG_7) + +$(TARGET_0_OBJ_8):$(TARGET_0_SRC_8) + cc -c ${TARGET_0_SRC_8} -o ${TARGET_0_OBJ_8} $(TARGET_0_FLAG_8) + +$(TARGET_0_OBJ_9):$(TARGET_0_SRC_9) + cc -c ${TARGET_0_SRC_9} -o ${TARGET_0_OBJ_9} $(TARGET_0_FLAG_9) + +$(TARGET_0_OBJ_10):$(TARGET_0_SRC_10) + cc -c ${TARGET_0_SRC_10} -o ${TARGET_0_OBJ_10} $(TARGET_0_FLAG_10) + +$(TARGET_0_OBJ_11):$(TARGET_0_SRC_11) + cc -c ${TARGET_0_SRC_11} -o ${TARGET_0_OBJ_11} $(TARGET_0_FLAG_11) + +$(TARGET_1): $(OBJS_1) + ar -r $@ $^ $(LIB) + +$(TARGET_1_OBJ_0):$(TARGET_1_SRC_0) + cc -c ${TARGET_1_SRC_0} -o ${TARGET_1_OBJ_0} $(TARGET_1_FLAG_0) + +$(TARGET_1_OBJ_1):$(TARGET_1_SRC_1) + cc -c ${TARGET_1_SRC_1} -o ${TARGET_1_OBJ_1} $(TARGET_1_FLAG_1) + 
+$(TARGET_1_OBJ_2):$(TARGET_1_SRC_2) + cc -c ${TARGET_1_SRC_2} -o ${TARGET_1_OBJ_2} $(TARGET_1_FLAG_2) + +$(TARGET_1_OBJ_3):$(TARGET_1_SRC_3) + cc -c ${TARGET_1_SRC_3} -o ${TARGET_1_OBJ_3} $(TARGET_1_FLAG_3) + +$(TARGET_1_OBJ_4):$(TARGET_1_SRC_4) + cc -c ${TARGET_1_SRC_4} -o ${TARGET_1_OBJ_4} $(TARGET_1_FLAG_4) + +$(TARGET_1_OBJ_5):$(TARGET_1_SRC_5) + cc -c ${TARGET_1_SRC_5} -o ${TARGET_1_OBJ_5} $(TARGET_1_FLAG_5) + +$(TARGET_1_OBJ_6):$(TARGET_1_SRC_6) + cc -c ${TARGET_1_SRC_6} -o ${TARGET_1_OBJ_6} $(TARGET_1_FLAG_6) + +$(TARGET_1_OBJ_7):$(TARGET_1_SRC_7) + cc -c ${TARGET_1_SRC_7} -o ${TARGET_1_OBJ_7} $(TARGET_1_FLAG_7) + +$(TARGET_1_OBJ_8):$(TARGET_1_SRC_8) + cc -c ${TARGET_1_SRC_8} -o ${TARGET_1_OBJ_8} $(TARGET_1_FLAG_8) + +$(TARGET_1_OBJ_9):$(TARGET_1_SRC_9) + cc -c ${TARGET_1_SRC_9} -o ${TARGET_1_OBJ_9} $(TARGET_1_FLAG_9) + +$(TARGET_1_OBJ_10):$(TARGET_1_SRC_10) + cc -c ${TARGET_1_SRC_10} -o ${TARGET_1_OBJ_10} $(TARGET_1_FLAG_10) + +$(TARGET_1_OBJ_11):$(TARGET_1_SRC_11) + cc -c ${TARGET_1_SRC_11} -o ${TARGET_1_OBJ_11} $(TARGET_1_FLAG_11) + +$(TARGET_1_OBJ_12):$(TARGET_1_SRC_12) + cc -c ${TARGET_1_SRC_12} -o ${TARGET_1_OBJ_12} $(TARGET_1_FLAG_12) + +$(TARGET_1_OBJ_13):$(TARGET_1_SRC_13) + cc -c ${TARGET_1_SRC_13} -o ${TARGET_1_OBJ_13} $(TARGET_1_FLAG_13) + +$(TARGET_1_OBJ_14):$(TARGET_1_SRC_14) + cc -c ${TARGET_1_SRC_14} -o ${TARGET_1_OBJ_14} $(TARGET_1_FLAG_14) + +$(TARGET_1_OBJ_15):$(TARGET_1_SRC_15) + cc -c ${TARGET_1_SRC_15} -o ${TARGET_1_OBJ_15} $(TARGET_1_FLAG_15) + +$(TARGET_1_OBJ_16):$(TARGET_1_SRC_16) + cc -c ${TARGET_1_SRC_16} -o ${TARGET_1_OBJ_16} $(TARGET_1_FLAG_16) + +$(TARGET_1_OBJ_17):$(TARGET_1_SRC_17) + cc -c ${TARGET_1_SRC_17} -o ${TARGET_1_OBJ_17} $(TARGET_1_FLAG_17) + +$(TARGET_1_OBJ_18):$(TARGET_1_SRC_18) + cc -c ${TARGET_1_SRC_18} -o ${TARGET_1_OBJ_18} $(TARGET_1_FLAG_18) + +$(TARGET_1_OBJ_19):$(TARGET_1_SRC_19) + cc -c ${TARGET_1_SRC_19} -o ${TARGET_1_OBJ_19} $(TARGET_1_FLAG_19) + +$(TARGET_1_OBJ_20):$(TARGET_1_SRC_20) + cc -c 
${TARGET_1_SRC_20} -o ${TARGET_1_OBJ_20} $(TARGET_1_FLAG_20) + +$(TARGET_1_OBJ_21):$(TARGET_1_SRC_21) + cc -c ${TARGET_1_SRC_21} -o ${TARGET_1_OBJ_21} $(TARGET_1_FLAG_21) + +$(TARGET_1_OBJ_22):$(TARGET_1_SRC_22) + cc -c ${TARGET_1_SRC_22} -o ${TARGET_1_OBJ_22} $(TARGET_1_FLAG_22) + +$(TARGET_1_OBJ_23):$(TARGET_1_SRC_23) + cc -c ${TARGET_1_SRC_23} -o ${TARGET_1_OBJ_23} $(TARGET_1_FLAG_23) + +$(TARGET_1_OBJ_24):$(TARGET_1_SRC_24) + cc -c ${TARGET_1_SRC_24} -o ${TARGET_1_OBJ_24} $(TARGET_1_FLAG_24) + +$(TARGET_1_OBJ_25):$(TARGET_1_SRC_25) + cc -c ${TARGET_1_SRC_25} -o ${TARGET_1_OBJ_25} $(TARGET_1_FLAG_25) + +$(TARGET_1_OBJ_26):$(TARGET_1_SRC_26) + cc -c ${TARGET_1_SRC_26} -o ${TARGET_1_OBJ_26} $(TARGET_1_FLAG_26) + +$(TARGET_1_OBJ_27):$(TARGET_1_SRC_27) + cc -c ${TARGET_1_SRC_27} -o ${TARGET_1_OBJ_27} $(TARGET_1_FLAG_27) + +$(TARGET_1_OBJ_28):$(TARGET_1_SRC_28) + cc -c ${TARGET_1_SRC_28} -o ${TARGET_1_OBJ_28} $(TARGET_1_FLAG_28) + +$(TARGET_1_OBJ_29):$(TARGET_1_SRC_29) + cc -c ${TARGET_1_SRC_29} -o ${TARGET_1_OBJ_29} $(TARGET_1_FLAG_29) + +$(TARGET_1_OBJ_30):$(TARGET_1_SRC_30) + cc -c ${TARGET_1_SRC_30} -o ${TARGET_1_OBJ_30} $(TARGET_1_FLAG_30) + +$(TARGET_1_OBJ_31):$(TARGET_1_SRC_31) + cc -c ${TARGET_1_SRC_31} -o ${TARGET_1_OBJ_31} $(TARGET_1_FLAG_31) + +$(TARGET_1_OBJ_32):$(TARGET_1_SRC_32) + cc -c ${TARGET_1_SRC_32} -o ${TARGET_1_OBJ_32} $(TARGET_1_FLAG_32) + +$(TARGET_1_OBJ_33):$(TARGET_1_SRC_33) + cc -c ${TARGET_1_SRC_33} -o ${TARGET_1_OBJ_33} $(TARGET_1_FLAG_33) + +$(TARGET_1_OBJ_34):$(TARGET_1_SRC_34) + cc -c ${TARGET_1_SRC_34} -o ${TARGET_1_OBJ_34} $(TARGET_1_FLAG_34) + +$(TARGET_1_OBJ_35):$(TARGET_1_SRC_35) + cc -c ${TARGET_1_SRC_35} -o ${TARGET_1_OBJ_35} $(TARGET_1_FLAG_35) + +$(TARGET_1_OBJ_36):$(TARGET_1_SRC_36) + cc -c ${TARGET_1_SRC_36} -o ${TARGET_1_OBJ_36} $(TARGET_1_FLAG_36) + +$(TARGET_1_OBJ_37):$(TARGET_1_SRC_37) + cc -c ${TARGET_1_SRC_37} -o ${TARGET_1_OBJ_37} $(TARGET_1_FLAG_37) + +$(TARGET_1_OBJ_38):$(TARGET_1_SRC_38) + cc -c ${TARGET_1_SRC_38} 
-o ${TARGET_1_OBJ_38} $(TARGET_1_FLAG_38) + +$(TARGET_1_OBJ_39):$(TARGET_1_SRC_39) + cc -c ${TARGET_1_SRC_39} -o ${TARGET_1_OBJ_39} $(TARGET_1_FLAG_39) + +$(TARGET_1_OBJ_40):$(TARGET_1_SRC_40) + cc -c ${TARGET_1_SRC_40} -o ${TARGET_1_OBJ_40} $(TARGET_1_FLAG_40) + +$(TARGET_1_OBJ_41):$(TARGET_1_SRC_41) + cc -c ${TARGET_1_SRC_41} -o ${TARGET_1_OBJ_41} $(TARGET_1_FLAG_41) + +$(TARGET_1_OBJ_42):$(TARGET_1_SRC_42) + cc -c ${TARGET_1_SRC_42} -o ${TARGET_1_OBJ_42} $(TARGET_1_FLAG_42) + +$(TARGET_1_OBJ_43):$(TARGET_1_SRC_43) + cc -c ${TARGET_1_SRC_43} -o ${TARGET_1_OBJ_43} $(TARGET_1_FLAG_43) + +$(TARGET_1_OBJ_44):$(TARGET_1_SRC_44) + cc -c ${TARGET_1_SRC_44} -o ${TARGET_1_OBJ_44} $(TARGET_1_FLAG_44) + +$(TARGET_1_OBJ_45):$(TARGET_1_SRC_45) + cc -c ${TARGET_1_SRC_45} -o ${TARGET_1_OBJ_45} $(TARGET_1_FLAG_45) + +$(TARGET_1_OBJ_46):$(TARGET_1_SRC_46) + cc -c ${TARGET_1_SRC_46} -o ${TARGET_1_OBJ_46} $(TARGET_1_FLAG_46) + +$(TARGET_1_OBJ_47):$(TARGET_1_SRC_47) + cc -c ${TARGET_1_SRC_47} -o ${TARGET_1_OBJ_47} $(TARGET_1_FLAG_47) + +$(TARGET_1_OBJ_48):$(TARGET_1_SRC_48) + cc -c ${TARGET_1_SRC_48} -o ${TARGET_1_OBJ_48} $(TARGET_1_FLAG_48) + +$(TARGET_1_OBJ_49):$(TARGET_1_SRC_49) + cc -c ${TARGET_1_SRC_49} -o ${TARGET_1_OBJ_49} $(TARGET_1_FLAG_49) + +$(TARGET_1_OBJ_50):$(TARGET_1_SRC_50) + cc -c ${TARGET_1_SRC_50} -o ${TARGET_1_OBJ_50} $(TARGET_1_FLAG_50) + +$(TARGET_1_OBJ_51):$(TARGET_1_SRC_51) + cc -c ${TARGET_1_SRC_51} -o ${TARGET_1_OBJ_51} $(TARGET_1_FLAG_51) + +$(TARGET_1_OBJ_52):$(TARGET_1_SRC_52) + cc -c ${TARGET_1_SRC_52} -o ${TARGET_1_OBJ_52} $(TARGET_1_FLAG_52) + +$(TARGET_1_OBJ_53):$(TARGET_1_SRC_53) + cc -c ${TARGET_1_SRC_53} -o ${TARGET_1_OBJ_53} $(TARGET_1_FLAG_53) + +$(TARGET_1_OBJ_54):$(TARGET_1_SRC_54) + cc -c ${TARGET_1_SRC_54} -o ${TARGET_1_OBJ_54} $(TARGET_1_FLAG_54) + +$(TARGET_1_OBJ_55):$(TARGET_1_SRC_55) + cc -c ${TARGET_1_SRC_55} -o ${TARGET_1_OBJ_55} $(TARGET_1_FLAG_55) + +$(TARGET_1_OBJ_56):$(TARGET_1_SRC_56) + cc -c ${TARGET_1_SRC_56} -o 
${TARGET_1_OBJ_56} $(TARGET_1_FLAG_56) + +$(TARGET_1_OBJ_57):$(TARGET_1_SRC_57) + cc -c ${TARGET_1_SRC_57} -o ${TARGET_1_OBJ_57} $(TARGET_1_FLAG_57) + +$(TARGET_1_OBJ_58):$(TARGET_1_SRC_58) + cc -c ${TARGET_1_SRC_58} -o ${TARGET_1_OBJ_58} $(TARGET_1_FLAG_58) + +$(TARGET_1_OBJ_59):$(TARGET_1_SRC_59) + cc -c ${TARGET_1_SRC_59} -o ${TARGET_1_OBJ_59} $(TARGET_1_FLAG_59) + +$(TARGET_1_OBJ_60):$(TARGET_1_SRC_60) + cc -c ${TARGET_1_SRC_60} -o ${TARGET_1_OBJ_60} $(TARGET_1_FLAG_60) + +$(TARGET_1_OBJ_61):$(TARGET_1_SRC_61) + cc -c ${TARGET_1_SRC_61} -o ${TARGET_1_OBJ_61} $(TARGET_1_FLAG_61) + +$(TARGET_1_OBJ_62):$(TARGET_1_SRC_62) + cc -c ${TARGET_1_SRC_62} -o ${TARGET_1_OBJ_62} $(TARGET_1_FLAG_62) + +$(TARGET_1_OBJ_63):$(TARGET_1_SRC_63) + cc -c ${TARGET_1_SRC_63} -o ${TARGET_1_OBJ_63} $(TARGET_1_FLAG_63) + +$(TARGET_1_OBJ_64):$(TARGET_1_SRC_64) + cc -c ${TARGET_1_SRC_64} -o ${TARGET_1_OBJ_64} $(TARGET_1_FLAG_64) + +$(TARGET_1_OBJ_65):$(TARGET_1_SRC_65) + cc -c ${TARGET_1_SRC_65} -o ${TARGET_1_OBJ_65} $(TARGET_1_FLAG_65) + +$(TARGET_1_OBJ_66):$(TARGET_1_SRC_66) + cc -c ${TARGET_1_SRC_66} -o ${TARGET_1_OBJ_66} $(TARGET_1_FLAG_66) + +$(TARGET_1_OBJ_67):$(TARGET_1_SRC_67) + cc -c ${TARGET_1_SRC_67} -o ${TARGET_1_OBJ_67} $(TARGET_1_FLAG_67) + +$(TARGET_1_OBJ_68):$(TARGET_1_SRC_68) + cc -c ${TARGET_1_SRC_68} -o ${TARGET_1_OBJ_68} $(TARGET_1_FLAG_68) + +$(TARGET_1_OBJ_69):$(TARGET_1_SRC_69) + cc -c ${TARGET_1_SRC_69} -o ${TARGET_1_OBJ_69} $(TARGET_1_FLAG_69) + +$(TARGET_1_OBJ_70):$(TARGET_1_SRC_70) + cc -c ${TARGET_1_SRC_70} -o ${TARGET_1_OBJ_70} $(TARGET_1_FLAG_70) + +$(TARGET_1_OBJ_71):$(TARGET_1_SRC_71) + cc -c ${TARGET_1_SRC_71} -o ${TARGET_1_OBJ_71} $(TARGET_1_FLAG_71) + +$(TARGET_1_OBJ_72):$(TARGET_1_SRC_72) + cc -c ${TARGET_1_SRC_72} -o ${TARGET_1_OBJ_72} $(TARGET_1_FLAG_72) + +$(TARGET_1_OBJ_73):$(TARGET_1_SRC_73) + cc -c ${TARGET_1_SRC_73} -o ${TARGET_1_OBJ_73} $(TARGET_1_FLAG_73) + +$(TARGET_1_OBJ_74):$(TARGET_1_SRC_74) + cc -c ${TARGET_1_SRC_74} -o ${TARGET_1_OBJ_74} 
$(TARGET_1_FLAG_74) + +$(TARGET_1_OBJ_75):$(TARGET_1_SRC_75) + cc -c ${TARGET_1_SRC_75} -o ${TARGET_1_OBJ_75} $(TARGET_1_FLAG_75) + +$(TARGET_1_OBJ_76):$(TARGET_1_SRC_76) + cc -c ${TARGET_1_SRC_76} -o ${TARGET_1_OBJ_76} $(TARGET_1_FLAG_76) + +$(TARGET_1_OBJ_77):$(TARGET_1_SRC_77) + cc -c ${TARGET_1_SRC_77} -o ${TARGET_1_OBJ_77} $(TARGET_1_FLAG_77) + +$(TARGET_1_OBJ_78):$(TARGET_1_SRC_78) + cc -c ${TARGET_1_SRC_78} -o ${TARGET_1_OBJ_78} $(TARGET_1_FLAG_78) + +$(TARGET_1_OBJ_79):$(TARGET_1_SRC_79) + cc -c ${TARGET_1_SRC_79} -o ${TARGET_1_OBJ_79} $(TARGET_1_FLAG_79) + +$(TARGET_1_OBJ_80):$(TARGET_1_SRC_80) + cc -c ${TARGET_1_SRC_80} -o ${TARGET_1_OBJ_80} $(TARGET_1_FLAG_80) + +$(TARGET_1_OBJ_81):$(TARGET_1_SRC_81) + cc -c ${TARGET_1_SRC_81} -o ${TARGET_1_OBJ_81} $(TARGET_1_FLAG_81) + +$(TARGET_1_OBJ_82):$(TARGET_1_SRC_82) + cc -c ${TARGET_1_SRC_82} -o ${TARGET_1_OBJ_82} $(TARGET_1_FLAG_82) + +$(TARGET_1_OBJ_83):$(TARGET_1_SRC_83) + cc -c ${TARGET_1_SRC_83} -o ${TARGET_1_OBJ_83} $(TARGET_1_FLAG_83) + +$(TARGET_1_OBJ_84):$(TARGET_1_SRC_84) + cc -c ${TARGET_1_SRC_84} -o ${TARGET_1_OBJ_84} $(TARGET_1_FLAG_84) + +$(TARGET_1_OBJ_85):$(TARGET_1_SRC_85) + cc -c ${TARGET_1_SRC_85} -o ${TARGET_1_OBJ_85} $(TARGET_1_FLAG_85) + +$(TARGET_1_OBJ_86):$(TARGET_1_SRC_86) + cc -c ${TARGET_1_SRC_86} -o ${TARGET_1_OBJ_86} $(TARGET_1_FLAG_86) + +$(TARGET_1_OBJ_87):$(TARGET_1_SRC_87) + cc -c ${TARGET_1_SRC_87} -o ${TARGET_1_OBJ_87} $(TARGET_1_FLAG_87) + +$(TARGET_1_OBJ_88):$(TARGET_1_SRC_88) + cc -c ${TARGET_1_SRC_88} -o ${TARGET_1_OBJ_88} $(TARGET_1_FLAG_88) + +$(TARGET_1_OBJ_89):$(TARGET_1_SRC_89) + cc -c ${TARGET_1_SRC_89} -o ${TARGET_1_OBJ_89} $(TARGET_1_FLAG_89) + +$(TARGET_1_OBJ_90):$(TARGET_1_SRC_90) + cc -c ${TARGET_1_SRC_90} -o ${TARGET_1_OBJ_90} $(TARGET_1_FLAG_90) + +$(TARGET_1_OBJ_91):$(TARGET_1_SRC_91) + cc -c ${TARGET_1_SRC_91} -o ${TARGET_1_OBJ_91} $(TARGET_1_FLAG_91) + +$(TARGET_1_OBJ_92):$(TARGET_1_SRC_92) + cc -c ${TARGET_1_SRC_92} -o ${TARGET_1_OBJ_92} $(TARGET_1_FLAG_92) 
+ +$(TARGET_1_OBJ_93):$(TARGET_1_SRC_93) + cc -c ${TARGET_1_SRC_93} -o ${TARGET_1_OBJ_93} $(TARGET_1_FLAG_93) + +$(TARGET_1_OBJ_94):$(TARGET_1_SRC_94) + cc -c ${TARGET_1_SRC_94} -o ${TARGET_1_OBJ_94} $(TARGET_1_FLAG_94) + +$(TARGET_1_OBJ_95):$(TARGET_1_SRC_95) + cc -c ${TARGET_1_SRC_95} -o ${TARGET_1_OBJ_95} $(TARGET_1_FLAG_95) + +$(TARGET_1_OBJ_96):$(TARGET_1_SRC_96) + cc -c ${TARGET_1_SRC_96} -o ${TARGET_1_OBJ_96} $(TARGET_1_FLAG_96) + +$(TARGET_1_OBJ_97):$(TARGET_1_SRC_97) + cc -c ${TARGET_1_SRC_97} -o ${TARGET_1_OBJ_97} $(TARGET_1_FLAG_97) + +$(TARGET_1_OBJ_98):$(TARGET_1_SRC_98) + cc -c ${TARGET_1_SRC_98} -o ${TARGET_1_OBJ_98} $(TARGET_1_FLAG_98) + +$(TARGET_1_OBJ_99):$(TARGET_1_SRC_99) + cc -c ${TARGET_1_SRC_99} -o ${TARGET_1_OBJ_99} $(TARGET_1_FLAG_99) + +$(TARGET_1_OBJ_100):$(TARGET_1_SRC_100) + cc -c ${TARGET_1_SRC_100} -o ${TARGET_1_OBJ_100} $(TARGET_1_FLAG_100) + +$(TARGET_1_OBJ_101):$(TARGET_1_SRC_101) + cc -c ${TARGET_1_SRC_101} -o ${TARGET_1_OBJ_101} $(TARGET_1_FLAG_101) + +$(TARGET_1_OBJ_102):$(TARGET_1_SRC_102) + cc -c ${TARGET_1_SRC_102} -o ${TARGET_1_OBJ_102} $(TARGET_1_FLAG_102) + +$(TARGET_1_OBJ_103):$(TARGET_1_SRC_103) + cc -c ${TARGET_1_SRC_103} -o ${TARGET_1_OBJ_103} $(TARGET_1_FLAG_103) + +$(TARGET_1_OBJ_104):$(TARGET_1_SRC_104) + cc -c ${TARGET_1_SRC_104} -o ${TARGET_1_OBJ_104} $(TARGET_1_FLAG_104) + +$(TARGET_1_OBJ_105):$(TARGET_1_SRC_105) + cc -c ${TARGET_1_SRC_105} -o ${TARGET_1_OBJ_105} $(TARGET_1_FLAG_105) + +$(TARGET_1_OBJ_106):$(TARGET_1_SRC_106) + cc -c ${TARGET_1_SRC_106} -o ${TARGET_1_OBJ_106} $(TARGET_1_FLAG_106) + +$(TARGET_1_OBJ_107):$(TARGET_1_SRC_107) + cc -c ${TARGET_1_SRC_107} -o ${TARGET_1_OBJ_107} $(TARGET_1_FLAG_107) + +$(TARGET_1_OBJ_108):$(TARGET_1_SRC_108) + cc -c ${TARGET_1_SRC_108} -o ${TARGET_1_OBJ_108} $(TARGET_1_FLAG_108) + +$(TARGET_1_OBJ_109):$(TARGET_1_SRC_109) + cc -c ${TARGET_1_SRC_109} -o ${TARGET_1_OBJ_109} $(TARGET_1_FLAG_109) + +$(TARGET_1_OBJ_110):$(TARGET_1_SRC_110) + cc -c ${TARGET_1_SRC_110} -o 
${TARGET_1_OBJ_110} $(TARGET_1_FLAG_110) + +$(TARGET_1_OBJ_111):$(TARGET_1_SRC_111) + cc -c ${TARGET_1_SRC_111} -o ${TARGET_1_OBJ_111} $(TARGET_1_FLAG_111) + +$(TARGET_1_OBJ_112):$(TARGET_1_SRC_112) + cc -c ${TARGET_1_SRC_112} -o ${TARGET_1_OBJ_112} $(TARGET_1_FLAG_112) + +$(TARGET_1_OBJ_113):$(TARGET_1_SRC_113) + cc -c ${TARGET_1_SRC_113} -o ${TARGET_1_OBJ_113} $(TARGET_1_FLAG_113) + +$(TARGET_1_OBJ_114):$(TARGET_1_SRC_114) + cc -c ${TARGET_1_SRC_114} -o ${TARGET_1_OBJ_114} $(TARGET_1_FLAG_114) + +$(TARGET_1_OBJ_115):$(TARGET_1_SRC_115) + cc -c ${TARGET_1_SRC_115} -o ${TARGET_1_OBJ_115} $(TARGET_1_FLAG_115) + +$(TARGET_1_OBJ_116):$(TARGET_1_SRC_116) + cc -c ${TARGET_1_SRC_116} -o ${TARGET_1_OBJ_116} $(TARGET_1_FLAG_116) + +$(TARGET_1_OBJ_117):$(TARGET_1_SRC_117) + cc -c ${TARGET_1_SRC_117} -o ${TARGET_1_OBJ_117} $(TARGET_1_FLAG_117) + +$(TARGET_1_OBJ_118):$(TARGET_1_SRC_118) + cc -c ${TARGET_1_SRC_118} -o ${TARGET_1_OBJ_118} $(TARGET_1_FLAG_118) + +$(TARGET_1_OBJ_119):$(TARGET_1_SRC_119) + cc -c ${TARGET_1_SRC_119} -o ${TARGET_1_OBJ_119} $(TARGET_1_FLAG_119) + +$(TARGET_1_OBJ_120):$(TARGET_1_SRC_120) + cc -c ${TARGET_1_SRC_120} -o ${TARGET_1_OBJ_120} $(TARGET_1_FLAG_120) + +$(TARGET_1_OBJ_121):$(TARGET_1_SRC_121) + cc -c ${TARGET_1_SRC_121} -o ${TARGET_1_OBJ_121} $(TARGET_1_FLAG_121) + +$(TARGET_1_OBJ_122):$(TARGET_1_SRC_122) + cc -c ${TARGET_1_SRC_122} -o ${TARGET_1_OBJ_122} $(TARGET_1_FLAG_122) + +$(TARGET_1_OBJ_123):$(TARGET_1_SRC_123) + cc -c ${TARGET_1_SRC_123} -o ${TARGET_1_OBJ_123} $(TARGET_1_FLAG_123) + +$(TARGET_1_OBJ_124):$(TARGET_1_SRC_124) + cc -c ${TARGET_1_SRC_124} -o ${TARGET_1_OBJ_124} $(TARGET_1_FLAG_124) + +$(TARGET_1_OBJ_125):$(TARGET_1_SRC_125) + cc -c ${TARGET_1_SRC_125} -o ${TARGET_1_OBJ_125} $(TARGET_1_FLAG_125) + +$(TARGET_2): $(OBJS_2) + $(CC) -fsycl -shared -o $@ $^ $(LIB) -qmkl + +$(TARGET_2_OBJ_0):$(TARGET_2_SRC_0) + cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0) + +$(TARGET_2_OBJ_1):$(TARGET_2_SRC_1) + icpx -c 
${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1) + +clean: + rm -f ${OBJS_0} ${OBJS_1} ${OBJS_2} $(TARGET) diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl.h new file mode 100644 index 000000000..6d131963f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl.h @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_H +#define HPL_H +/* + * --------------------------------------------------------------------- + * HPL default compile options that can overridden in the Make. + * --------------------------------------------------------------------- + */ +#ifndef HPL_NO_MPI_DATATYPE /* Use MPI user-defined data type */ +#define HPL_USE_MPI_DATATYPE +#endif + +#ifndef HPL_COPY_L /* do not copy L, use MPI user-defined data types */ +#define HPL_NO_COPY_L +#endif + +#ifndef HPL_DETAILED_TIMING /* Do not enable detailed timings */ +#define HPL_NO_DETAILED_TIMING +#endif + +#ifndef HPL_CALL_VSIPL /* Call the Fortran 77 BLAS interface */ +#ifndef HPL_CALL_CBLAS /* there can be only one */ +#define HPL_CALL_FBLAS +#endif +#endif +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +#include "hpl_pgesv.h" + +#include "hpl_timer.h" +#include "hpl_matgen.h" 
+#include "hpl_test.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +#include "hpl_ptest.h" + +#endif +/* + * End of hpl.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_auxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_auxil.h new file mode 100644 index 000000000..861caf380 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_auxil.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_AUXIL_H +#define HPL_AUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM; + +typedef enum +{ + HPL_MACH_EPS = 900, /* relative machine precision */ + HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */ + HPL_MACH_BASE = 902, /* base = base of the machine */ + HPL_MACH_PREC = 903, /* prec = eps*base */ + HPL_MACH_MLEN = 904, /* number of (base) digits in the mantissa */ + HPL_MACH_RND = 905, /* 1.0 if rounding occurs in addition */ + HPL_MACH_EMIN = 906, /* min exponent before (gradual) underflow */ + HPL_MACH_RMIN = 907, /* underflow threshold base**(emin-1) */ + HPL_MACH_EMAX = 908, /* largest exponent before overflow */ + HPL_MACH_RMAX = 909 /* overflow threshold - (base**emax)*(1-eps) */ 
+ +} HPL_T_MACH; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_fprintf +STDC_ARGS( ( + FILE *, + const char *, + ... +) ); +void HPL_warn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_abort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_dlacpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlatcpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlaprnt +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_dlange +STDC_ARGS( ( + const HPL_T_NORM, + const int, + const int, + const double *, + const int +) ); +double HPL_dlamch +STDC_ARGS( ( + const HPL_T_MACH +) ); + +#endif +/* + * End of hpl_auxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_blas.h new file mode 100644 index 000000000..2a510471a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_blas.h @@ -0,0 +1,630 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_BLAS_H +#define HPL_BLAS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" + + +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +enum HPL_ORDER +{ HplRowMajor = 101, HplColumnMajor = 102 }; +enum HPL_TRANS +{ HplNoTrans = 111, HplTrans = 112, HplConjTrans = 113 }; +enum HPL_UPLO +{ HplUpper = 121, HplLower = 122 }; +enum HPL_DIAG +{ HplNonUnit = 131, HplUnit = 132 }; +enum HPL_SIDE +{ HplLeft = 141, HplRight = 142 }; + + +#ifdef HPL_CALL_CBLAS + + +/* + * --------------------------------------------------------------------- + * The C interface of the BLAS is available ... + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define CBLAS_INDEX int + +#define CBLAS_ORDER HPL_ORDER +#define CblasRowMajor HplRowMajor +#define CblasColMajor HplColMajor + +#define CBLAS_TRANSPOSE HPL_TRANS +#define CblasNoTrans HplNoTrans +#define CblasTrans HplTrans +#define CblasConjTrans HplConjTrans + +#define CBLAS_UPLO HPL_UPLO +#define CblasUpper HplUpper +#define CblasLower HplLower + +#define CBLAS_DIAG HPL_DIAG +#define CblasNonUnit HplNonUnit +#define CblasUnit HplUnit + +#define CBLAS_SIDE HPL_SIDE +#define CblasLeft HplLeft +#define CblasRight HplRight +/* + * --------------------------------------------------------------------- + * CBLAS Function prototypes + * --------------------------------------------------------------------- + */ +CBLAS_INDEX cblas_idamax +STDC_ARGS( +( const int, const double *, const int ) ); +void cblas_dswap +STDC_ARGS( +( const int, double *, const int, double *, + const int ) ); +void cblas_dcopy +STDC_ARGS( +( const int, 
const double *, const int, double *, + const int ) ); +void cblas_daxpy +STDC_ARGS( +( const int, const double, const double *, const int, + double *, const int ) ); +void cblas_dscal +STDC_ARGS( +( const int, const double, double *, const int ) ); + +void cblas_dgemv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const int, const int, const double, const double *, + const int, const double *, const int, const double, + double *, const int ) ); + +void cblas_dger +STDC_ARGS( +( const enum CBLAS_ORDER, const int, const int, + const double, const double *, const int, const double *, + const int, double *, const int ) ); +void cblas_dtrsv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_UPLO, + const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, + const int, const double *, const int, double *, + const int ) ); + +void cblas_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void cblas_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +void dpcpp_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void dpcpp_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +/* + * --------------------------------------------------------------------- + * HPL C BLAS macro definition + * 
--------------------------------------------------------------------- + */ +#define HPL_dswap cblas_dswap +#define HPL_dcopy cblas_dcopy +#define HPL_daxpy cblas_daxpy +#define HPL_dscal cblas_dscal +#define HPL_idamax cblas_idamax + +#define HPL_dgemv cblas_dgemv +#define HPL_dtrsv cblas_dtrsv +#define HPL_dger cblas_dger + +//#define HPL_dgemm cblas_dgemm +//#define HPL_dtrsm cblas_dtrsm +#define HPL_dgemm dpcpp_dgemm +#define HPL_dtrsm dpcpp_dtrsm + +#endif + +//#define HPL_hello sss_gemm + +#ifdef HPL_CALL_FBLAS +/* + * --------------------------------------------------------------------- + * Use the Fortran 77 interface of the BLAS ... + * --------------------------------------------------------------------- + * Defaults: Add_, F77_INTEGER=int, StringSunStyle + * --------------------------------------------------------------------- + */ +#ifndef NoChange +#ifndef UpCase +#ifndef Add__ +#ifndef Add_ + +#define Add_ + +#endif +#endif +#endif +#endif + +#ifndef F77_INTEGER +#define F77_INTEGER int +#else +#define HPL_USE_F77_INTEGER_DEF +#endif + +#ifndef StringCrayStyle +#ifndef StringStructVal +#ifndef StringStructPtr +#ifndef StringSunStyle + +#define StringSunStyle + +#endif +#endif +#endif +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 <-> C interface + * --------------------------------------------------------------------- + * + * These macros identifies how Fortran routines will be called. + * + * Add_ : the Fortran compiler expects the name of C functions to be + * in all lower case and to have an underscore postfixed it (Suns, Intel + * compilers expect this). + * + * NoChange : the Fortran compiler expects the name of C functions to be + * in all lower case (IBM RS6K compilers do this). + * + * UpCase : the Fortran compiler expects the name of C functions to be + * in all upcase. (Cray compilers expect this). + * + * Add__ : the Fortran compiler in use is f2c, a Fortran to C conver- + * ter. 
+ */ +#ifdef NoChange +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm(...) + */ +#define F77dswap dswap +#define F77dscal dscal +#define F77dcopy dcopy +#define F77daxpy daxpy +#define F77idamax idamax + +#define F77dgemv dgemv +#define F77dtrsv dtrsv +#define F77dger dger + +#define F77dgemm dgemm +#define F77dtrsm dtrsm + +#endif + +#ifdef UpCase +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) DGEMM(...) + */ +#ifdef CRAY_BLAS + +#define F77dswap SSWAP +#define F77dscal SSCAL +#define F77dcopy SCOPY +#define F77daxpy SAXPY +#define F77idamax ISAMAX + +#define F77dgemv SGEMV +#define F77dtrsv STRSV +#define F77dger SGER + +#define F77dgemm SGEMM +#define F77dtrsm STRSM + +#else + +#define F77dswap DSWAP +#define F77dscal DSCAL +#define F77dcopy DCOPY +#define F77daxpy DAXPY +#define F77idamax IDAMAX + +#define F77dgemv DGEMV +#define F77dtrsv DTRSV +#define F77dger DGER + +#define F77dgemm DGEMM +#define F77dtrsm DTRSM + +#endif + +#endif + +#ifdef Add_ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) 
+ */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ + +#endif + +#ifdef Add__ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) + */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ +//#define F77hello sss_gemm + +#endif +//#define F77hello sss_gemm +/* + * --------------------------------------------------------------------- + * Typedef definitions and conversion utilities + * --------------------------------------------------------------------- + */ +#ifdef StringCrayStyle + +#include + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR _fcd + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(_fcdtocp(c) )) +#define HPL_C2F_CHAR(c) (_cptofcd(&(c), 1)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructVal + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c.cp)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructPtr + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c->cp)) + +#define 
F77_CHAR_DECL F77_CHAR * /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringSunStyle + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR char * + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c)) +#define HPL_C2F_CHAR(c) (&(c)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ +#define F77_1_CHAR , F77_INTEGER +#define F77_2_CHAR F77_1_CHAR F77_1_CHAR +#define F77_3_CHAR F77_2_CHAR F77_1_CHAR +#define F77_4_CHAR F77_3_CHAR F77_1_CHAR + +#endif +/* ------------------------------------------------------------------ */ + +#ifndef F77_1_CHAR +#define F77_1_CHAR +#define F77_2_CHAR +#define F77_3_CHAR +#define F77_4_CHAR +#endif + +#define F77_INT_DECL const F77_INTEGER * /* input integer */ +#define F77_SIN_DECL const double * /* input scalar */ +#define F77_VIN_DECL const double * /* input vector */ +#define F77_VINOUT_DECL double * /* input/output matrix */ +#define F77_MIN_DECL const double * /* input matrix */ +#define F77_MINOUT_DECL double * /* input/output matrix */ + +#ifdef CRAY_PVP_ENV /* Type of FORTRAN functions */ +#define F77_VOID_FUN extern fortran void /* subroutine */ +#define F77_INT_FUN extern fortran int /* integer function */ +#else +#define F77_VOID_FUN extern void /* subroutine */ +#define F77_INT_FUN extern int /* integer function */ +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 BLAS function prototypes + * --------------------------------------------------------------------- + */ +F77_VOID_FUN F77dswap +STDC_ARGS( +( F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dscal +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_VOID_FUN F77dcopy +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77daxpy +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, 
F77_VIN_DECL, F77_INT_DECL, + F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_INT_FUN F77idamax +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL ) ); + +F77_VOID_FUN F77dgemv +STDC_ARGS( +( F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, + F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL F77_1_CHAR ) ); +F77_VOID_FUN F77dger +STDC_ARGS( +( F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_VIN_DECL, + F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_MINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dtrsv +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL + F77_3_CHAR ) ); + +F77_VOID_FUN F77dgemm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, + F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MINOUT_DECL, + F77_INT_DECL F77_2_CHAR ) ); +F77_VOID_FUN F77dtrsm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, + F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, + F77_INT_DECL, F77_MINOUT_DECL, F77_INT_DECL F77_4_CHAR ) ); + +#endif +/* + * --------------------------------------------------------------------- + * HPL BLAS Function prototypes + * --------------------------------------------------------------------- + */ +#ifndef HPL_CALL_CBLAS + +int HPL_idamax +STDC_ARGS( ( + const int, + const double *, + const int +) ); +void HPL_daxpy +STDC_ARGS( ( + const int, + const double, + const double *, + const int, + double *, + const int +) ); +void HPL_dcopy +STDC_ARGS( ( + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dscal +STDC_ARGS( ( + const int, + const double, + double *, + const int +) ); +void HPL_dswap +STDC_ARGS( ( + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dgemv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const int, + const 
int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_dger +STDC_ARGS( ( + const enum HPL_ORDER, + const int, + const int, + const double, + const double *, + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dtrsv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dgemm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const enum HPL_TRANS, + const int, + const int, + const int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_hello +STDC_ARGS( ( +) ); +#endif +void HPL_dtrsm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_SIDE, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int +) ); + +//#endif + +#endif +/* + * hpl_blas.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_comm.h new file mode 100644 index 000000000..e3ba51a57 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_comm.h @@ -0,0 +1,161 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_COMM_H +#define HPL_COMM_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_1RING = 401, /* Increasing ring */ + HPL_1RING_M = 402, /* Increasing ring (modified) */ + HPL_2RING = 403, /* Increasing 2-ring */ + HPL_2RING_M = 404, /* Increasing 2-ring (modified) */ + HPL_BLONG = 405, /* long broadcast */ + HPL_BLONG_M = 406 /* long broadcast (modified) */ +} HPL_T_TOP; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_FAILURE 0 +#define HPL_SUCCESS 1 +#define HPL_KEEP_TESTING 2 +/* + * --------------------------------------------------------------------- + * comm function prototypes + * --------------------------------------------------------------------- + */ +int HPL_send +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_recv +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_sdrv +STDC_ARGS( ( + double *, + int, + int, + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_binit +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_bcast +STDC_ARGS( ( + HPL_T_panel *, + int * +) ); +int HPL_bwait +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_packL +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int +) ); +void HPL_copyL +STDC_ARGS( ( + HPL_T_panel * +) ); + +int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_1rinM STDC_ARGS( ( 
HPL_T_panel * ) ); +int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blong STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel * ) ); + +#endif +/* + * End of hpl_comm.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_gesv.h new file mode 100644 index 000000000..ce671cf2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_gesv.h @@ -0,0 +1,87 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_GESV_H +#define HPL_GESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_LEFT_LOOKING = 301, /* Left looking lu fact variant */ + HPL_CROUT = 302, /* Crout lu fact variant */ + HPL_RIGHT_LOOKING = 303 /* Right looking lu fact variant */ +} HPL_T_FACT; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dgesv +STDC_ARGS( +( const int, const int, const int, const HPL_T_FACT, + const HPL_T_FACT, const int, double *, + const int, int * ) ); +void HPL_ipid +STDC_ARGS( +( const int, double *, int *, int *, + int *, int *, int *, int *, + const int, const int, const int, const int, + const int ) ); + +#endif +/* + * End of hpl_gesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_grid.h new file mode 100644 index 000000000..1895a5ed4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_grid.h @@ -0,0 +1,212 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_GRID_H +#define HPL_GRID_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum { HPL_INT = 100, HPL_DOUBLE = 101 } HPL_T_TYPE; + +typedef enum +{ + HPL_ROW_MAJOR = 201, + HPL_COLUMN_MAJOR = 202 +} HPL_T_ORDER; + +typedef struct HPL_S_grid +{ + MPI_Comm all_comm; /* grid communicator */ + MPI_Comm row_comm; /* row communicator */ + MPI_Comm col_comm; /* column communicator */ + HPL_T_ORDER order; /* ordering of the procs in the grid */ + int iam; /* my rank in the grid */ + int myrow; /* my row number in the grid */ + int mycol; /* my column number in the grid */ + int nprow; /* the total # of rows in the grid */ + int npcol; /* the total # of columns in the grid */ + int nprocs; /* the total # of procs in the grid */ + int row_ip2; /* largest power of two <= nprow */ + int row_hdim; /* row_ip2 procs hypercube dimension */ + int row_ip2m1; /* largest power of two <= nprow-1 */ + int row_mask; /* row_ip2m1 procs hypercube mask */ + int col_ip2; /* largest power of two <= npcol */ + int col_hdim; /* col_ip2 procs hypercube dimension */ + int col_ip2m1; /* largest power of two <= 
npcol-1 */ + int col_mask; /* col_ip2m1 procs hypercube mask */ +} HPL_T_grid; + +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_OP) +( const int, const void *, void *, const HPL_T_TYPE ); +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define HPL_2_MPI_TYPE( typ ) \ + ( ( typ == HPL_INT ? MPI_INT : MPI_DOUBLE ) ) +/* + * The following macros perform common modulo operations; All functions + * except MPosMod assume arguments are < d (i.e., arguments are themsel- + * ves within modulo range). + */ + /* increment with mod */ +#define MModInc(I, d) if(++(I) == (d)) (I) = 0 + /* decrement with mod */ +#define MModDec(I, d) if(--(I) == -1) (I) = (d)-1 + /* positive modulo */ +#define MPosMod(I, d) ( (I) - ((I)/(d))*(d) ) + /* add two numbers */ +#define MModAdd(I1, I2, d) \ + ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) ) + /* add 1 to # */ +#define MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 ) + /* subtract two numbers */ +#define MModSub(I1, I2, d) \ + ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) ) + /* sub 1 from # */ +#define MModSub1(I, d) ( ((I)!=0) ? 
(I)-1 : (d)-1 ) +/* + * --------------------------------------------------------------------- + * grid function prototypes + * --------------------------------------------------------------------- + */ +int HPL_grid_init +STDC_ARGS( ( + MPI_Comm, + const HPL_T_ORDER, + const int, + const int, + HPL_T_grid * +) ); +int HPL_grid_exit +STDC_ARGS( ( + HPL_T_grid * +) ); + +int HPL_grid_info +STDC_ARGS( ( + const HPL_T_grid *, + int *, + int *, + int *, + int * +) ); +int HPL_pnum +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int +) ); + +int HPL_barrier +STDC_ARGS( ( + MPI_Comm +) ); +int HPL_broadcast +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const int, + MPI_Comm +) ); +int HPL_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + const int, + MPI_Comm +) ); +int HPL_all_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + MPI_Comm +) ); + +void HPL_max +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_min +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_sum +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); + +#endif +/* + * End of hpl_grid.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_matgen.h new file mode 100644 index 000000000..de6503eea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_matgen.h @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MATGEN_H +#define HPL_MATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MULT0 1284865837 +#define HPL_MULT1 1481765933 +#define HPL_IADD0 1 +#define HPL_IADD1 0 +#define HPL_DIVFAC 2147483648.0 +#define HPL_POW16 65536.0 +#define HPL_HALF 0.5 +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dmatgen +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int +) ); +void HPL_lmul +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_ladd +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_xjumpm +STDC_ARGS( ( + const int, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_setran +STDC_ARGS( ( + const int, + int * +) ); +void HPL_jumpit +STDC_ARGS( ( + int *, + int *, + int *, + int * +) ); +double HPL_rand STDC_ARGS( ( void ) ); + +#endif +/* + * End of hpl_matgen.h + */ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_misc.h new file mode 100644 index 000000000..ea421a403 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_misc.h @@ -0,0 +1,110 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MISC_H +#define HPL_MISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#ifdef __STDC__ +#define STDC_HEADERS +#endif + +#include +#include +#include +#include + +#ifdef STDC_HEADERS +#include +#define STDC_ARGS(p) p +#else +#include +#define STDC_ARGS(p) () +#endif + +#ifdef HPL_CALL_VSIPL +#include +#endif +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_rone 1.0 +#define HPL_rtwo 2.0 +#define HPL_rzero 0.0 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define Mabs( a_ ) ( ( (a_) < 0 ) ? -(a_) : (a_) ) +#define Mmin( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) ) +#define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) ) + +#define Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b)))) +#define Mceil(a,b) ( ( (a)+(b)-1 ) / (b) ) +#define Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b)))) + +#define Mupcase(C) (((C)>96 && (C)<123) ? (C) & 0xDF : (C)) +#define Mlowcase(C) (((C)>64 && (C)< 91) ? 
(C) | 32 : (C)) +/* + * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and + * also less silly errors ... + */ +#define Mptr( a_, i_, j_, lda_ ) \ + ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) ) +/* + * Align pointer + */ +#define HPL_PTR( ptr_, al_ ) \ + ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) +#endif +/* + * End of hpl_misc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_panel.h new file mode 100644 index 000000000..d5ba2939c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_panel.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PANEL_H +#define HPL_PANEL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_panel +{ + struct HPL_S_grid * grid; /* ptr to the process grid */ + struct HPL_S_palg * algo; /* ptr to the algo parameters */ + struct HPL_S_pmat * pmat; /* ptr to the local array info */ + double * A; /* ptr to trailing part of A */ + double * WORK; /* work space */ + double * L2; /* ptr to L */ + double * L1; /* ptr to jb x jb upper block of A */ + double * DPIV; /* ptr to replicated jb pivot array */ + double * DINFO; /* ptr to replicated scalar info */ + double * U; /* ptr to U */ + int * IWORK; /* integer workspace for swapping */ + void * * * buffers[2]; /* buffers for panel bcast */ + int counts [2]; /* counts for panel bcast */ + MPI_Datatype dtypes [2]; /* data types for panel bcast */ + MPI_Request request[1]; /* requests for panel bcast */ + MPI_Status status [1]; /* status for panel bcast */ + int nb; /* distribution blocking factor */ + int jb; /* panel width */ + int m; /* global # of rows of trailing part of A */ + int n; /* global # of cols of trailing part of A */ + int ia; /* global row index of trailing part of A */ + int ja; /* global col index of trailing part of A */ + int mp; /* local # of rows of trailing part of A */ + int nq; /* local # of cols of trailing part of A */ + int ii; /* local row index of trailing part of A */ + int jj; /* local col index of trailing part of A */ + int lda; /* local leading dim of array A */ + int prow; /* proc. row owning 1st row of trail. A */ + int pcol; /* proc. col owning 1st col of trail. 
A */ + int msgid; /* message id for panel bcast */ + int ldl2; /* local leading dim of array L2 */ + int len; /* length of the buffer to broadcast */ +#ifdef HPL_CALL_VSIPL + vsip_block_d * Ablock; /* A block */ + vsip_block_d * L1block; /* L1 block */ + vsip_block_d * L2block; /* L2 block */ + vsip_block_d * Ublock; /* U block */ +#endif +} HPL_T_panel; + +/* + * --------------------------------------------------------------------- + * panel function prototypes + * --------------------------------------------------------------------- + */ +#include "hpl_pgesv.h" + +void HPL_pdpanel_new +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * * +) ); +void HPL_pdpanel_init +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * +) ); +int HPL_pdpanel_disp +STDC_ARGS( ( + HPL_T_panel * * +) ); +int HPL_pdpanel_free +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_panel.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pauxil.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pauxil.h new file mode 100644 index 000000000..1fd0ee457 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pauxil.h @@ -0,0 +1,505 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PAUXIL_H +#define HPL_PAUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +/* + * Mindxg2p returns the process coodinate owning the entry globally in- + * dexed by ig_. + */ +#define Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) && \ + ( (nprocs_) > 1 ) ) \ + { \ + proc_ = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_); \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + else \ + { \ + proc_ = (src_); \ + } \ + } + +#define Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) { il_ = (ig_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*( j__ - i__ ) + \ + ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ + (ig_) - (inb_) : (ig_) ); \ + } \ + } + +#define Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) \ + { il_ = (ig_); proc_ = (src_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*(j__-i__) + \ + ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ + (ig_) - (inb_) : (ig_) ); \ + proc_ = (src_) + 1 + i__; \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + } +/* + * Mindxl2g computes the global index ig_ corresponding to the local + * index il_ in process proc_. 
+ */ +#define Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + if( (proc_) == (src_) ) \ + { \ + if( (il_) < (inb_) ) ig_ = (il_); \ + else ig_ = (il_) + \ + (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \ + } \ + else if( (proc_) < (src_) ) \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1+(nprocs_) ); \ + } \ + else \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1 ); \ + } \ + } \ + else \ + { \ + ig_ = (il_); \ + } \ + } +/* + * MnumrocI computes the # of local indexes np_ residing in the process + * of coordinate proc_ corresponding to the interval of global indexes + * i_:i_+n_-1 assuming that the global index 0 resides in the process + * src_, and that the indexes are distributed from src_ using the para- + * meters inb_, nb_ and nprocs_. + */ +#define MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + int inb__, mydist__, n__, nblk__, quot__, src__; \ + if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ + { \ + nblk__ = (-inb__) / (nb_) + 1; \ + src__ = (src_) + nblk__; \ + src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ + inb__ += nblk__*(nb_); \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == src__ ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - src__ ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != src__ ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != src__ ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + else \ + { \ + if( ( n__ = (n_) - inb__ ) <= 0 ) 
\ + { \ + if( (proc_) == (src_) ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - (src_) ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != (src_) ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != (src_) ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + } \ + else \ + { \ + np_ = (n_); \ + } \ + } + +#define Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ ) \ + MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_indxg2lp +STDC_ARGS( ( + int *, + int *, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2l +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2p +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxl2g +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +void HPL_infog2l +STDC_ARGS( ( + int, + int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + int *, + int *, + int *, + int * +) ); +int HPL_numroc +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_numrocI +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int, + const int +) ); + +void HPL_dlaswp00N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp10N 
+STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp01N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp01T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp02N +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp03N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp03T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp04N +STDC_ARGS( ( + const int, + const int, + const int, + double *, + const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp04T +STDC_ARGS( ( + const int, + const int, + const int, + double *, + const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp06N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp06T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); + +void HPL_pabort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_pwarn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... 
+) ); +void HPL_pdlaprnt +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_pdlamch +STDC_ARGS( ( + MPI_Comm, + const HPL_T_MACH +) ); +double HPL_pdlange +STDC_ARGS( ( + const HPL_T_grid *, + const HPL_T_NORM, + const int, + const int, + const int, + const double *, + const int +) ); + +#endif +/* + * End of hpl_pauxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pfact.h new file mode 100644 index 000000000..09eee79ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pfact.h @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PFACT_H +#define HPL_PFACT_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_PFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_RFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_UPD_FUN) +( HPL_T_panel *, int *, HPL_T_panel *, const int ); +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dlocmax +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_dlocswpN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_dlocswpT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_pdmxswp +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdpancrN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlT +STDC_ARGS( ( + HPL_T_panel *, + 
const int, + const int, + const int, + double * +) ); + +void HPL_pdrpancrN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdfact +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_pfact.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pgesv.h new file mode 100644 index 000000000..3ca576c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pgesv.h @@ -0,0 +1,346 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PGESV_H +#define HPL_PGESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +#include "hpl_comm.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_SWAP00 = 451, /* Use HPL_pdlaswp00 */ + HPL_SWAP01 = 452, /* Use HPL_pdlaswp01 */ + HPL_SW_MIX = 453, /* Use HPL_pdlaswp00_ for small number of */ + /* columns, and HPL_pdlaswp01_ otherwise. */ + HPL_NO_SWP = 499 +} HPL_T_SWAP; + +typedef struct HPL_S_palg +{ + HPL_T_TOP btopo; /* row broadcast topology */ + int depth; /* look-ahead depth */ + int nbdiv; /* recursive division factor */ + int nbmin; /* recursion stopping criterium */ + HPL_T_FACT pfact; /* panel fact variant */ + HPL_T_FACT rfact; /* recursive fact variant */ + HPL_T_PFA_FUN pffun; /* panel fact function ptr */ + HPL_T_RFA_FUN rffun; /* recursive fact function ptr */ + HPL_T_UPD_FUN upfun; /* update function */ + HPL_T_SWAP fswap; /* Swapping algorithm */ + int fsthr; /* Swapping threshold */ + int equil; /* Equilibration */ + int align; /* data alignment constant */ +} HPL_T_palg; + +typedef struct HPL_S_pmat +{ +#ifdef HPL_CALL_VSIPL + vsip_block_d * block; +#endif + double * A; /* pointer to local piece of A */ + double * X; /* pointer to solution vector */ + int n; /* global problem size */ + int nb; /* blocking factor */ + int ld; /* local leading dimension */ + int mp; /* local number of rows */ + int nq; /* local number of columns */ + int info; /* computational flag */ +} HPL_T_pmat; +/* + * 
--------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define MSGID_BEGIN_PFACT 1001 /* message id ranges */ +#define MSGID_END_PFACT 2000 +#define MSGID_BEGIN_FACT 2001 +#define MSGID_END_FACT 3000 +#define MSGID_BEGIN_PTRSV 3001 +#define MSGID_END_PTRSV 4000 + +#define MSGID_BEGIN_COLL 9001 +#define MSGID_END_COLL 10000 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define MNxtMgid( id_, beg_, end_ ) \ + (( (id_)+1 > (end_) ? (beg_) : (id_)+1 )) +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pipid +STDC_ARGS( ( + HPL_T_panel *, + int *, + int * +) ); +void HPL_plindx0 +STDC_ARGS( ( + HPL_T_panel *, + const int, + int *, + int *, + int *, + int * +) ); +void HPL_pdlaswp00N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdlaswp00T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_perm +STDC_ARGS( ( + const int, + int *, + int *, + int * +) ); +void HPL_logsort +STDC_ARGS( ( + const int, + const int, + int *, + int *, + int * +) ); +void HPL_plindx10 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int * +) ); +void HPL_plindx1 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_spreadN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_spreadT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const 
int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_equil +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_TRANS, + const int, + double *, + const int, + int *, + const int *, + const int *, + int * +) ); +void HPL_rollN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_rollT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_pdlaswp01N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdlaswp01T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdupdateNN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateNT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdgesv0 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK1 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK2 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); + +void HPL_pdtrsv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_pmat * +) ); + +#endif +/* + * End of hpl_pgesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmatgen.h new file mode 100644 index 000000000..1091b0f60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmatgen.h @@ -0,0 +1,77 @@ +/* + 
* -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PMATGEN_H +#define HPL_PMATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_matgen.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdmatgen +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int +) ); + +#endif +/* + * End of hpl_pmatgen.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmisc.h new file mode 100644 index 000000000..23550d47b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_pmisc.h @@ -0,0 +1,59 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PMISC_H +#define HPL_PMISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "mpi.h" + +#endif +/* + * End of hpl_pmisc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptest.h new file mode 100644 index 000000000..5777bd536 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptest.h @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PTEST_H +#define HPL_PTEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pgesv.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_test +{ + double epsil; /* epsilon machine */ + double thrsh; /* threshold */ + FILE * outfp; /* output stream (only in proc 0) */ + int kfail; /* # of tests failed */ + int kpass; /* # of tests passed */ + int kskip; /* # of tests skipped */ + int ktest; /* total number of tests */ +} HPL_T_test; + +/* + * --------------------------------------------------------------------- + * #define macro constants for testing only + * --------------------------------------------------------------------- + */ +#define HPL_LINE_MAX 256 +#define HPL_MAX_PARAM 20 +#define HPL_ISEED 100 +/* + * --------------------------------------------------------------------- + * global timers for timing analysis only + * --------------------------------------------------------------------- + */ +#ifdef HPL_DETAILED_TIMING +#define HPL_TIMING_BEG 11 /* timer 0 reserved, used by main */ +#define HPL_TIMING_N 6 /* number of timers defined below */ +#define HPL_TIMING_RPFACT 11 /* starting from here, contiguous */ +#define HPL_TIMING_PFACT 12 +#define HPL_TIMING_MXSWP 13 +#define HPL_TIMING_UPDATE 14 +#define HPL_TIMING_LASWP 15 +#define HPL_TIMING_PTRSV 16 +#endif +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ 
+void HPL_pdinfo +STDC_ARGS( ( + HPL_T_test *, + int *, + int *, + int *, + int *, + HPL_T_ORDER *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + HPL_T_TOP *, + int *, + int *, + HPL_T_SWAP *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_pdtest +STDC_ARGS( ( + HPL_T_test *, + HPL_T_grid *, + HPL_T_palg *, + const int, + const int +) ); + +#endif +/* + * End of hpl_ptest.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptimer.h new file mode 100644 index 000000000..43c8fe33a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_ptimer.h @@ -0,0 +1,96 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PTIMER_H +#define HPL_PTIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NPTIMER 64 +#define HPL_PTIMER_STARTFLAG 5.0 +#define HPL_PTIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_PTIME = 101, HPL_CPU_PTIME = 102 } HPL_T_PTIME; + +typedef enum +{ HPL_AMAX_PTIME = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME = 203 } +HPL_T_PTIME_OP; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_ptimer_cputime STDC_ARGS( ( void ) ); +double HPL_ptimer_walltime STDC_ARGS( ( void ) ); + +void HPL_ptimer STDC_ARGS( ( const int ) ); +void HPL_ptimer_boot STDC_ARGS( ( void ) ); +void HPL_ptimer_combine +STDC_ARGS( +( MPI_Comm comm, const HPL_T_PTIME_OP, const HPL_T_PTIME, + const int, const int, double * ) ); +void HPL_ptimer_disable STDC_ARGS( ( void ) ); +void HPL_ptimer_enable STDC_ARGS( ( void ) ); +double HPL_ptimer_inquire +STDC_ARGS( +( const HPL_T_PTIME, const int ) ); + +#endif +/* + * End of hpl_ptimer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_test.h new file mode 100644 index 000000000..1eedc97e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_test.h @@ -0,0 +1,80 @@ +/* + * -- High Performance Computing Linpack 
Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TEST_H +#define HPL_TEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_matgen.h" +#include "hpl_timer.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dinfo +STDC_ARGS( +( FILE * *, int *, int *, int *, + HPL_T_FACT *, int *, int *, int *, + int *, int *, HPL_T_FACT *, int *, + double *, double * ) ); +void HPL_dtest +STDC_ARGS( +( FILE *, const int, const int, const int, + HPL_T_FACT, HPL_T_FACT, const int, const double, + const double, int *, int *, int * ) ); + +#endif +/* + * End of hpl_test.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_timer.h new file mode 100644 index 000000000..4c91700ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/include/hpl_timer.h @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TIMER_H +#define HPL_TIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NTIMER 64 +#define HPL_TIMER_STARTFLAG 5.0 +#define HPL_TIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_TIME = 101, HPL_CPU_TIME = 102 } HPL_T_TIME; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_timer_cputime STDC_ARGS( ( void ) ); +double HPL_timer_walltime STDC_ARGS( ( void ) ); + +void HPL_timer STDC_ARGS( ( const int ) ); +void HPL_timer_boot STDC_ARGS( ( void ) ); +void HPL_timer_enable STDC_ARGS( ( void ) ); +void HPL_timer_disable STDC_ARGS( ( void ) ); +double HPL_timer_inquire +STDC_ARGS( +( const HPL_T_TIME, const int ) ); + +#endif +/* + * End of hpl_timer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/libdgemm.so.1.0.1 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/libdgemm.so.1.0.1 new file mode 100755 index 000000000..1f2100053 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/libdgemm.so.1.0.1 differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.c new file mode 100644 index 000000000..bf0c5e727 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.c @@ -0,0 +1,129 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_abort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_abort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_abort displays an error message on stderr and halts execution. + * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occurred. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables ..
+ */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR in function", SRNAME, cline ); + else + HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); + exit( 0 ); +/* + * End of HPL_abort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.o new file mode 100644 index 000000000..394d35b67 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_abort.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.c new file mode 100644 index 000000000..ec71180eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.c @@ -0,0 +1,343 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factors + * #ifndef HPL_LACPY_M_DEPTH + * #define HPL_LACPY_M_DEPTH 32 + * #define HPL_LACPY_LOG2_M_DEPTH 5 + * #endif + * #ifndef HPL_LACPY_N_DEPTH + * #define HPL_LACPY_N_DEPTH 4 + * #define HPL_LACPY_LOG2_N_DEPTH 2 + * #endif + */ +#ifndef HPL_LACPY_M_DEPTH +#define HPL_LACPY_M_DEPTH 4 +#define HPL_LACPY_LOG2_M_DEPTH 2 +#endif +#ifndef HPL_LACPY_N_DEPTH +#define HPL_LACPY_N_DEPTH 2 +#define HPL_LACPY_LOG2_N_DEPTH 1 +#endif + +#ifdef STDC_HEADERS +void HPL_dlacpy +( + const int M, + const int N, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dlacpy +( M, N, A, LDA, B, LDB ) + const int M; + const int N; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlacpy copies an array A into an array B. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the arrays A and + * B. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the arrays A + * and B. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N). + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * B (local output) double * + * On entry, B points to an array of dimension (LDB,N). On exit, + * B is overwritten with A. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of the array B. + * LDB must be at least MAX(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ +#ifdef HPL_LACPY_USE_COPY + register int j; +#else +#if ( HPL_LACPY_N_DEPTH == 1 ) + const double * A0 = A; + double * B0 = B; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + const double * A0 = A, * A1 = A + LDA; + double * B0 = B, * B1 = B + LDB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + const double * A0 = A, * A1 = A + LDA, + * A2 = A + (LDA << 1), * A3 = A + 3 * LDA; + double * B0 = B, * B1 = B + LDB, + * B2 = B + (LDB << 1), * B3 = B + 3 * LDB; +#endif + const int incA = ( (unsigned int)(LDA) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incB = ( (unsigned int)(LDB) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incA0 = (unsigned int)(LDA) - M, + incB0 = (unsigned int)(LDB) - M; + int mu, nu; + register int i, j; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + +#ifdef HPL_LACPY_USE_COPY + for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 ); +#else + mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) << + HPL_LACPY_LOG2_M_DEPTH ); + nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) << + HPL_LACPY_LOG2_N_DEPTH ); + + for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 0] = A0[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0]; +#endif + +#if ( HPL_LACPY_M_DEPTH > 1 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 1] = A0[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; 
B3[ 2] = A3[ 2]; + B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4]; + B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6]; + B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 8 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8]; + B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10]; + B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12]; + B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14]; + B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; 
B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16]; + B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18]; + B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20]; + B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22]; + B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24]; + B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26]; + B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28]; + B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30]; + B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31]; +#endif + +#endif + +#if 
( HPL_LACPY_N_DEPTH == 1 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; + A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH; + A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH; +#endif + } + + for( i = mu; i < M; i++ ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + *B0 = *A0; B0++; A0++; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; + *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++; +#endif + } + +#if ( HPL_LACPY_N_DEPTH == 1 ) + A0 += incA; B0 += incB; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; + A2 += incA; B2 += incB; A3 += incA; B3 += incB; +#endif + } + + for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH, + B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH ) + { + B0[ 0] = A0[ 0]; +#if ( HPL_LACPY_M_DEPTH > 1 ) + B0[ 1] = A0[ 1]; +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#endif +#if ( HPL_LACPY_M_DEPTH > 8 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#endif 
+ } + for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; } + } +#endif +/* + * End of HPL_dlacpy + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.o new file mode 100644 index 000000000..565483191 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlacpy.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.c new file mode 100644 index 000000000..c685f0d5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.c @@ -0,0 +1,876 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static function prototypes + * --------------------------------------------------------------------- + */ +static void HPL_dlamc1 +STDC_ARGS( +( int *, int *, int *, int * ) ); +static void HPL_dlamc2 +STDC_ARGS( +( int *, int *, int *, double *, + int *, double *, int *, double * ) ); +static double HPL_dlamc3 +STDC_ARGS( +( const double, const double ) ); +static void HPL_dlamc4 +STDC_ARGS( +( int *, const double, const int ) ); +static void HPL_dlamc5 +STDC_ARGS( +( const int, const int, const int, const int, + int *, double * ) ); +static double HPL_dipow +STDC_ARGS( +( const double, const int ) ); + +#ifdef STDC_HEADERS +double HPL_dlamch +( + const HPL_T_MACH CMACH +) +#else +double HPL_dlamch +( CMACH ) + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum (sfmin) such + * that 1 / sfmin does not overflow, the base of the machine (base), the + * precision (prec), the number of (base) digits in the mantissa (t), + * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the + * minimum exponent before (gradual) underflow (emin), the underflow + * threshold (rmin) base**(emin-1), the largest exponent before overflow + * (emax), the overflow threshold (rmax) (base**emax)*(1-eps). + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamch.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. 
and Marovich S. B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * CMACH (local input) const HPL_T_MACH + * Specifies the value to be returned by HPL_dlamch + * = HPL_MACH_EPS, HPL_dlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_dlamch := sfmin + * = HPL_MACH_BASE, HPL_dlamch := base + * = HPL_MACH_PREC, HPL_dlamch := eps*base + * = HPL_MACH_MLEN, HPL_dlamch := t + * = HPL_MACH_RND, HPL_dlamch := rnd + * = HPL_MACH_EMIN, HPL_dlamch := emin + * = HPL_MACH_RMIN, HPL_dlamch := rmin + * = HPL_MACH_EMAX, HPL_dlamch := emax + * = HPL_MACH_RMAX, HPL_dlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double eps, sfmin, base, t, rnd, emin, rmin, emax, + rmax, prec; + double small; + static int first=1; + int beta=0, imax=0, imin=0, it=0, lrnd=0; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; + HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); + base = (double)(beta); t = (double)(it); + if( lrnd != 0 ) + { rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; } + else + { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); } + prec = eps * base; emin = (double)(imin); emax = (double)(imax); + sfmin = rmin; small = HPL_rone / rmax; +/* + * Use SMALL plus a bit, to avoid the possibility of rounding causing + * overflow when computing 1/sfmin. 
+ */ + if( small >= sfmin ) sfmin = small * ( HPL_rone + eps ); + } + + if( CMACH == HPL_MACH_EPS ) return( eps ); + if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); + if( CMACH == HPL_MACH_BASE ) return( base ); + if( CMACH == HPL_MACH_PREC ) return( prec ); + if( CMACH == HPL_MACH_MLEN ) return( t ); + if( CMACH == HPL_MACH_RND ) return( rnd ); + if( CMACH == HPL_MACH_EMIN ) return( emin ); + if( CMACH == HPL_MACH_RMIN ) return( rmin ); + if( CMACH == HPL_MACH_EMAX ) return( emax ); + if( CMACH == HPL_MACH_RMAX ) return( rmax ); + + return( eps ); +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static void HPL_dlamc1 +( + int * BETA, + int * T, + int * RND, + int * IEEE1 +) +#else +static void HPL_dlamc1 +( BETA, T, RND, IEEE1 ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * IEEE1, * RND, * T; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc1 determines the machine parameters given by BETA, T, RND, + * and IEEE1. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. 
+ * + * IEEE1 (local output) int * + * Specifies whether rounding appears to be done in the IEEE + * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b, c, f, one, qtr, savec, t1, t2; + static int first=1, lbeta, lieee1, lrnd, lt; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; one = HPL_rone; +/* + * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and + * RND. Throughout this routine we use the function HPL_dlamc3 to ensure + * that relevant values are stored and not held in registers, or are not + * affected by optimizers. + * + * Compute a = 2.0**m with the smallest positive integer m such that + * fl( a + 1.0 ) == a. + */ + a = HPL_rone; c = HPL_rone; + do + { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } + while( c == HPL_rone ); +/* + * Now compute b = 2.0**m with the smallest positive integer m such that + * fl( a + b ) > a. + */ + b = HPL_rone; c = HPL_dlamc3( a, b ); + while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); } +/* + * Now compute the base. a and c are neighbouring floating point num- + * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe- + * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to + * BETA and not (BETA-1). + */ + qtr = one / 4.0; savec = c; + c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr); +/* + * Now determine whether rounding or chopping occurs, by adding a bit + * less than BETA/2 and a bit more than BETA/2 to a. + */ + b = (double)(lbeta); + f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a ); + if( c == a ) { lrnd = 1; } else { lrnd = 0; } + f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a ); + if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; +/* + * Try and decide whether rounding is done in the IEEE round to nea- + * rest style. 
b/2 is half a unit in the last place of the two numbers + * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa- + * vec is odd. Thus adding b/2 to a should not change a, but adding b/2 + * to savec should change savec. + */ + t1 = HPL_dlamc3( b / HPL_rtwo, a ); + t2 = HPL_dlamc3( b / HPL_rtwo, savec ); + if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; + else lieee1 = 0; +/* + * Now find the mantissa, T. It should be the integer part of log to the + * base BETA of a, however it is safer to determine T by powering. So we + * find T as the smallest positive integer for which fl( beta**t + 1.0 ) + * is equal to 1.0. + */ + lt = 0; a = HPL_rone; c = HPL_rone; + + do + { + lt++; a *= (double)(lbeta); + c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); + } while( c == HPL_rone ); + } + + *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; +} + +#ifdef STDC_HEADERS +static void HPL_dlamc2 +( + int * BETA, + int * T, + int * RND, + double * EPS, + int * EMIN, + double * RMIN, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * EMAX, * EMIN, * RND, * T; + double * EPS, * RMAX, * RMIN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc2 determines the machine parameters specified in its argu- + * ment list. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself + * based on a function PARANOIA by W. Kahan of the University of Cali- + * fornia at Berkeley for the computation of the relative machine epsi- + * lon eps. + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. 
This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * EPS (local output) double * + * The smallest positive number such that fl( 1.0 - EPS ) < 1.0, + * where fl denotes the computed value. + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow occurs. + * + * RMIN (local output) double * + * The smallest normalized number for the machine, given by + * BASE**( EMIN - 1 ), where BASE is the floating point value + * of BETA. + * + * EMAX (local output) int * + * The maximum exponent before overflow occurs. + * + * RMAX (local output) double * + * The largest positive number for the machine, given by + * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + * value of BETA. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double leps, lrmax, lrmin; + double a, b, c, half, one, rbase, sixth, small, + third, two, zero; + static int first=1, iwarn=0, lbeta=0, lemax, lemin, + lt=0; + int gnmin=0, gpmin=0, i, ieee, lieee1=0, + lrnd=0, ngnmin=0, ngpmin=0; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo; +/* + * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA, + * T, RND, EPS, EMIN and RMIN. + * + * Throughout this routine we use the function HPL_dlamc3 to ensure that + * relevant values are stored and not held in registers, or are not af- + * fected by optimizers. + * + * HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1. + */ + HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 ); +/* + * Start to find eps. + */ + b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a; +/* + * Try some tricks to see whether or not this is the correct EPS.
+ */ + b = two / 3.0; + half = one / HPL_rtwo; + sixth = HPL_dlamc3( b, -half ); + third = HPL_dlamc3( sixth, sixth ); + b = HPL_dlamc3( third, -half ); + b = HPL_dlamc3( b, sixth ); + b = Mabs( b ); if( b < leps ) b = leps; + + leps = HPL_rone; + + while( ( leps > b ) && ( b > zero ) ) + { + leps = b; + c = HPL_dlamc3( half * leps, + HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) ); + c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c ); + c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c ); + } + if( a < leps ) leps = a; +/* + * Computation of EPS complete. + * + * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). Keep + * dividing a by BETA until (gradual) underflow occurs. This is detected + * when we cannot recover the previous a. + */ + rbase = one / (double)(lbeta); small = one; + for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero ); + a = HPL_dlamc3( one, small ); + HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta ); + HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta ); + + ieee = 0; + + if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) ) + { + if( ngpmin == gpmin ) + { +/* + * Non twos-complement machines, no gradual underflow; e.g., VAX ) + */ + lemin = ngpmin; + } + else if( ( gpmin-ngpmin ) == 3 ) + { +/* + * Non twos-complement machines with gradual underflow; e.g., IEEE stan- + * dard followers + */ + lemin = ngpmin - 1 + lt; ieee = 1; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, gpmin ); + iwarn = 1; + } + } + else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) + { + if( Mabs( ngpmin-ngnmin ) == 1 ) + { +/* + * Twos-complement machines, no gradual underflow; e.g., CYBER 205 + */ + lemin = Mmax( ngpmin, ngnmin ); + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) + { + if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) + { +/* + * Twos-complement 
machines with gradual underflow; no known machine + */ + lemin = Mmax( ngpmin, ngnmin ) - 1 + lt; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); + lemin = Mmin( lemin, gnmin ); iwarn = 1; + } +/* + * Comment out this if block if EMIN is ok + */ + if( iwarn != 0 ) + { + first = 1; + HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n", +"WARNING. The value EMIN may be incorrect:- EMIN =", lemin, +"If, after inspection, the value EMIN looks acceptable, please comment ", +"out the if block as marked within the code of routine HPL_dlamc2, ", +"otherwise supply EMIN explicitly." ); + } +/* + * Assume IEEE arithmetic if we found denormalised numbers above, or if + * arithmetic seems to round in the IEEE style, determined in routine + * HPL_dlamc1. A true IEEE machine should have both things true; how- + * ever, faulty machines may have one or the other. + */ + if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; + else ieee = 0; +/* + * Compute RMIN by successive division by BETA. We could compute RMIN + * as BASE**( EMIN - 1 ), but some machines underflow during this compu- + * tation. + */ + lrmin = HPL_rone; + for( i = 0; i < 1 - lemin; i++ ) + lrmin = HPL_dlamc3( lrmin*rbase, zero ); +/* + * Finally, call HPL_dlamc5 to compute emax and rmax. + */ + HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); + } + *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; + *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; +} + +#ifdef STDC_HEADERS +static double HPL_dlamc3( const double A, const double B ) +#else +static double HPL_dlamc3( A, B ) +/* + * .. Scalar Arguments .. 
+ */ + const double A, B; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc3 is intended to force a and b to be stored prior to doing + * the addition of a and b, for use in situations where optimizers + * might hold one of these in a register. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc3.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * A, B (local input) double + * The values a and b. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( A + B ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc4 +( + int * EMIN, + const double START, + const int BASE +) +#else +static void HPL_dlamc4( EMIN, START, BASE ) +/* + * .. Scalar Arguments .. + */ + int * EMIN; + const int BASE; + const double START; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc4 is a service function for HPL_dlamc2. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc4.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow, computed by + * setting A = START and dividing by BASE until the previous A + * can not be recovered. + * + * START (local input) double + * The starting point for determining EMIN. + * + * BASE (local input) int + * The base of the machine. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b1, b2, c1, c2, d1, d2, one, rbase, zero; + int i; +/* .. + * .. Executable Statements .. 
+ */ + a = START; one = HPL_rone; rbase = one / (double)(BASE); + zero = HPL_rzero; + *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a; + + do + { + (*EMIN)--; a = b1; + b1 = HPL_dlamc3( a / BASE, zero ); + c1 = HPL_dlamc3( b1 * BASE, zero ); + d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; + b2 = HPL_dlamc3( a * rbase, zero ); + c2 = HPL_dlamc3( b2 / rbase, zero ); + d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; + } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc5 +( + const int BETA, + const int P, + const int EMIN, + const int IEEE, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + const int BETA, EMIN, IEEE, P; + int * EMAX; + double * RMAX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc5 attempts to compute RMAX, the largest machine floating- + * point number, without overflow. It assumes that EMAX + abs(EMIN) sum + * approximately to a power of 2. It will fail on machines where this + * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + * EMAX = 28718). It will also fail if the value supplied for EMIN is + * too large (i.e. too close to zero), probably with overflow. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc5.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * BETA (local input) int + * The base of floating-point arithmetic. + * + * P (local input) int + * The number of base BETA digits in the mantissa of a floating- + * point value. + * + * EMIN (local input) int + * The minimum exponent before (gradual) underflow. + * + * IEEE (local input) int + * A logical flag specifying whether or not the arithmetic sys- + * tem is thought to comply with the IEEE standard. + * + * EMAX (local output) int * + * The largest exponent before overflow. 
+ * + * RMAX (local output) double * + * The largest machine floating-point number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double oldy=HPL_rzero, recbas, y, z; + int exbits=1, expsum, i, lexp=1, nbits, try, + uexp; +/* .. + * .. Executable Statements .. + */ +/* + * First compute lexp and uexp, two powers of 2 that bound abs(EMIN). + * We then assume that EMAX + abs( EMIN ) will sum approximately to the + * bound that is closest to abs( EMIN ). (EMAX is the exponent of the + * required number RMAX). + */ +l_10: + try = (int)( (unsigned int)(lexp) << 1 ); + if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; } + + if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; } +/* + * Now -lexp is less than or equal to EMIN, and -uexp is greater than or + * equal to EMIN. exbits is the number of bits needed to store the expo- + * nent. + */ + if( ( uexp+EMIN ) > ( -lexp-EMIN ) ) + { expsum = (int)( (unsigned int)(lexp) << 1 ); } + else + { expsum = (int)( (unsigned int)(uexp) << 1 ); } +/* + * expsum is the exponent range, approximately equal to EMAX - EMIN + 1. + */ + *EMAX = expsum + EMIN - 1; +/* + * nbits is the total number of bits needed to store a floating-point + * number. + */ + nbits = 1 + exbits + P; + + if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) + { +/* + * Either there are an odd number of bits used to store a floating-point + * number, which is unlikely, or some bits are not used in the represen- + * tation of numbers, which is possible, (e.g. Cray machines) or the + * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), + * which is perhaps the most likely. We have to assume the last alterna- + * tive. If this is true, then we need to reduce EMAX by one because + * there must be some way of representing zero in an implicit-bit sys- + * tem. On machines like Cray we are reducing EMAX by one unnecessarily. 
+ */ + (*EMAX)--; + } + + if( IEEE != 0 ) + { +/* + * Assume we are on an IEEE machine which reserves one exponent for in- + * finity and NaN. + */ + (*EMAX)--; + } +/* + * Now create RMAX, the largest machine number, which should be equal to + * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being + * careful that the result is less than 1.0. + */ + recbas = HPL_rone / (double)(BETA); + z = (double)(BETA) - HPL_rone; + y = HPL_rzero; + + for( i = 0; i < P; i++ ) + { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); } + + if( y >= HPL_rone ) y = oldy; +/* + * Now multiply by BETA**EMAX to get RMAX. + */ + for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero ); + + *RMAX = y; +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static double HPL_dipow +( + const double X, + const int N +) +#else +static double HPL_dipow( X, N ) +/* + * .. Scalar Arguments .. + */ + const int N; + const double X; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dipow computes the integer n-th power of a real scalar x. + * + * Arguments + * ========= + * + * X (local input) const double + * The real scalar x. + * + * N (local input) const int + * The integer power to raise x to. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r, y=HPL_rone; + int k, n; +/* .. + * .. Executable Statements .. 
+ */ + if( X == HPL_rzero ) return( HPL_rzero ); + if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } + for( k = 0; k < n; k++ ) y *= r; + + return( y ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.o new file mode 100644 index 000000000..ecbe3bc06 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlamch.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.c new file mode 100644 index 000000000..82f118b6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_dlange +( + const HPL_T_NORM NORM, + const int M, + const int N, + const double * A, + const int LDA +) +#else +double HPL_dlange +( NORM, M, N, A, LDA ) + const HPL_T_NORM NORM; + const int M; + const int N; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a matrix A: + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. 
+ * + * Arguments + * ========= + * + * NORM (local input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N), that + * contains the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero ); + + if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ) + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - M; + } + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ). 
+ */ + work = (double*)malloc( (size_t)(N) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( j = 0; j < N; j++ ) + { + s = HPL_rzero; + for( i = 0; i < M; i++ ) { s += Mabs( *A ); A++; } + work[j] = s; A += LDA - M; + } +/* + * Find maximum sum of columns for 1-norm + */ + v0 = work[HPL_idamax( N, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ) + */ + work = (double*)malloc( (size_t)(M) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( i = 0; i < M; i++ ) { work[i] = HPL_rzero; } + + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { work[i] += Mabs( *A ); A++; } + A += LDA - M; + } +/* + * Find maximum sum of rows for inf-norm + */ + v0 = work[HPL_idamax( M, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + + return( v0 ); +/* + * End of HPL_dlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.o new file mode 100644 index 000000000..869bd9c89 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlange.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.c new file mode 100644 index 000000000..f29df3cd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.c @@ -0,0 +1,130 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dlaprnt +( + const int M, + const int N, + double * A, + const int IA, + const int JA, + const int LDA, + const char * CMATNM +) +#else +void HPL_dlaprnt +( M, N, A, IA, JA, LDA, CMATNM ) + const int M; + const int N; + double * A; + const int IA; + const int JA; + const int LDA; + const char * CMATNM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaprnt prints to standard error an M-by-N matrix A. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A. M must be at + * least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of A. N must be + * at least zero. + * + * A (local input) double * + * On entry, A points to an array of dimension (LDA,N). + * + * IA (local input) const int + * On entry, IA specifies the starting row index to be printed. + * + * JA (local input) const int + * On entry, JA specifies the starting column index to be + * printed. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * CMATNM (local input) const char * + * On entry, CMATNM is the name of the matrix to be printed. + * + * --------------------------------------------------------------------- + */ +/* + * .. 
Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) + { + HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM, IA+i, + JA+j, *(Mptr( A, i, j, LDA )) ); + } + } +/* + * End of HPL_dlaprnt + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.o new file mode 100644 index 000000000..2fe4d970e Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlaprnt.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.c new file mode 100644 index 000000000..410451c24 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.c @@ -0,0 +1,398 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factors + * #ifndef HPL_LATCPY_M_DEPTH + * #define HPL_LATCPY_M_DEPTH 32 + * #define HPL_LATCPY_LOG2_M_DEPTH 5 + * #endif + * #ifndef HPL_LATCPY_N_DEPTH + * #define HPL_LATCPY_N_DEPTH 4 + * #define HPL_LATCPY_LOG2_N_DEPTH 2 + * #endif + */ +#ifndef HPL_LATCPY_M_DEPTH +#define HPL_LATCPY_M_DEPTH 4 +#define HPL_LATCPY_LOG2_M_DEPTH 2 +#endif +#ifndef HPL_LATCPY_N_DEPTH +#define HPL_LATCPY_N_DEPTH 2 +#define HPL_LATCPY_LOG2_N_DEPTH 1 +#endif + +#ifdef STDC_HEADERS +void HPL_dlatcpy +( + const int M, + const int N, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dlatcpy +( M, N, A, LDA, B, LDB ) + const int M; + const int N; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlatcpy copies the transpose of an array A into an array B. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array B and + * the number of columns of A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of rows of the array A and + * the number of columns of B. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,M). + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,N). + * + * B (local output) double * + * On entry, B points to an array of dimension (LDB,N). On exit, + * B is overwritten with the transpose of A. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of the array B. + * LDB must be at least MAX(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ +#ifdef HPL_LATCPY_USE_COPY + register int j; +#else +#if ( HPL_LATCPY_N_DEPTH == 1 ) + const double * A0 = A; + double * B0 = B; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + const double * A0 = A, * A1 = A + 1; + double * B0 = B, * B1 = B + LDB; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + const double * A0 = A, * A1 = A + 1, + * A2 = A + 2, * A3 = A + 3; + double * B0 = B, * B1 = B + LDB, + * B2 = B + (LDB << 1), * B3 = B + 3 * LDB; +#endif + const int incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH), + incB = ( (unsigned int)(LDB) << + HPL_LATCPY_LOG2_N_DEPTH ) - M, + incA0 = -M * LDA + 1, incB0 = LDB - M; + int mu, nu; + register int i, j; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + +#ifdef HPL_LATCPY_USE_COPY + for( j = 0; j < N; j++, B0 += LDB ) HPL_dcopy( M, A0+j, LDA, B0, 1 ); +#else + mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) << + HPL_LATCPY_LOG2_M_DEPTH ); + nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) << + HPL_LATCPY_LOG2_N_DEPTH ); + + for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH ) + { + for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH ) + { +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 0] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA; + B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA; +#endif + +#if ( HPL_LATCPY_M_DEPTH > 1 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 1] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA; + B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 2 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA; + B0[ 3] = *A0; A0 
+= LDA; B1[ 3] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA; + B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA; + B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA; + B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 4 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA; + B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA; + B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA; + B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA; + B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA; + B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA; + B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA; + B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA; + B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA; + B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA; + B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA; + B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 8 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA; + B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA; + B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA; + B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA; + B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA; + B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA; + B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA; + B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA; + B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA; + B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA; + B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA; + B2[ 8] 
= *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA; + B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA; + B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA; + B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA; + B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA; + B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA; + B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA; + B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA; + B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA; + B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA; + B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA; + B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA; + B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA; + B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA; + B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_M_DEPTH > 16 ) + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA; + B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA; + B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA; + B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA; + B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA; + B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA; + B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA; + B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA; + B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA; + B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA; + B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA; + B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA; + B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA; + B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA; + B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA; + B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA; + B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA; + B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA; + B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA; + B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA; + B0[29] = *A0; 
A0 += LDA; B1[29] = *A1; A1 += LDA; + B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA; + B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA; + B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA; + B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA; + B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA; + B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA; + B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA; + B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA; + B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA; + B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA; + B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA; + B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA; + B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA; + B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA; + B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA; + B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA; + B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA; + B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA; + B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA; + B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA; + B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA; + B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA; + B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA; + B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA; + B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA; + B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA; + B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA; + B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA; + B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA; + B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA; + B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA; + B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA; + B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA; +#endif + +#endif +#if ( HPL_LATCPY_N_DEPTH == 1 ) + B0 += HPL_LATCPY_M_DEPTH; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + B0 += HPL_LATCPY_M_DEPTH; B1 += 
HPL_LATCPY_M_DEPTH; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH; + B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH; +#endif + } + + for( i = mu; i < M; i++ ) + { +#if ( HPL_LATCPY_N_DEPTH == 1 ) + *B0 = *A0; B0++; A0 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA; + *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA; +#endif + } + +#if ( HPL_LATCPY_N_DEPTH == 1 ) + A0 += incA; B0 += incB; +#elif ( HPL_LATCPY_N_DEPTH == 2 ) + A0 += incA; A1 += incA; B0 += incB; B1 += incB; +#elif ( HPL_LATCPY_N_DEPTH == 4 ) + A0 += incA; A1 += incA; A2 += incA; A3 += incA; + B0 += incB; B1 += incB; B2 += incB; B3 += incB; +#endif + } + + for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 ) + { + for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH ) + { + B0[ 0]=*A0; A0 += LDA; +#if ( HPL_LATCPY_M_DEPTH > 1 ) + B0[ 1]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 2 ) + B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 4 ) + B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA; + B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 8 ) + B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA; + B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA; + B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA; + B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA; +#endif +#if ( HPL_LATCPY_M_DEPTH > 16 ) + B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA; + B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA; + B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA; + B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA; + B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA; + B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA; + B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA; + B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA; +#endif + } + + for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; } + } 
+#endif +/* + * End of HPL_dlatcpy + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.o new file mode 100644 index 000000000..5ebbb92b0 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_dlatcpy.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.c new file mode 100644 index 000000000..adaf22b39 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_fprintf +( + FILE * STREAM, + const char * FORM, + ... +) +#else +void HPL_fprintf( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_fprintf is a wrapper around fprintf flushing the output stream. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + va_list argptr; + char cline[256]; +#ifndef STDC_HEADERS + FILE * STREAM; + char * FORM; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + (void) fprintf( STREAM, "%s", cline ); + (void) fflush( STREAM ); +/* + * End of HPL_fprintf + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.o new file mode 100644 index 000000000..28a92f79f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_fprintf.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.c new file mode 100644 index 000000000..bc40818a9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_warn +( + FILE * STREAM, + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_warn( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_warn displays an error message. 
+ * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + FILE * STREAM; + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function", + SRNAME, cline ); + else + HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); +/* + * End of HPL_warn + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.o new file mode 100644 index 000000000..1464ddcb8 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/auxil/HPL_warn.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.c new file mode 100644 index 000000000..72be5774b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.c @@ -0,0 +1,175 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_daxpy
+
+#ifdef STDC_HEADERS
+void HPL_daxpy
+(
+   const int N,
+   const double ALPHA,
+   const double * X,
+   const int INCX,
+   double * Y,
+   const int INCY
+)
+#else
+void HPL_daxpy
+( N, ALPHA, X, INCX, Y, INCY )
+   const int N;
+   const double ALPHA;
+   const double * X;
+   const int INCX;
+   double * Y;
+   const int INCY;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_daxpy scales the vector x by alpha and adds it to y.
+ * ( y := y + alpha * x, via the CBLAS, VSIPL or FBLAS section below. )
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors x and y. N
+ *         must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha. When ALPHA is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry, X is an incremented array of dimension at least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry, Y is an incremented array of dimension at least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y.
+ *         On exit, the entries of the incremented array Y are updated
+ *         with the scaled entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   cblas_daxpy( N, ALPHA, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register const double alpha = ALPHA;
+   register double x0, x1, x2, x3, y0, y1, y2, y3;
+   const double * StX;                /* X value that ends the unrolled loop */
+   register int i;
+   int nu;                            /* N rounded down to a multiple of 4 */
+   const int incX2 = 2 * INCX, incY2 = 2 * INCY,
+             incX3 = 3 * INCX, incY3 = 3 * INCY,
+             incX4 = 4 * INCX, incY4 = 4 * INCY;
+
+   if( ( N > 0 ) && ( alpha != HPL_rzero ) )  /* otherwise Y is not touched */
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )
+      {
+         StX = X + nu * INCX;
+/* four elements per pass; all eight loads precede the four stores */
+         do
+         {
+            x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+
+            *Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1;
+            Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3;
+
+            X += incX4;
+            Y += incY4;
+
+         } while( X != StX );
+      }
+/* the N - nu elements left over, one at a time */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0 = (*X);
+         y0 = (*Y);
+
+         *Y = y0 + alpha * x0;
+
+         X += INCX;
+         Y += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+   double alpha = ALPHA;       /* addressable copy: F77daxpy is given &alpha */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N N
+#define F77incx INCX
+#define F77incy INCY
+#endif
+   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_daxpy
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.o
new file mode 100644
index 000000000..ff89e13c3
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_daxpy.o differ
diff --git
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.c new file mode 100644 index 000000000..a8fe24109 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.c @@ -0,0 +1,168 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dcopy
+
+#ifdef STDC_HEADERS
+void HPL_dcopy
+(
+   const int N,
+   const double * X,
+   const int INCX,
+   double * Y,
+   const int INCY
+)
+#else
+void HPL_dcopy
+( N, X, INCX, Y, INCY )
+   const int N;
+   const double * X;
+   const int INCX;
+   double * Y;
+   const int INCY;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dcopy copies the vector x into the vector y.
+ * ( y := x, via the CBLAS, VSIPL or FBLAS section below. )
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors x and y. N
+ *         must be at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry, X is an incremented array of dimension at least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry, Y is an incremented array of dimension at least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y.
+ *         On exit, the entries of the incremented array Y are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS
+   cblas_dcopy( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL
+   register double x0, x1, x2, x3, x4, x5, x6, x7;
+   const double * StX;                /* X value that ends the unrolled loop */
+   register int i;
+   int nu;                            /* N rounded down to a multiple of 8 */
+   const int incX2 = 2 * INCX, incY2 = 2 * INCY,
+             incX3 = 3 * INCX, incY3 = 3 * INCY,
+             incX4 = 4 * INCX, incY4 = 4 * INCY,
+             incX5 = 5 * INCX, incY5 = 5 * INCY,
+             incX6 = 6 * INCX, incY6 = 6 * INCY,
+             incX7 = 7 * INCX, incY7 = 7 * INCY,
+             incX8 = 8 * INCX, incY8 = 8 * INCY;
+
+   if( N > 0 )
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 )
+      {
+         StX = X + nu * INCX;
+/* eight elements per pass; all eight loads precede the eight stores */
+         do
+         {
+            x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+
+            *Y = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5;
+            Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7;
+
+            X += incX8;
+            Y += incY8;
+
+         } while( X != StX );
+      }
+/* the N - nu elements left over, one at a time */
+      for( i = N - nu; i != 0; i-- )
+      {
+         x0 = (*X);
+         *Y = x0;
+
+         X += INCX;
+         Y += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY;
+#else
+#define F77N N
+#define F77incx INCX
+#define F77incy INCY
+#endif
+   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );  /* scalars go by address */
+#endif
+/*
+ * End of HPL_dcopy
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.o
new file mode 100644
index 000000000..d0bc0e6e6
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dcopy.o differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.c
new file mode 100644
index 000000000..b222e4717
--- /dev/null
+++
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.c @@ -0,0 +1,521 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemmNN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iail, iblj, icij, j, jal, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) + { + t0 = ALPHA * B[iblj]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmNT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double 
t0; + int i, iail, ibj, ibjl, icij, j, jal, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) + { + t0 = ALPHA * B[ibjl]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iai, iail, iblj, icij, j, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) + { t0 += A[iail] * B[iblj]; } + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iali, ibj, ibjl, icij, j, jai, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iali = jai, ibjl = ibj; + l < K; l++, iali += 
1, ibjl += LDB ) t0 += A[iali] * B[ibjl]; + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemm0 +( + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + BETA, C, LDC ) + const enum HPL_TRANS TRANSA, TRANSB; + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) || + ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) && + ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } + return; + } + + if( TRANSB == HplNoTrans ) + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } + else + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dgemm +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +void HPL_dgemm +( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANSA; + const enum HPL_TRANS TRANSB; + const int M; + const int N; + const int K; + const double ALPHA; + const double * A; + const int LDA; + 
const double * B; + const int LDB; + const double BETA; + double * C; + const int LDC; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemm performs one of the matrix-matrix operations + * + * C := alpha * op( A ) * op( B ) + beta * C + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * Alpha and beta are scalars, and A, B and C are matrices, with op(A) + * an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANSA (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * TRANSB (local input) const enum HPL_TRANS + * On entry, TRANSB specifies the form of op(B) to be used in + * the matrix-matrix operation follows: + * TRANSB==HplNoTrans : op( B ) = B, + * TRANSB==HplTrans : op( B ) = B^T, + * TRANSB==HplConjTrans : op( B ) = B^T. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix + * op(A) and of the matrix C. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix + * op(B) and the number of columns of the matrix C. N must be + * at least zero. + * + * K (local input) const int + * On entry, K specifies the number of columns of the matrix + * op(A) and the number of rows of the matrix op(B). K must be + * be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrices A and B + * need not be set on input. 
+ * + * A (local input) const double * + * On entry, A is an array of dimension (LDA,ka), where ka is + * k when TRANSA==HplNoTrans, and is m otherwise. Before + * entry with TRANSA==HplNoTrans, the leading m by k part of + * the array A must contain the matrix A, otherwise the leading + * k by m part of the array A must contain the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the first dimension of A as declared + * in the calling (sub) program. When TRANSA==HplNoTrans then + * LDA must be at least max(1,m), otherwise LDA must be at least + * max(1,k). + * + * B (local input) const double * + * On entry, B is an array of dimension (LDB,kb), where kb is + * n when TRANSB==HplNoTrans, and is k otherwise. Before + * entry with TRANSB==HplNoTrans, the leading k by n part of + * the array B must contain the matrix B, otherwise the leading + * n by k part of the array B must contain the matrix B. + * + * LDB (local input) const int + * On entry, LDB specifies the first dimension of B as declared + * in the calling (sub) program. When TRANSB==HplNoTrans then + * LDB must be at least max(1,k), otherwise LDB must be at least + * max(1,n). + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When BETA is + * supplied as zero then the elements of the matrix C need + * not be set on input. + * + * C (local input/output) double * + * On entry, C is an array of dimension (LDC,n). Before entry, + * the leading m by n part of the array C must contain the + * matrix C, except when beta is zero, in which case C need not + * be set on entry. On exit, the array C is overwritten by the + * m by n matrix ( alpha*op( A )*op( B ) + beta*C ). + * + * LDC (local input) const int + * On entry, LDC specifies the first dimension of C as declared + * in the calling (sub) program. LDC must be at least + * max(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + printf("Order %d, TransA %d, TransB %d, M %d, N %d, K %d\n", ORDER, TRANSA, TRANSB, M, N, K); + cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, + C, LDC ); + } + else + { + HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, + C, LDC ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringStructPtr + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringCrayStyle + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, F77K = K, + F77lda = LDA, F77ldb = LDB, F77ldc = LDC; +#else +#define F77M M +#define F77N N +#define F77K K +#define F77lda LDA +#define F77ldb LDB +#define F77ldc LDC +#endif + char ctransa, ctransb; + + if( TRANSA == HplNoTrans ) ctransa = 'N'; + else if( TRANSA == HplTrans ) ctransa = 'T'; + else ctransa = 'C'; + + if( TRANSB == HplNoTrans ) ctransb = 'N'; + else if( TRANSB == HplTrans ) ctransb = 'T'; + else ctransb = 'C'; + + if( ORDER == HplColumnMajor ) + { +#ifdef StringSunStyle + F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, 
&alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif + } + else + { +#ifdef StringSunStyle + F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif + } +#endif +/* + * End of HPL_dgemm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.o new file mode 100644 index 000000000..12e87044c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.c new file mode 100644 index 000000000..6366c5a48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.c @@ -0,0 +1,326 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemv0 +( + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_TRANS TRANS; + const int INCX, INCY, LDA, M, N; + const double ALPHA, BETA; + const double * A, * X; + double * Y; +#endif +{ +/* + * .. Local Variables .. + */ + int i, iaij, ix, iy, j, jaj, jx, jy; + register double t0; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M == 0 ) || ( N == 0 ) || + ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; } + + if( TRANS == HplNoTrans ) + { + HPL_dscal( M, BETA, Y, INCY ); + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY ) + { Y[iy] += A[iaij] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = HPL_rzero; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { t0 += A[iaij] * X[ix]; } + if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0; + else Y[jy] = BETA * Y[jy] + ALPHA * t0; + } + } +} +#endif + +#ifdef STDC_HEADERS +void HPL_dgemv +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +void HPL_dgemv +( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANS; + const int M; + const int N; + const double ALPHA; + const double * A; + const int LDA; + const double * X; + const int INCX; + const double BETA; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemv performs one of the matrix-vector operations + * + * y := alpha * op( A ) * x + beta * y, + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * where alpha and beta are scalars, x and y are vectors and A is an m + * by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. 
+ * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the operation to be performed as + * follows: + * TRANS = HplNoTrans y := alpha*A *x + beta*y, + * TRANS = HplTrans y := alpha*A^T*x + beta*y. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then A and X need not be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When ALPHA is + * supplied as zero then Y need not be set on input. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * Before entry with BETA non-zero, the incremented array Y must + * contain the vector y. On exit, Y is overwritten by the + * updated vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } + else + { + HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), + N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftran; +#endif +#ifdef StringStructPtr + F77_CHAR ftran; +#endif +#ifdef StringCrayStyle + F77_CHAR ftran; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + char ctran; + + if( ORDER == HplColumnMajor ) + { + ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); + +#ifdef StringSunStyle + F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + else + { + ctran = ( TRANS == HplNoTrans ? 
'T' : 'N' ); +#ifdef StringSunStyle + F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + +#endif +/* + * End of HPL_dgemv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.o new file mode 100644 index 000000000..a9b801898 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dgemv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.c new file mode 100644 index 000000000..5ea702778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dger + +#ifdef STDC_HEADERS +void HPL_dger +( + const enum HPL_ORDER ORDER, + const int M, + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY, + double * A, + const int LDA +) +#else +void HPL_dger +( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) + const enum HPL_ORDER ORDER; + const int M; + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; + double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dger performs the rank 1 operation + * + * A := alpha * x * y^T + A, + * + * where alpha is a scalar, x is an m-element vector, y is an n-element + * vector and A is an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then X and Y need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. 
+ * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * A (local input/output) double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. On exit, A is + * overwritten by the updated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ); +#endif +#ifdef HPL_CALL_VSIPL + register double t0; + int i, iaij, ix, iy, j, jaj, jx, jy; + + if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return; + + if( ORDER == HplColumnMajor ) + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = ALPHA * Y[jy]; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { A[iaij] += X[ix] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY ) + { A[iaij] += Y[iy] * t0; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + + if( ORDER == HplColumnMajor ) + { F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); } + else + { F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); } +#endif +/* + * End of HPL_dger + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.o new file mode 100644 index 000000000..255cfa4b2 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dger.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.c new file mode 100644 index 000000000..7e041991f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dscal + +#ifdef STDC_HEADERS +void HPL_dscal +( + const int N, + const double ALPHA, + double * X, + const int INCX +) +#else +void HPL_dscal +( N, ALPHA, X, INCX ) + const int N; + const double ALPHA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dscal scales the vector x by alpha. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are scaled + * by the scalar alpha. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. 
+ * INCX must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dscal( N, ALPHA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + register const double alpha = ALPHA; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( ( N > 0 ) && ( alpha != HPL_rone ) ) + { + if( alpha == HPL_rzero ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = (double *)X + nu * INCX; + + do + { + (*X) = HPL_rzero; X[incX4] = HPL_rzero; + X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero; + X[incX2] = HPL_rzero; X[incX6] = HPL_rzero; + X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; } + } + else + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + x0 *= alpha; x4 *= alpha; x1 *= alpha; x5 *= alpha; + x2 *= alpha; x6 *= alpha; x3 *= alpha; x7 *= alpha; + + (*X) = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5; + X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7; + + X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + + F77dscal( &F77N, &alpha, X, &F77incx ); +#endif +/* + * End of HPL_dscal + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.o new file mode 
100644 index 000000000..4cb4cd8c9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dscal.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.c new file mode 100644 index 000000000..a336a7d29 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.c @@ -0,0 +1,977 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const 
double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNN +( + const int M, + const int N, + const double ALPHA, + const double * 
A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaki, ibij, ibkj, j, jai, jbj, k; + register double t0; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + 
t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = 
(N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS 
+static void HPL_dtrsmRUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= 
LDB ) + { + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsm0 +( + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } + return; + } + + if( SIDE == HplLeft ) + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + 
{ + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } + else + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsm +( + const enum HPL_ORDER ORDER, + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dtrsm +( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_ORDER ORDER; + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int M; + const int N; + const double ALPHA; + const double * A; + const int LDA; + double 
* B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsm solves one of the matrix equations + * + * op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + * + * where alpha is a scalar, X and B are m by n matrices, A is a unit, or + * non-unit, upper or lower triangular matrix and op(A) is one of + * + * op( A ) = A or op( A ) = A^T. + * + * The matrix X is overwritten on B. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * SIDE (local input) const enum HPL_SIDE + * On entry, SIDE specifies whether op(A) appears on the left + * or right of X as follows: + * SIDE==HplLeft op( A ) * X = alpha * B, + * SIDE==HplRight X * op( A ) = alpha * B. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix B. + * M must be at least zero. 
+ * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix B. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrix B need not + * be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * k, where k is m when SIDE==HplLeft and is n + * otherwise. Before entry with UPLO==HplUpper, the leading + * k by k upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. When UPLO==HplLower on entry, + * the leading k by k lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. + * + * B (local input/output) double * + * On entry, B points to an array of size equal to or greater + * than LDB * n. Before entry, the leading m by n part of the + * array B must contain the matrix B, except when beta is zero, + * in which case B need not be set on entry. On exit, the array + * B is overwritten by the m by n solution matrix. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of B as + * declared in the calling (sub) program. LDB must be at + * least MAX(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); + } + else + { + HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), + ( UPLO == HplLower ? HplUpper : HplLower ), + TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef StringSunStyle +#if defined( HPL_USE_F77_INTEGER_DEF ) + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77ldb = LDB; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77ldb LDB +#endif + char cside, cuplo, ctran, cdiag; + + if( TRANS == HplNoTrans ) ctran = 'N'; + else if( TRANS == HplTrans ) ctran = 'T'; + else ctran = 'C'; + cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); + + if( ORDER == HplColumnMajor ) + { + cside = ( SIDE == HplRight ? 'R' : 'L' ); + cuplo = ( UPLO == HplLower ? 
'L' : 'U' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } + else + { + cside = ( SIDE == HplRight ? 'L' : 'R' ); + cuplo = ( UPLO == HplLower ? 
'U' : 'L' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } +#endif +/* + * End of HPL_dtrsm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.o new file mode 100644 index 000000000..339a5635f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.c new file mode 100644 index 000000000..99e84f073 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.c @@ -0,0 +1,520 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + X[jx] /= A[jaj]; t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLTN( N, A, LDA, X, INCX ) 
+ const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + t0 /= A[jaj]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + X[jx] /= A[j+jaj]; t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 
0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + t0 /= A[iaij]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsv0 +( + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + if( N == 0 ) return; + + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUTU( N, A, LDA, X, INCX ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) 
{ HPL_dtrsvLNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLTU( N, A, LDA, X, INCX ); } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsv +( + const enum HPL_ORDER ORDER, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +void HPL_dtrsv +( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) + const enum HPL_ORDER ORDER; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int N; + const double * A; + const int LDA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsv solves one of the systems of equations + * + * A * x = b, or A^T * x = b, + * + * where b and x are n-element vectors and A is an n by n non-unit, or + * unit, upper or lower triangular matrix. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the equations to be solved as + * follows: + * TRANS==HplNoTrans A * x = b, + * TRANS==HplTrans A^T * x = b. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. 
When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * N (local input) const int + * On entry, N specifies the order of the matrix A. N must be at + * least zero. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry with UPLO==HplUpper, the leading + * n by n upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. When UPLO==HplLower on entry, + * the leading n by n lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,n). + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * Before entry, the incremented array X must contain the n + * element right-hand side vector b. On exit, X is overwritten + * with the solution vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); + } + else + { + HPL_dtrsv0( ( UPLO == HplUpper ? HplLower : HplUpper ), + ( TRANS == HplNoTrans ? 
HplTrans : HplNoTrans ), + DIAG, N, A, LDA, X, INCX ); + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fuplo, ftran, fdiag; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77lda = LDA, F77incx = INCX; +#else +#define F77N N +#define F77lda LDA +#define F77incx INCX +#endif + char cuplo, ctran, cdiag; + + if( ORDER == HplColumnMajor ) + { + cuplo = ( UPLO == HplUpper ? 'U' : 'L' ); + ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); + } + else + { + cuplo = ( UPLO == HplUpper ? 'L' : 'U' ); + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); + } + cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' ); + +#ifdef StringSunStyle + F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx, + IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + fuplo = HPL_C2F_CHAR( cuplo ); + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructVal + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructPtr + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif + +#endif +/* + * End of HPL_dtrsv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.o new file mode 100644 index 000000000..2930120c9 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_dtrsv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.c new file mode 100644 index 000000000..5ceabdf25 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.c @@ -0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_idamax + +#ifdef STDC_HEADERS +int HPL_idamax +( + const int N, + const double * X, + const int INCX +) +#else +int HPL_idamax +( N, X, INCX ) + const int N; + const double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_idamax returns the index in an n-vector x of the first element + * having maximum absolute value. + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + return( (int)(cblas_idamax( N, X, INCX )) ); +#endif +#ifdef HPL_CALL_VSIPL + register double absxi, smax = HPL_rzero, x0, x1, x2, x3, + x4, x5, x6, x7; + const double * StX; + register int imax = 0, i = 0, j; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + + X += incX8; + + } while( X != StX ); + } + + for( j = N - nu; j != 0; j-- ) + { + x0 = (*X); + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + X += INCX; + } + } + return( imax ); +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + int imax = 0; + + if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1; + return( imax ); +#endif +/* + * End of HPL_idamax + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.o new file mode 100644 index 000000000..b765e7be6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/blas/HPL_idamax.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.c new file mode 100644 index 000000000..dd03b79b1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, then send message to its two + * next neighbors. Otherwise, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, or + * just after the root process, then forward it to the next. Otherwise, + * inform the caller that the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next, + size ), msgid, comm ); + } + } + else + { + prev = MModSub1( rank, size ); + if( ( size > 2 ) && + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( prev != root ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.o new file mode 100644 index 000000000..6753a83a3 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1rinM.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.c new file mode 100644 index 000000000..dd5eb2d12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.c @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, prev, rank, root, + size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, then + * forward it to the next. Otherwise, inform the caller that the panel + * has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank, + size ), msgid, comm ); + } + else + { + prev = MModSub1( rank, size ); + + ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid, + comm, &PANEL->status[0] ); + next = MModAdd1( rank, size ); + if( ( ierr == MPI_SUCCESS ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, + msgid, comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.o new file mode 100644 index 000000000..5ce4009a8 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_1ring.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.c new file mode 100644 index 000000000..56581ea0d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.c @@ -0,0 +1,236 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its two right neighbors and mid-pro- + * cess. If I am not the root process, probe for message. If the message + * is there, then receive it. If I am not the last process of both rings + * then forward it to the next. Otherwise, inform the caller that the + * panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + if( MModAdd1( next, size ) != roo2 ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, + MModAdd1( next, size ), msgid, comm ); + } + + if( ierr == MPI_SUCCESS ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + } + else + { + prev = MModSub1( rank, size ); + if( ( prev == root ) || ( rank == roo2 ) || + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && ( prev != root ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.o new file mode 100644 index 000000000..9c738a796 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2rinM.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.c new file mode 100644 index 000000000..f0e6e2647 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, rank, + roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its right neighbor and mid-process. + * If I am not the root process, probe for message. If the message is + * there, then receive it, and if I am not the last process of both + * rings, then forward it to the next. Otherwise, inform the caller that + * the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + else + { + partner = MModSub1( rank, size ); + if( ( partner == root ) || ( rank == roo2 ) ) partner = root; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.o new file mode 100644 index 000000000..1de2094e7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_2ring.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.c new file mode 100644 index 000000000..100161152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bcast +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast +( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bcast broadcasts the current panel. Successful completion is + * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to + * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was + * not completed, in which case this function should be called again. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * IFLAG (output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * occured. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break; + case HPL_1RING : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break; + case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break; + case HPL_2RING : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break; + case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break; + case HPL_BLONG : ierr = HPL_bcast_blong( PANEL, IFLAG ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.o new file mode 100644 index 000000000..5ce7d8a27 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bcast.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.c new file mode 100644 index 000000000..3daf72b7d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_binit +( + HPL_T_panel * PANEL +) +#else +int HPL_binit +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_binit initializes a row broadcast. Successful completion is + * indicated by the returned error code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_binit_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_binit_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_binit_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_binit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.o new file mode 100644 index 000000000..c9f9da1e6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_binit.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.c new file mode 100644 index 000000000..5fa221937 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.c @@ -0,0 +1,445 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... 
*/ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S1 PANEL->buffers[I_SEND] +#define _M_COUNT_S1 PANEL->counts[I_SEND] +#define _M_TYPE_S1 PANEL->dtypes[I_SEND] + +#define _M_BUFF_S2 PANEL->buffers[I_SEND] +#define _M_COUNT_S2 PANEL->counts[I_SEND] +#define _M_TYPE_S2 PANEL->dtypes[I_SEND] + +#define _M_BUFF_R1 PANEL->buffers[I_RECV] +#define _M_COUNT_R1 PANEL->counts[I_RECV] +#define _M_TYPE_R1 PANEL->dtypes[I_RECV] + +#define _M_BUFF_R2 PANEL->buffers[I_RECV] +#define _M_COUNT_R2 PANEL->counts[I_RECV] +#define _M_TYPE_R2 PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S1 (void *)(PANEL->L2) +#define _M_COUNT_S1 PANEL->len +#define _M_TYPE_S1 MPI_DOUBLE + +#define _M_BUFF_S2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S2 lbuf +#define _M_TYPE_S2 MPI_DOUBLE + +#define _M_BUFF_R1 (void *)(PANEL->L2) +#define _M_COUNT_R1 PANEL->len +#define _M_TYPE_R1 MPI_DOUBLE + +#define _M_BUFF_R2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R2 lbuf +#define _M_TYPE_R2 MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + 
ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blonM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_blonM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf, + ibufR, ibufS, dummy=0, indx, ip2=1, k, l, + lbuf, lbufR, lbufS, mask=1, msgid, mydist, + mydist2, next, npm1, npm2, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process sends to its right neighbor, then spread + * the panel on the other npcol - 2 processes. If I am not the root + * process, probe for message received. If the message is there, then + * receive it. If I am just after the root process, return. Otherwise, + * keep spreading on those npcol - 2 processes. Otherwise, inform the + * caller that the panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + root = PANEL->pcol; msgid = PANEL->msgid; + prev = MModSub1( rank, size ); + + if( rank == root ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, 0, PANEL->len, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1, + MModAdd1( rank, size ), msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else if( prev == root ) + { +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. 
It is currently disabled until a better understanding + * is acquired. + * + * ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] ); + */ + if( ierr == MPI_SUCCESS ) + { /* if panel is here, proceed */ + if( go != 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + ierr = HPL_packL( PANEL, 0, PANEL->len, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1, + root, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } + } +/* + * if I am just after the root, exit now. The message receive completed + * successfully, this guy is done. If there are only 2 processes in each + * row of processes, we are done as well. + */ + if( ( prev == root ) || ( size == 2 ) ) + { + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + return( *IFLAG ); + } +/* + * Otherwise, proceed with broadcast - Spread the panel across process + * columns + */ + npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len; + + k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + if( rank == root ) mydist2 = ( mydist = 0 ); + else mydist2 = ( mydist = MModSub( rank, root, size ) - 1 ); + + indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 ); + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < npm1 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); + if( MModSub1( prev, size ) == root ) prev = root; + next = MModAdd1( rank, size ); + if( rank == root ) next = MModAdd1( next, size ); + + for( k = 0; k < npm2; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = 
MModAdd( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the 
message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.o new file mode 100644 index 000000000..0e2589292 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blonM.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.c new file mode 100644 index 000000000..e57f11bcc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.c @@ -0,0 +1,363 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... 
*/ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S PANEL->buffers[I_SEND] +#define _M_COUNT_S PANEL->counts[I_SEND] +#define _M_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_BUFF_R PANEL->buffers[I_RECV] +#define _M_COUNT_R PANEL->counts[I_RECV] +#define _M_TYPE_R PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S lbuf +#define _M_TYPE_S MPI_DOUBLE + +#define _M_BUFF_R (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R lbuf +#define _M_TYPE_R MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE + +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blong +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_blong( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. 
+ */ + MPI_Comm comm; + int COUNT, count, dummy=0, ierr=MPI_SUCCESS, + ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf, + lbufR, lbufS, mask, msgid, mydist, mydist2, + next, npm1, partner, prev, rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, test for message receive completion. If + * the message is there, then receive it, and keep spreading in a + * blocking fashion this time. Otherwise, inform the caller that the + * panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + mask = PANEL->grid->col_mask; ip2 = PANEL->grid->col_ip2m1; + root = PANEL->pcol; msgid = PANEL->msgid; + COUNT = PANEL->len; npm1 = size - 1; + mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2; + count = COUNT / size; count = Mmax( count, 1 ); +/* + * Spread the panel across process columns + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < size ) + { + partner = MModAdd( root, partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Send message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); next = MModAdd1( rank, size ); + + for( k = 0; k < npm1; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = MModAdd( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? 
COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. 
+ * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.o new file mode 100644 index 000000000..3a0c08b06 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_blong.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.c new file mode 100644 index 000000000..a2e0f4df8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bwait +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bwait HPL_bwait waits for the row broadcast of the current panel to + * terminate. Successful completion is indicated by the returned error + * code HPL_SUCCESS. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_bwait_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_bwait_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_bwait_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bwait + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.o new file mode 100644 index 000000000..03ee92ae4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_bwait.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.c new file mode 100644 index 000000000..04f765a6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_copyL +( + HPL_T_panel * PANEL +) +#else +void HPL_copyL +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_copyL copies the panel of columns, the L1 replicated submatrix, + * the pivot array and the info scalar into a contiguous workspace for + * later broadcast. + * + * The copy of this panel into a contiguous buffer can be enforced by + * specifying -DHPL_COPY_L in the architecture specific Makefile. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int jb, lda; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + jb = PANEL->jb; lda = PANEL->lda; + + if( PANEL->grid->myrow == PANEL->prow ) + { + HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + else + { + HPL_dlacpy( PANEL->mp, jb, Mptr( PANEL->A, 0, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + } +/* + * End of HPL_copyL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.o new file mode 100644 index 000000000..7db34d0b4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_copyL.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.c new file mode 100644 index 000000000..8a70ef83d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.c @@ -0,0 +1,245 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_packL +( + HPL_T_panel * PANEL, + const int INDEX, + const int LEN, + const int IBUF +) +#else +int HPL_packL +( PANEL, INDEX, LEN, IBUF ) + HPL_T_panel * PANEL; + const int INDEX; + const int LEN; + const int IBUF; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_packL forms the MPI data type for the panel to be broadcast. + * Successful completion is indicated by the returned error code + * MPI_SUCCESS. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * INDEX (input) const int + * On entry, INDEX points to the first entry of the packed + * buffer being broadcast. + * + * LEN (input) const int + * On entry, LEN is the length of the packed buffer. + * + * IBUF (input) const int + * On entry, IBUF specifies the panel buffer/count/type entries + * that should be initialized. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ +#ifndef HPL_COPY_L + MPI_Datatype * type = NULL; + void * * * bufs = NULL; + double * A; + int * blen = NULL; + MPI_Aint * disp = NULL; + int curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1, + jb, jbm, jbp1, lda, len, m, m1, nbufs; +#else + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_COPY_L +/* + * Panel + L1 + DPIV have been copied into a contiguous buffer - Create + * and commit a contiguous data type + */ + PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX); + PANEL->counts [IBUF] = 1; + + ierr = MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); +#else +/* + * Panel is not contiguous (because of LDA and also L1 + DPIV) - Create + * and commit a struct data type + */ + jbp1 = ( jb = PANEL->jb ) + 1; +/* + * Temporaries to create the type struct. 
+ */ + bufs = (void * * *)malloc( jbp1 * sizeof( void * * ) ); + blen = (int *)malloc( jbp1 * sizeof( int ) ); + disp = (MPI_Aint *)malloc( jbp1 * sizeof( MPI_Aint ) ); + type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) ); + + if( ( bufs != NULL ) && ( blen != NULL ) && + ( disp != NULL ) && ( type != NULL ) ) + { + m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow ); + if( curr != 0 ) m -= jb; + + len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m; + + if( ( m > 0 ) && ( ibuf < jbm ) ) + { +/* + * Retrieve proper pointers depending on process row and column + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + lda = PANEL->lda; + if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); } + else { A = Mptr( PANEL->A, 0, -jb, lda ); } + } + else { lda = PANEL->ldl2; A = PANEL->L2; } +/* + * Pack the first (partial) column of L + */ + m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m ); + m1 = Mmin( len, m1 ); + + bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; +/* + * Pack the remaining columns of L + */ + while( ( len > 0 ) && ( j1 < jb ) ) + { + m1 = Mmin( len, m ); + + bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; + } + } +/* + * Pack L1, DPIV, DINFO + */ + if( len > 0 ) + { /* L1, DPIV, DINFO */ + bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = len; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + nbufs++; + } + + for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0; + + PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1; +/* + * construct the struct type + */ + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_create_struct( nbufs, blen, disp, type, + &PANEL->dtypes[IBUF] ); +/* + * release temporaries + */ + if( bufs ) free( bufs ); + if( blen ) free( blen ); + if( disp ) free( disp ); + if( type ) free( type ); +/* + * commit the type + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); + } + else + { +/* + * Memory allocation failed -> abort + */ + HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" ); + return( MPI_SUCCESS ); /* never executed (hopefully ...) */ + } +#endif +#else + /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug + somewhere, so, just in case and until I find it ... */ + return( MPI_SUCCESS ); +#endif +/* + * End of HPL_packL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.o new file mode 100644 index 000000000..609133f84 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_packL.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.c new file mode 100644 index 000000000..ff426891c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.c @@ -0,0 +1,142 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. 
+ */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_recv +( + double * RBUF, + int RCOUNT, + int SRC, + int RTAG, + MPI_Comm COMM +) +#else +int HPL_recv +( RBUF, RCOUNT, SRC, RTAG, COMM ) + double * RBUF; + int RCOUNT; + int SRC; + int RTAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_recv is a simple wrapper around MPI_Recv. Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * SRC (local input) int + * On entry, SRC specifies the rank of the sending process in + * the communication space defined by COMM. + * + * RTAG (local input) int + * On entry, STAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. 
+ */ + if( RCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG, + COMM, &status ); +#endif + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_recv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.o new file mode 100644 index 000000000..a87fbb1f9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_recv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.c new file mode 100644 index 000000000..0b2363563 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.c @@ -0,0 +1,239 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. 
+ */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_sdrv +( + double * SBUF, + int SCOUNT, + int STAG, + double * RBUF, + int RCOUNT, + int RTAG, + int PARTNER, + MPI_Comm COMM +) +#else +int HPL_sdrv +( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM ) + double * SBUF; + int SCOUNT; + int STAG; + double * RBUF; + int RCOUNT; + int RTAG; + int PARTNER; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is + * to allow for some experimentation and tuning of this simple function. + * Messages of length less than or equal to zero are not sent nor + * received. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for the + * sending communication operation. + * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for the + * receiving communication operation. + * + * PARTNER (local input) int + * On entry, PARTNER specifies the rank of the collaborative + * process in the communication space defined by COMM. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type[2]; +#endif + MPI_Request request; + MPI_Status status; + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT > 0 ) + { + if( SCOUNT > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE +/* + * Post asynchronous receive + */ + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER, + RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, + STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else +/* + * Post asynchronous receive + */ + ierr = MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, + PARTNER, STAG, COMM ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#endif + } + else + { +/* + * Blocking receive + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, + COMM, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &status ); +#endif + } + } + else if( SCOUNT > 0 ) + { +/* + * Blocking send + */ +#ifdef 
HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, + COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ) ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, + STAG, COMM ); +#endif + } + else { ierr = MPI_SUCCESS; } + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_sdrv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.o new file mode 100644 index 000000000..8d188a0ec Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_sdrv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.c new file mode 100644 index 000000000..9e9868594 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. 
+ */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_send +( + double * SBUF, + int SCOUNT, + int DEST, + int STAG, + MPI_Comm COMM +) +#else +int HPL_send +( SBUF, SCOUNT, DEST, STAG, COMM ) + double * SBUF; + int SCOUNT; + int DEST; + int STAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_send is a simple wrapper around MPI_Send. Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * MPI_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * DEST (local input) int + * On entry, DEST specifies the rank of the receiving process in + * the communication space defined by COMM. + * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. 
+ */ + if( SCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM ); +#endif + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_send + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.o new file mode 100644 index 000000000..6f242b1ed Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/comm/HPL_send.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.cpp b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.cpp new file mode 100644 index 000000000..644503181 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.cpp @@ -0,0 +1,310 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ + +#define NUMBER_OF_STREAMS 4 +#define CHUNK_SIZE 512 +#define NN 64 +#define NM 128 +#define ERRCODE(e) (-(__LINE__ * 1000 + (e))) +//#define DEVICE_DEBUG +//#ifdef MPI +//#include +//#endif + + +#define _GNU_SOURCE + +#define CUDA_ERROR_CHECK +#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ ) +#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ ) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include "mkl.h" + +extern "C" { + +inline void __cudaSafeCall(dpct::err0 err, const char *file, const int line) +{ + #ifdef CUDA_ERROR_CHECK + +#endif + + return; +} + +inline void __cudaCheckError(const char *file, const int line) try { +#ifdef CUDA_ERROR_CHECK + /* + DPCT1010:1: SYCL uses exceptions to report errors and does not use the + error codes. The call was replaced with 0. You need to rewrite this + code. + */ + dpct::err0 err = 0; + + // More careful checking. However, this will affect performance. + // Comment away if needed. 
+ err = DPCT_CHECK_ERROR(dpct::get_current_device().queues_wait_and_throw()); + +#endif + + return; +} +catch (sycl::exception const &exc) { + std::cerr << exc.what() << "Exception caught at file:" << __FILE__ + << ", line:" << __LINE__ << std::endl; + std::exit(1); +} + + void dpcpp_dgemm + ( const int ORDER, + const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA, const double *A, const int LDA, + const double *B, const int LDB, const double BETA, + double *C, const int LDC); + + void dpcpp_dtrsm( + int HPL_ORDER, + int HPL_SIDE, + int HPL_UPLO, + int HPL_TRANS, + int HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int); +} + + +void dpcpp_dgemm +( const int ORDER, const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA,const double *A, const int LDA, + const double *B, const int LDB, + const double BETA, double *C, const int LDC) +{ + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + + if ((M==0)||(K==0)||(N==0)){ + return; + } + + + if ( (N) < NN || (M) < NM || (K) < 128){ + + #ifdef DEVICE_DEBUG + std::cout << "dgemm-Running on CPU" << std::endl; + #endif + + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC); + return; + } + + + #ifdef DEVICE_DEBUG + std::cout << "dgemm-Running on GPU" << std::endl; + #endif + + double *devPtrA, *devPtrB, *devPtrC; + int status; + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrA = sycl::malloc_device(K * LDA, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrA, &A[0], K * LDA * sizeof(double)).wait())); + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrB = sycl::malloc_device(N * LDB, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrB, &B[0], N * LDB * sizeof(double)).wait())); + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrC = sycl::malloc_device(N * LDC, q_ct1))); + 
CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrC, &C[0], N * LDC * sizeof(double)).wait())); + + dev_ct1.queues_wait_and_throw(); + oneapi::mkl::blas::column_major::gemm( + *dpct::get_current_device().get_saved_queue(), + oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, M, + N, K, ALPHA, devPtrA, LDA, devPtrB, LDB, BETA, devPtrC, LDC) + .wait(); + dev_ct1.queues_wait_and_throw(); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(&C[0], devPtrC, N * LDC * sizeof(double)).wait())); + dev_ct1.queues_wait_and_throw(); + sycl::free(devPtrA, q_ct1); + sycl::free(devPtrB, q_ct1); + sycl::free(devPtrC, q_ct1); +} + +void dpcpp_dtrsm + +( const int ORDER, const int SIDE, + const int UPLO, const int TRANS, + const int DIAG, const int M, const int N, + const double ALPHA, const double* A, const int LDA, double* B, + const int LDB) +{ + dpct::device_ext &dev_ct1 = dpct::get_current_device(); + sycl::queue &q_ct1 = dev_ct1.in_order_queue(); + + if ((M==0)||(N==0)){ + return; + } + + double *devPtrA, *devPtrB; + int status; + + + if ( (M) < 512 || (N) < 2*(M)){ + #ifdef DEVICE_DEBUG + std::cout << "dtrsm-Running on CPU" << std::endl; + #endif + cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB); + + + return; + } + + #ifdef DEVICE_DEBUG + std::cout << "dtrsm-Running on GPU" << std::endl; + #endif + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrA = sycl::malloc_device(M * LDA, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrA, A, M * LDA * sizeof(double)).wait())); + + CudaSafeCall(DPCT_CHECK_ERROR( + devPtrB = sycl::malloc_device(N * LDB, q_ct1))); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(devPtrB, B, N * LDB * sizeof(double)).wait())); + dev_ct1.queues_wait_and_throw(); + + oneapi::mkl::blas::column_major::trsm( + *dpct::get_current_device().get_saved_queue(), oneapi::mkl::side::left, + oneapi::mkl::uplo::lower, oneapi::mkl::transpose::nontrans, + oneapi::mkl::diag::unit, M, N, ALPHA, 
devPtrA, LDA, devPtrB, LDB) + .wait(); + + dev_ct1.queues_wait_and_throw(); + CudaSafeCall(DPCT_CHECK_ERROR( + q_ct1.memcpy(B, devPtrB, N * LDB * sizeof(double)).wait())); + + dev_ct1.queues_wait_and_throw(); + sycl::free(devPtrA, q_ct1); + sycl::free(devPtrB, q_ct1); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.o new file mode 100644 index 000000000..5284ec6c1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/cuda/cuda_dgemm.cpp.dp.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.c new file mode 100644 index 000000000..776f48504 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_all_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + MPI_Comm COMM +) +#else +int HPL_all_reduce +( BUFFER, COUNT, DTYPE, OP, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_all_reduce performs a global reduce operation across all + * processes of a group leaving the results on all processes. 
+ * + * Arguments + * ========= + * + * BUFFER (local input/global output) void * + * On entry, BUFFER points to the buffer to be combined. On + * exit, this array contains the combined data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr; +/* .. + * .. Executable Statements .. + */ + hplerr = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM ); + if( hplerr != MPI_SUCCESS ) return( hplerr ); + return( HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) ); +/* + * End of HPL_all_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.o new file mode 100644 index 000000000..ac0f38d00 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_all_reduce.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.c new file mode 100644 index 000000000..9a5d9b10a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.c @@ -0,0 +1,90 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_barrier +( + MPI_Comm COMM +) +#else +int HPL_barrier +( COMM ) + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_barrier blocks the caller until all process members have call it. + * The call returns at any process only after all group members have + * entered the call. + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i=0; +/* .. + * .. Executable Statements .. 
+ */ + return( HPL_broadcast( (void*)(&i), 1, HPL_INT, 0, COMM ) ); +/* + * End of HPL_barrier + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.o new file mode 100644 index 000000000..b842da4f7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_barrier.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.c new file mode 100644 index 000000000..42d962864 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.c @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_broadcast +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_broadcast +( BUFFER, COUNT, DTYPE, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_broadcast broadcasts a message from the process with rank ROOT to + * all processes in the group. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be broadcast. On + * exit, this array contains the broadcast data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. 
+ * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the source process. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; + MPI_Status status; +/* .. + * .. Executable Statements .. + */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr ); + mpierr = MPI_Comm_rank( COMM, &rank ); + + kk = size - 1; + while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist = MModSub( rank, ROOT, size ); + + do + { + mask ^= ip2; + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + ip2 >>= 1; + } while( ip2 ); + + return( hplerr ); +/* + * End of HPL_broadcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.o new file mode 100644 index 000000000..1862a82f8 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_broadcast.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.c new file mode 100644 index 000000000..f0d00b065 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_exit +( + HPL_T_grid * GRID +) +#else +int HPL_grid_exit +( GRID ) + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_exit marks the process grid object for deallocation. The + * returned error code MPI_SUCCESS indicates successful completion. + * Other error codes are (MPI) implementation dependent. + * + * Arguments + * ========= + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid to be released. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr = MPI_SUCCESS, mpierr; +/* .. + * .. Executable Statements .. 
+ */ + if( GRID->all_comm != MPI_COMM_NULL ) + { + mpierr = MPI_Comm_free( &(GRID->row_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->col_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->all_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + + GRID->order = HPL_COLUMN_MAJOR; + + GRID->iam = GRID->myrow = GRID->mycol = -1; + GRID->nprow = GRID->npcol = GRID->nprocs = -1; + + GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1; + GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1; + + return( hplerr ); +/* + * End of HPL_grid_exit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.o new file mode 100644 index 000000000..75a094fff Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_exit.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.c new file mode 100644 index 000000000..95c5a7315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_info +( + const HPL_T_grid * GRID, + int * NPROW, + int * NPCOL, + int * MYROW, + int * MYCOL +) +#else +int HPL_grid_info +( GRID, NPROW, NPCOL, MYROW, MYCOL ) + const HPL_T_grid * GRID; + int * NPROW; + int * NPCOL; + int * MYROW; + int * MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_info returns the grid shape and the coordinates in the grid + * of the calling process. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NPROW (global output) int * + * On exit, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global output) int * + * On exit, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. + * + * MYROW (global output) int * + * On exit, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (global output) int * + * On exit, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + *NPROW = GRID->nprow; *NPCOL = GRID->npcol; + *MYROW = GRID->myrow; *MYCOL = GRID->mycol; + return( MPI_SUCCESS ); +/* + * End of HPL_grid_info + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.o new file mode 100644 index 000000000..0b216fcf1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_info.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.c new file mode 100644 index 000000000..52111ac52 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_init +( + MPI_Comm COMM, + const HPL_T_ORDER ORDER, + const int NPROW, + const int NPCOL, + HPL_T_grid * GRID +) +#else +int HPL_grid_init +( COMM, ORDER, NPROW, NPCOL, GRID ) + MPI_Comm COMM; + const HPL_T_ORDER ORDER; + const int NPROW; + const int NPCOL; + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_init creates a NPROW x NPCOL process grid using column- or + * row-major ordering from an initial collection of processes identified + * by an MPI communicator. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. The coordinates of processes that are not part of the + * grid are set to values outside of [0..NPROW) x [0..NPCOL). 
+ * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * On entry, COMM is the MPI communicator identifying the + * initial collection of processes out of which the grid is + * formed. + * + * ORDER (global input) const HPL_T_ORDER + * On entry, ORDER specifies how the processes should be ordered + * in the grid as follows: + * ORDER = HPL_ROW_MAJOR row-major ordering; + * ORDER = HPL_COLUMN_MAJOR column-major ordering; + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid to be created. NPROW must be at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid to be created. NPCOL must be at least one. + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information to be initialized. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hdim, hplerr=MPI_SUCCESS, ierr, ip2, k, + mask, mycol, myrow, nprocs, rank, size; +/* .. + * .. Executable Statements .. 
+ */ + MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size ); +/* + * Abort if illegal process grid + */ + nprocs = NPROW * NPCOL; + if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) ) + { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); } +/* + * Row- or column-major ordering of the processes + */ + if( ORDER == HPL_ROW_MAJOR ) + { + GRID->order = HPL_ROW_MAJOR; + myrow = rank / NPCOL; mycol = rank - myrow * NPCOL; + } + else + { + GRID->order = HPL_COLUMN_MAJOR; + mycol = rank / NPROW; myrow = rank - mycol * NPROW; + } + GRID->iam = rank; GRID->myrow = myrow; GRID->mycol = mycol; + GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs; +/* + * row_ip2 : largest power of two <= nprow; + * row_hdim : row_ip2 procs hypercube dim; + * row_ip2m1 : largest power of two <= nprow-1; + * row_mask : row_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPROW; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->row_ip2 = ip2; GRID->row_hdim = hdim; + + mask = ip2 = 1; k = NPROW - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->row_ip2m1 = ip2; GRID->row_mask = mask; +/* + * col_ip2 : largest power of two <= npcol; + * col_hdim : col_ip2 procs hypercube dim; + * col_ip2m1 : largest power of two <= npcol-1; + * col_mask : col_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPCOL; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->col_ip2 = ip2; GRID->col_hdim = hdim; + + mask = ip2 = 1; k = NPCOL - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->col_ip2m1 = ip2; GRID->col_mask = mask; +/* + * All communicator, leave if I am not part of this grid. Creation of the + * row- and column communicators. + */ + ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 
0 : MPI_UNDEFINED ), + rank, &(GRID->all_comm) ); + if( GRID->all_comm == MPI_COMM_NULL ) return( ierr ); + + ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + return( hplerr ); +/* + * End of HPL_grid_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.o new file mode 100644 index 000000000..7bad72781 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_grid_init.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.c new file mode 100644 index 000000000..002aabe01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_max +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_max +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_max combines (max) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. 
+ * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } +/* + * End of HPL_max + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.o new file mode 100644 index 000000000..5cb94b4ef Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_max.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.c new file mode 100644 index 000000000..a99e5e58a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_min +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_min +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_min combines (min) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. 
+ */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } +/* + * End of HPL_min + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.o new file mode 100644 index 000000000..144fc1ec6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_min.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.c new file mode 100644 index 000000000..c80885b9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pnum +( + const HPL_T_grid * GRID, + const int MYROW, + const int MYCOL +) +#else +int HPL_pnum +( GRID, MYROW, MYCOL ) + const HPL_T_grid * GRID; + const int MYROW; + const int MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pnum determines the rank of a process as a function of its + * coordinates in the grid. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. 
+ * + * MYROW (local input) const int + * On entry, MYROW specifies the row coordinate of the process + * whose rank is to be determined. MYROW must be greater than or + * equal to zero and less than NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies the column coordinate of the + * process whose rank is to be determined. MYCOL must be greater + * than or equal to zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + if( GRID->order == HPL_ROW_MAJOR ) + return( MYROW * GRID->npcol + MYCOL ); + else + return( MYCOL * GRID->nprow + MYROW ); +/* + * End of HPL_pnum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.o new file mode 100644 index 000000000..8da27eae3 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_pnum.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.c new file mode 100644 index 000000000..417c21163 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_reduce +( BUFFER, COUNT, DTYPE, OP, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_reduce performs a global reduce operation across all processes of + * a group. Note that the input buffer is used as workarray and in all + * processes but the accumulating process corrupting the original data. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be reduced. On + * exit, and in process of rank ROOT this array contains the + * reduced data. This buffer is also used as workspace during + * the operation in the other processes of the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the accumulating process. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; + void * buffer = NULL; + int hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; +/* .. + * .. Executable Statements .. 
+ */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); + if( size == 1 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_rank( COMM, &rank ); + i = size - 1; while( i > 1 ) { i >>= 1; d++; } + + if( DTYPE == HPL_INT ) + buffer = (void *)( (int *) malloc( (size_t)(COUNT) * + sizeof( int ) ) ); + else + buffer = (void *)( (double *)malloc( (size_t)(COUNT) * + sizeof( double ) ) ); + + if( !( buffer ) ) + { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); } + + if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 ) + { + do + { + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + MModAdd( ROOT, ip2, size ), tag, COMM, + &status ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + OP( COUNT, buffer, BUFFER, DTYPE ); + ip2 <<= 1; d--; + } while( d ); + } + else + { + do + { + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + OP( COUNT, buffer, BUFFER, DTYPE ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + mask ^= ip2; ip2 <<= 1; d--; + } while( d ); + } + if( buffer ) free( buffer ); + + return( hplerr ); +/* + * End of HPL_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.o new file mode 100644 index 000000000..d731bed78 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_reduce.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.c new file 
mode 100644 index 000000000..34cf87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_sum +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_sum +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sum combines (sum) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. 
+ */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } +/* + * End of HPL_sum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.o new file mode 100644 index 000000000..99df49676 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/grid/HPL_sum.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.c new file mode 100644 index 000000000..757dad242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.c @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_disp +( + HPL_T_panel * * PANEL +) +#else +int HPL_pdpanel_disp +( PANEL ) + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_disp deallocates the panel structure and resources and + * stores the error code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to be deallocated. + * + * --------------------------------------------------------------------- + */ +/* + * .. 
Local Variables .. + */ + int mpierr; +/* .. + * .. Executable Statements .. + */ +/* + * Deallocate the panel resources and panel structure + */ + mpierr = HPL_pdpanel_free( *PANEL ); + if( *PANEL ) free( *PANEL ); + *PANEL = NULL; + + return( mpierr ); +/* + * End of HPL_pdpanel_disp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.o new file mode 100644 index 000000000..22d8bd1b5 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_disp.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.c new file mode 100644 index 000000000..38b5b0d97 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.c @@ -0,0 +1,104 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_free +( + HPL_T_panel * PANEL +) +#else +int HPL_pdpanel_free +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_free deallocates the panel resources and stores the error + * code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the panel data structure from + * which the resources should be deallocated. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->pmat->info == 0 ) PANEL->pmat->info = *(PANEL->DINFO); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE ); + (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE ); + if( PANEL->grid->nprow > 1 ) + (void) vsip_blockrelease_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Destroy blocks + */ + vsip_blockdestroy_d( PANEL->L1block ); + vsip_blockdestroy_d( PANEL->L2block ); + if( PANEL->grid->nprow > 1 ) + vsip_blockdestroy_d( PANEL->Ublock ); +#endif + + if( PANEL->WORK ) free( PANEL->WORK ); + if( PANEL->IWORK ) free( PANEL->IWORK ); + + return( MPI_SUCCESS ); +/* + * End of HPL_pdpanel_free + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.o new file mode 100644 index 000000000..66da88393 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_free.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.c new file mode 100644 index 000000000..9e35c7fb4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.c @@ -0,0 +1,348 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... 
*/ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +void HPL_pdpanel_init +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * PANEL +) +#else +void HPL_pdpanel_init +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_init initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies is the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + size_t dalign; + int icurcol, icurrow, ii, itmp1, jj, lwork, + ml2, mp, mycol, myrow, nb, npcol, nprow, + nq, nu; +/* .. + * .. Executable Statements .. + */ + PANEL->grid = GRID; /* ptr to the process grid */ + PANEL->algo = ALGO; /* ptr to the algo parameters */ + PANEL->pmat = A; /* ptr to the local array info */ + + myrow = GRID->myrow; mycol = GRID->mycol; + nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; + + HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, + nprow, npcol, &ii, &jj, &icurrow, &icurcol ); + mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow ); + nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol ); + /* ptr to trailing part of A */ + PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld ); +/* + * Workspace pointers are initialized to NULL. 
+ */ + PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; + PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; + PANEL->IWORK = NULL; +/* + * Local lengths, indexes process coordinates + */ + PANEL->nb = nb; /* distribution blocking factor */ + PANEL->jb = JB; /* panel width */ + PANEL->m = M; /* global # of rows of trailing part of A */ + PANEL->n = N; /* global # of cols of trailing part of A */ + PANEL->ia = IA; /* global row index of trailing part of A */ + PANEL->ja = JA; /* global col index of trailing part of A */ + PANEL->mp = mp; /* local # of rows of trailing part of A */ + PANEL->nq = nq; /* local # of cols of trailing part of A */ + PANEL->ii = ii; /* local row index of trailing part of A */ + PANEL->jj = jj; /* local col index of trailing part of A */ + PANEL->lda = A->ld; /* local leading dim of array A */ + PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */ + PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */ + PANEL->msgid = TAG; /* message id to be used for panel bcast */ +/* + * Initialize ldl2 and len to temporary dummy values and Update tag for + * next panel + */ + PANEL->ldl2 = 0; /* local leading dim of array L2 */ + PANEL->len = 0; /* length of the buffer to broadcast */ +/* + * Figure out the exact amount of workspace needed by the factorization + * and the update - Allocate that space - Finish the panel data structu- + * re initialization. + * + * L1: JB x JB in all processes + * DPIV: JB in all processes + * DINFO: 1 in all processes + * + * We make sure that those three arrays are contiguous in memory for the + * later panel broadcast. We also choose to put this amount of space + * right after L2 (when it exist) so that one can receive a contiguous + * buffer. 
+ */ + dalign = ALGO->align * sizeof( double ); + + if( npcol == 1 ) /* P x 1 process grid */ + { /* space for L1, DPIV, DINFO */ + lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 ); + if( nprow > 1 ) /* space for U */ + { nu = nq - JB; lwork += JB * Mmax( 0, nu ); } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Always re-use A in + * the only process column + */ + PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); + } + else + { /* space for L2, L1, DPIV */ + ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 ); + PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 ); +#ifdef HPL_COPY_L + lwork = ALGO->align + PANEL->len; +#else + lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); +#endif + if( nprow > 1 ) /* space for U */ + { + nu = ( mycol == icurcol ? nq - JB : nq ); + lwork += JB * Mmax( 0, nu ); + } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Re-use A in the cur- + * rent process column when HPL_COPY_L is not defined. + */ +#ifdef HPL_COPY_L + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; +#else + if( mycol == icurcol ) + { + PANEL->L2 = PANEL->A + ( myrow == icurrow ? 
JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + } + else + { + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; + } +#endif + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); + } +#ifdef HPL_CALL_VSIPL + PANEL->Ablock = A->block; +/* + * Create blocks and bind them to the data pointers + */ + PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), + (vsip_length)(JB*JB), VSIP_MEM_NONE ); + PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), + (vsip_length)(PANEL->ldl2*JB), + VSIP_MEM_NONE ); + if( nprow > 1 ) + { + nu = ( mycol == icurcol ? nq - JB : nq ); + PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), + (vsip_length)(JB * Mmax( 0, nu )), + VSIP_MEM_NONE ); + } + else { PANEL->Ublock = A->block; } +#endif +/* + * If nprow is 1, we just allocate an array of JB integers for the swap. + * When nprow > 1, we allocate the space for the index arrays immediate- + * ly. The exact size of this array depends on the swapping routine that + * will be used, so we allocate the maximum: + * + * IWORK[0] is of size at most 1 + + * IPL is of size at most 1 + + * IPID is of size at most 4 * JB + + * + * For HPL_pdlaswp00: + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * llen is of size at most NPROW + + * llen_sv is of size at most NPROW. + * + * For HPL_pdlaswp01: + * ipA is of size ar most 1 + + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * iplen is of size at most NPROW + 1 + + * ipmap is of size at most NPROW + + * ipmapm1 is of size at most NPROW + + * permU is of size at most JB + + * iwork is of size at most MAX( 2*JB, NPROW+1 ). + * + * that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1)) + * = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ). 
+ * + * We use the fist entry of this to work array to indicate whether the + * the local index arrays have already been computed, and if yes, by + * which function: + * IWORK[0] = -1: no index arrays have been computed so far; + * IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays; + * IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays; + * This allows to save some redundant and useless computations. + */ + if( nprow == 1 ) { lwork = JB; } + else + { + itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); + lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; + } + + PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + + if( PANEL->IWORK == NULL ) + { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } + /* Initialize the first entry of the workarray */ + *(PANEL->IWORK) = -1; +/* + * End of HPL_pdpanel_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.o new file mode 100644 index 000000000..8e9fd1360 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_init.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.c new file mode 100644 index 000000000..1dbd8a18f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.c @@ -0,0 +1,152 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanel_new +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * * PANEL +) +#else +void HPL_pdpanel_new +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_new creates and initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies is the number of columns of the panel. + * JB must be at least zero. 
+ * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id. + * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to create and initialize. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * p = NULL; +/* .. + * .. Executable Statements .. + */ +/* + * Allocate the panel structure - Check for enough memory + */ + if( !( p = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" ); + } + + HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, p ); + *PANEL = p; +/* + * End of HPL_pdpanel_new + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.o new file mode 100644 index 000000000..b63cf0f8a Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/panel/HPL_pdpanel_new.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.c new file mode 100644 index 000000000..7ad5a1a99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack 
Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP00N_DEPTH +#define HPL_LASWP00N_DEPTH 32 +#define HPL_LASWP00N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp00N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp00N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp00N performs a series of local row interchanges on a matrix + * A. One row interchange is initiated for rows 0 through M-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A to be + * interchanged. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. + * N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N) to which + * the row interchanges will be applied. On exit, the permuted + * matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * IPIV (local input) const int * + * On entry, IPIV is an array of size M that contains the + * pivoting information. For k in [0..M), IPIV[k]=IROFF + l + * implies that local rows k and l are to be interchanged. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register double r; + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP00N_LOG2_DEPTH ); + int ip, nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH ) + << HPL_LASWP00N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#if ( HPL_LASWP00N_DEPTH > 1 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 2 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 4 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 8 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 16 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + 
r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA ) + { r = *a0; *a0 = *a1; *a1 = r; } + } + } + } +/* + * End of HPL_dlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.o new file mode 100644 index 000000000..acc66206b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp00N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.c new file mode 100644 index 000000000..786d1eff4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp01N.c @@ -0,0 +1,209 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Default column-blocking factor: columns are processed in groups of
 * HPL_LASWP01N_DEPTH.  HPL_LASWP01N_LOG2_DEPTH is still defined for
 * builds that reference it, but this routine no longer needs it.
 */
#ifndef HPL_LASWP01N_DEPTH
#define    HPL_LASWP01N_DEPTH       32
#define    HPL_LASWP01N_LOG2_DEPTH   5
#endif

#if ( HPL_LASWP01N_DEPTH < 1 )
#error "HPL_LASWP01N_DEPTH must be a positive integer"
#endif

/*
 * The prototype form is used whenever the compiler is a standard C
 * compiler; the old-style definition is kept only as a fallback.
 */
#if defined( STDC_HEADERS ) || defined( __STDC__ )
void HPL_dlaswp01N
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   double *                         U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp01N
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   double *                         U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp01N copies scattered rows of A into itself and into an
 * array U.  The row offsets in A of the source rows are specified by
 * LINDXA.  The destinations of those rows are specified by LINDXAU.  A
 * non-negative value of LINDXAU[i] selects U as the destination array,
 * a negative value selects A.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies the number of rows of A that should be
 *         moved within A or copied into U. Nothing is done when M <= 0.
 *
 * N       (local input)                 const int
 *         On entry, N  specifies the length of rows of A that should be
 *         moved within A or copied into U. Nothing is done when N <= 0.
 *
 * A       (local input/output)          double *
 *         On entry, A points to an array of dimension (LDA,N). The rows
 *         of this array specified by LINDXA should be moved within A or
 *         copied into U.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,N). The rows
 *         of A specified by LINDXA are copied within this array U at
 *         the positions indicated by non-negative values of LINDXAU.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,M).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be moved within A or
 *         copied into U.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension M  that  contains
 *         the destination of each row.  When LINDXAU[i] >= 0,  the  row
 *         LINDXA[i] of A is copied into U at the position  LINDXAU[i];
 *         otherwise the row LINDXA[i] of A is moved to the position
 *         -LINDXAU[i] within A.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   /* Strides are kept in size_t so that j * LDA cannot wrap an int.   */
   const size_t               lda = (size_t)(LDA), ldu = (size_t)(LDU);
   int                        i, j, jb, k;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Sweep the columns in blocks of at most HPL_LASWP01N_DEPTH;  the last
 * block simply holds the N % HPL_LASWP01N_DEPTH remaining columns.  For
 * every block the rows are visited in the order i = 0 .. M-1.
 */
   for( j = 0; j < N; j += jb )
   {
      double                  * Aj = A + (size_t)(j) * lda;

      jb = N - j;
      if( jb > HPL_LASWP01N_DEPTH ) jb = HPL_LASWP01N_DEPTH;

      for( i = 0; i < M; i++ )
      {
         const double         * src = Aj + (size_t)(LINDXA[i]);
         double               * dst;
         size_t               ldd;

         if( LINDXAU[i] >= 0 )
         {
            dst = U + (size_t)(j) * ldu + (size_t)(LINDXAU[i]);
            ldd = ldu;
         }
         else
         {
/*
 * Negate before widening:  the offset is then a small  positive  value
 * instead of a wrapped-around unsigned one.
 */
            dst = Aj + (size_t)( -LINDXAU[i] );
            ldd = lda;
         }

         for( k = 0; k < jb; k++ )
            dst[(size_t)(k) * ldd] = src[(size_t)(k) * lda];
      }
   }
/*
 * End of HPL_dlaswp01N
 */
}
/*
 * Default column-blocking factor: columns are processed in groups of
 * HPL_LASWP01T_DEPTH.  HPL_LASWP01T_LOG2_DEPTH is still defined for
 * builds that reference it, but this routine no longer needs it.
 */
#ifndef HPL_LASWP01T_DEPTH
#define    HPL_LASWP01T_DEPTH       32
#define    HPL_LASWP01T_LOG2_DEPTH   5
#endif

#if ( HPL_LASWP01T_DEPTH < 1 )
#error "HPL_LASWP01T_DEPTH must be a positive integer"
#endif

/*
 * The prototype form is used whenever the compiler is a standard C
 * compiler; the old-style definition is kept only as a fallback.
 */
#if defined( STDC_HEADERS ) || defined( __STDC__ )
void HPL_dlaswp01T
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   double *                         U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp01T
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   double *                         U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp01T copies scattered rows of A into itself and into an
 * array U.  The row offsets in A of the source rows are specified by
 * LINDXA.  The destinations of those rows are specified by LINDXAU.  A
 * non-negative value of LINDXAU[i] selects U as the destination array,
 * a negative value selects A.  Rows of A are stored as columns in U.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies the number of rows of A that should be
 *         moved within A or copied into U. Nothing is done when M <= 0.
 *
 * N       (local input)                 const int
 *         On entry, N  specifies the length of rows of A that should be
 *         moved within A or copied into U. Nothing is done when N <= 0.
 *
 * A       (local input/output)          double *
 *         On entry, A points to an array of dimension (LDA,N). The rows
 *         of this array specified by LINDXA should be moved within A or
 *         copied into U.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,M). The rows
 *         of A specified by  LINDXA  are copied within this array  U at
 *         the positions indicated by non-negative values of LINDXAU. The
 *         rows of A are stored as columns in U.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,N).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be moved within A or
 *         copied into U.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension M  that  contains
 *         the destination of each row.  When LINDXAU[i] >= 0,  the  row
 *         LINDXA[i] of A is copied into column LINDXAU[i] of U;  other-
 *         wise the row LINDXA[i] of A is moved to the position
 *         -LINDXAU[i] within A.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   /* Strides are kept in size_t so that j * LDA cannot wrap an int.   */
   const size_t               lda = (size_t)(LDA), ldu = (size_t)(LDU);
   int                        i, j, jb, k;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Sweep the columns of A in blocks of at most HPL_LASWP01T_DEPTH;  the
 * last block holds the N % HPL_LASWP01T_DEPTH remaining columns.  For
 * every block the rows are visited in the order i = 0 .. M-1.
 */
   for( j = 0; j < N; j += jb )
   {
      double                  * Aj = A + (size_t)(j) * lda;

      jb = N - j;
      if( jb > HPL_LASWP01T_DEPTH ) jb = HPL_LASWP01T_DEPTH;

      for( i = 0; i < M; i++ )
      {
         const double         * src = Aj + (size_t)(LINDXA[i]);
         double               * dst;

         if( LINDXAU[i] >= 0 )
         {
/*
 * Transposed copy:  entries j .. j+jb-1  of the row are contiguous  in
 * column LINDXAU[i] of U.
 */
            dst = U + (size_t)(LINDXAU[i]) * ldu + (size_t)(j);
            for( k = 0; k < jb; k++ )
               dst[k] = src[(size_t)(k) * lda];
         }
         else
         {
/*
 * Negate before widening:  the offset is then a small  positive  value
 * instead of a wrapped-around unsigned one.
 */
            dst = Aj + (size_t)( -LINDXAU[i] );
            for( k = 0; k < jb; k++ )
               dst[(size_t)(k) * lda] = src[(size_t)(k) * lda];
         }
      }
   }
/*
 * End of HPL_dlaswp01T
 */
}
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.c new file mode 100644 index 000000000..45c2f5f1f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.c @@ -0,0 +1,205 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Default column-blocking factor: columns are processed in groups of
 * HPL_LASWP02N_DEPTH.  HPL_LASWP02N_LOG2_DEPTH is still defined for
 * builds that reference it, but this routine no longer needs it.
 */
#ifndef HPL_LASWP02N_DEPTH
#define    HPL_LASWP02N_DEPTH       32
#define    HPL_LASWP02N_LOG2_DEPTH   5
#endif

#if ( HPL_LASWP02N_DEPTH < 1 )
#error "HPL_LASWP02N_DEPTH must be a positive integer"
#endif

/*
 * The prototype form is used whenever the compiler is a standard C
 * compiler; the old-style definition is kept only as a fallback.
 */
#if defined( STDC_HEADERS ) || defined( __STDC__ )
void HPL_dlaswp02N
(
   const int                        M,
   const int                        N,
   const double *                   A,
   const int                        LDA,
   double *                         W0,
   double *                         W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp02N
( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   const double *                   A;
   const int                        LDA;
   double *                         W0;
   double *                         W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp02N packs scattered rows of an array  A  into workspace W.
 * The row offsets in A are specified by LINDXA.  The values of LINDXAU
 * are recorded, converted to double, in W0.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies the number of rows of A that should be
 *         copied into W. Nothing is done when M <= 0.
 *
 * N       (local input)                 const int
 *         On entry, N  specifies the length of rows of A that should be
 *         copied into W. Nothing is done when N <= 0.
 *
 * A       (local input)                 const double *
 *         On entry, A points to an array of dimension (LDA,N). The rows
 *         of this array specified by LINDXA should be copied into W.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * W0      (local input/output)          double *
 *         On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
 *         the destination offset  in U where the columns of W should be
 *         copied:  W0[i*LDW] = LINDXAU[i] for i in [0..M).
 *
 * W       (local output)                double *
 *         On entry, W  is an array of size (LDW,M). On exit, W contains
 *         the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
 *         in W(:,i).
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be copied into W.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension M  that  contains
 *         the local  row indexes of  U that should be copied into A and
 *         replaced by the rows of W.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   /* Strides are kept in size_t so that j * LDA cannot wrap an int.   */
   const size_t               lda = (size_t)(LDA), ldw = (size_t)(LDW);
   int                        i, j, jb, k;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Record the destination index of every packed row.
 */
   for( i = 0; i < M; i++ )
      W0[(size_t)(i) * ldw] = (double)(LINDXAU[i]);
/*
 * Pack the rows, sweeping the columns of A in blocks of at most
 * HPL_LASWP02N_DEPTH;  the last block holds the remaining columns.
 */
   for( j = 0; j < N; j += jb )
   {
      const double            * Aj = A + (size_t)(j) * lda;

      jb = N - j;
      if( jb > HPL_LASWP02N_DEPTH ) jb = HPL_LASWP02N_DEPTH;

      for( i = 0; i < M; i++ )
      {
         const double         * src = Aj + (size_t)(LINDXA[i]);
         double               * dst = W + (size_t)(i) * ldw + (size_t)(j);

         for( k = 0; k < jb; k++ )
            dst[k] = src[(size_t)(k) * lda];
      }
   }
/*
 * End of HPL_dlaswp02N
 */
}
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp02N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.c new file mode 100644 index 000000000..760732a8d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.c @@ -0,0 +1,194 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
/*
 * Default column-blocking factor: columns are processed in groups of
 * HPL_LASWP03N_DEPTH.  HPL_LASWP03N_LOG2_DEPTH is still defined for
 * builds that reference it, but this routine no longer needs it.
 */
#ifndef HPL_LASWP03N_DEPTH
#define    HPL_LASWP03N_DEPTH       32
#define    HPL_LASWP03N_LOG2_DEPTH   5
#endif

#if ( HPL_LASWP03N_DEPTH < 1 )
#error "HPL_LASWP03N_DEPTH must be a positive integer"
#endif

/*
 * The prototype form is used whenever the compiler is a standard C
 * compiler; the old-style definition is kept only as a fallback.
 */
#if defined( STDC_HEADERS ) || defined( __STDC__ )
void HPL_dlaswp03N
(
   const int                        M,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const double *                   W0,
   const double *                   W,
   const int                        LDW
)
#else
void HPL_dlaswp03N
( M, N, U, LDU, W0, W, LDW )
   const int                        M;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp03N copies columns of  W  into  rows  of an  array U.  The
 * destination in U of these columns contained in W is stored within W0.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies  the  number  of columns of  W  stored
 *         contiguously that should be copied into U.  Nothing  is  done
 *         when M <= 0.
 *
 * N       (local input)                 const int
 *         On entry,  N  specifies  the  length of columns of  W  stored
 *         contiguously that should be copied into U.  Nothing  is  done
 *         when N <= 0.
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,N).  Columns
 *         of W are copied as rows within this array U at  the positions
 *         specified in W0.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,M).
 *
 * W0      (local input)                 const double *
 *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
 *         the destination offset  in U where the columns of W should be
 *         copied.  Each offset is read from  W0[i*LDW]  and  converted
 *         with a (size_t) cast, so it is expected to be a  non-negative
 *         integral value.
 *
 * W       (local input)                 const double *
 *         On entry, W  is an array of size (LDW,M),  that contains data
 *         to be copied into U. For i in [0..M), entries W(:,i) are
 *         copied into the row W0(i*LDW) of U.
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   /* Strides are kept in size_t so that j * LDU cannot wrap an int.   */
   const size_t               ldu = (size_t)(LDU), ldw = (size_t)(LDW);
   int                        i, j, jb, k;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Scatter the packed columns of W, sweeping the columns of U in blocks
 * of at most HPL_LASWP03N_DEPTH;  the last block holds  the  remaining
 * columns.
 */
   for( j = 0; j < N; j += jb )
   {
      double                  * Uj = U + (size_t)(j) * ldu;

      jb = N - j;
      if( jb > HPL_LASWP03N_DEPTH ) jb = HPL_LASWP03N_DEPTH;

      for( i = 0; i < M; i++ )
      {
         const double         * src = W + (size_t)(i) * ldw + (size_t)(j);
         double               * dst = Uj + (size_t)(W0[(size_t)(i) * ldw]);

         for( k = 0; k < jb; k++ )
            dst[(size_t)(k) * ldu] = src[k];
      }
   }
/*
 * End of HPL_dlaswp03N
 */
}
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.c new file mode 100644 index 000000000..fece692ce --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp03T.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
/*
 * Default unrolling depth of the copy loop and its base-2 logarithm.
 * Both may be overridden at compile time; they must stay consistent
 * ( HPL_LASWP03T_DEPTH == 1 << HPL_LASWP03T_LOG2_DEPTH ).
 */
#ifndef HPL_LASWP03T_DEPTH
#define    HPL_LASWP03T_DEPTH       32
#define    HPL_LASWP03T_LOG2_DEPTH   5
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp03T
(
   const int                        M,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const double *                   W0,
   const double *                   W,
   const int                        LDW
)
#else
void HPL_dlaswp03T
( M, N, U, LDU, W0, W, LDW )
   const int                        M;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp03T copies M columns of W, each of length N, into columns
 * of U.  The index of the destination column in U for the i-th column
 * of W is read (as a double) from W0[i*LDW].
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         Number of columns of W to copy.  Nothing is done if M <= 0.
 *
 * N       (local input)                 const int
 *         Length of each copied column.  Nothing is done if N <= 0.
 *
 * U       (local input/output)          double *
 *         Destination array; column c of U starts at U + c*LDU.
 *
 * LDU     (local input)                 const int
 *         Leading dimension (column stride) of U.
 *
 * W0      (local input)                 const double *
 *         W0[i*LDW] holds the destination column index in U of the
 *         i-th column of W, for i in [0..M).
 *
 * W       (local input)                 const double *
 *         Source array; column i of W starts at W + i*LDW.
 *
 * LDW     (local input)                 const int
 *         Stride between consecutive columns of W and between the
 *         consecutive destination indexes stored in W0.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * wblk = W, * src;
   double                     * dst;
   size_t                     woff;
   int                        full, tail, done, col, k;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Split N into a multiple of the unrolling depth plus a remainder
 */
   full = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) <<
                 HPL_LASWP03T_LOG2_DEPTH );
   tail = N - full;
/*
 * Full chunks: for each chunk of rows, visit every column of W
 */
   for( done = 0; done < full; done += HPL_LASWP03T_DEPTH )
   {
      for( col = 0; col < M; col++ )
      {
         woff = (size_t)(col) * (size_t)(LDW);
         dst  = U + (size_t)(W0[woff]) * (size_t)(LDU);
         src  = wblk + woff;

         for( k = 0; k < HPL_LASWP03T_DEPTH; k++ ) { dst[k] = src[k]; }
      }
/*
 * Move both arrays down to the next chunk of rows
 */
      U    += ( 1 << HPL_LASWP03T_LOG2_DEPTH );
      wblk += HPL_LASWP03T_DEPTH;
   }
/*
 * Remaining rows (fewer than the unrolling depth)
 */
   if( tail > 0 )
   {
      for( col = 0; col < M; col++ )
      {
         woff = (size_t)(col) * (size_t)(LDW);
         dst  = U + (size_t)(W0[woff]) * (size_t)(LDU);
         src  = wblk + woff;

         for( k = 0; k < tail; k++ ) { dst[k] = src[k]; }
      }
   }
/*
 * End of HPL_dlaswp03T
 */
}
mode 100644 index 000000000..4f9c490a5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp04N.c @@ -0,0 +1,285 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Default unrolling depth of the copy loops and its base-2 logarithm.
 * Both may be overridden at compile time; they must stay consistent
 * ( HPL_LASWP04N_DEPTH == 1 << HPL_LASWP04N_LOG2_DEPTH ).
 */
#ifndef HPL_LASWP04N_DEPTH
#define    HPL_LASWP04N_DEPTH       32
#define    HPL_LASWP04N_LOG2_DEPTH   5
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp04N
(
   const int                        M0,
   const int                        M1,
   const int                        N,
   double *                         U,
   const int                        LDU,
   double *                         A,
   const int                        LDA,
   const double *                   W0,
   const double *                   W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp04N
( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M0;
   const int                        M1;
   const int                        N;
   double *                         U;
   const int                        LDU;
   double *                         A;
   const int                        LDA;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp04N performs two row updates over N columns:
 *
 *   - for i in [0..M0):  row LINDXAU[i] of U is saved into row
 *     LINDXA[i] of A and is then overwritten by column i of W;
 *   - for i in [M0..M1): row W0[i*LDW] of U is overwritten by column
 *     i of W.
 *
 * Arguments
 * =========
 *
 * M0      (local input)                 const int
 *         Number of rows of U that are copied into A and replaced by
 *         columns of W.
 *
 * M1      (local input)                 const int
 *         Upper bound (exclusive) of the column indexes of W that are
 *         copied into rows of U.  Nothing is done when both M0 <= 0
 *         and M1 <= 0.
 *
 * N       (local input)                 const int
 *         Length of the rows that are moved.  Nothing is done if
 *         N <= 0.
 *
 * U       (local input/output)          double *
 *         Array whose rows are read and overwritten; consecutive
 *         entries of a row are LDU apart.
 *
 * LDU     (local input)                 const int
 *         Leading dimension of U.
 *
 * A       (local output)                double *
 *         Array receiving the saved rows of U; consecutive entries
 *         of a row are LDA apart.
 *
 * LDA     (local input)                 const int
 *         Leading dimension of A.
 *
 * W0      (local input)                 const double *
 *         For i in [M0..M1), W0[i*LDW] holds (as a double) the index
 *         of the row of U that receives column i of W.
 *
 * W       (local input)                 const double *
 *         Source array; column i of W starts at W + i*LDW.
 *
 * LDW     (local input)                 const int
 *         Stride between consecutive columns of W and between the
 *         consecutive row indexes stored in W0.
 *
 * LINDXA  (local input)                 const int *
 *         For i in [0..M0), LINDXA[i] is the row index in A.
 *
 * LINDXAU (local input)                 const int *
 *         For i in [0..M0), LINDXAU[i] is the row index in U.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * wblk = W, * src;
   double                     * arow, * urow;
   const int                  stepA = (int)( (unsigned int)(LDA) <<
                                             HPL_LASWP04N_LOG2_DEPTH ),
                              stepU = (int)( (unsigned int)(LDU) <<
                                             HPL_LASWP04N_LOG2_DEPTH );
   size_t                     woff;
   int                        full, tail, done, col, k;
/* ..
 * .. Executable Statements ..
 */
   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
/*
 * Split N into a multiple of the unrolling depth plus a remainder
 */
   full = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) <<
                 HPL_LASWP04N_LOG2_DEPTH );
   tail = N - full;

   for( done = 0; done < full; done += HPL_LASWP04N_DEPTH )
   {
/*
 * Save the row of U into A, then refill it from W
 */
      for( col = 0; col < M0; col++ )
      {
         arow = A    + (size_t)(LINDXA[col]);
         urow = U    + (size_t)(LINDXAU[col]);
         src  = wblk + (size_t)(col) * (size_t)(LDW);

         for( k = 0; k < HPL_LASWP04N_DEPTH;
              k++, arow += LDA, urow += LDU )
         { *arow = *urow; *urow = src[k]; }
      }
/*
 * Plain copy of the remaining columns of W into rows of U
 */
      for( col = M0; col < M1; col++ )
      {
         woff = (size_t)(col) * (size_t)(LDW);
         urow = U    + (size_t)(W0[woff]);
         src  = wblk + woff;

         for( k = 0; k < HPL_LASWP04N_DEPTH; k++, urow += LDU )
         { *urow = src[k]; }
      }
/*
 * Advance to the next chunk of columns of A and U / rows of W
 */
      A    += stepA;
      U    += stepU;
      wblk += HPL_LASWP04N_DEPTH;
   }
/*
 * Remaining columns (fewer than the unrolling depth)
 */
   if( tail != 0 )
   {
      for( col = 0; col < M0; col++ )
      {
         arow = A    + (size_t)(LINDXA[col]);
         urow = U    + (size_t)(LINDXAU[col]);
         src  = wblk + (size_t)(col) * (size_t)(LDW);

         for( k = 0; k < tail; k++, arow += LDA, urow += LDU )
         { *arow = *urow; *urow = src[k]; }
      }
      for( col = M0; col < M1; col++ )
      {
         woff = (size_t)(col) * (size_t)(LDW);
         urow = U    + (size_t)(W0[woff]);
         src  = wblk + woff;

         for( k = 0; k < tail; k++, urow += LDU ) { *urow = src[k]; }
      }
   }
/*
 * End of HPL_dlaswp04N
 */
}
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Define default value for unrolling factor
 * ( HPL_LASWP04T_DEPTH must equal 1 << HPL_LASWP04T_LOG2_DEPTH )
 */
#ifndef HPL_LASWP04T_DEPTH
#define    HPL_LASWP04T_DEPTH       32
#define    HPL_LASWP04T_LOG2_DEPTH   5
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp04T
(
   const int                        M0,
   const int                        M1,
   const int                        N,
   double *                         U,
   const int                        LDU,
   double *                         A,
   const int                        LDA,
   const double *                   W0,
   const double *                   W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp04T
( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M0;
   const int                        M1;
   const int                        N;
   double *                         U;
   const int                        LDU;
   double *                         A;
   const int                        LDA;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
 * columns of U with columns of W. In addition M1 - M0 columns of W are
 * copied into U.
 *
 * Arguments
 * =========
 *
 * M0      (local input)                 const int
 *         On entry, M0 specifies the number of columns of U that should
 *         be copied into A and replaced by columns of W.
 *
 * M1      (local input)                 const int
 *         On entry, M1 is the (exclusive) upper bound of the indexes of
 *         the columns of W that are copied into U: columns M0..M1-1 of
 *         W are copied without touching A.  The routine returns at once
 *         when both M0 <= 0 and M1 <= 0.
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the columns of U that
 *         will be copied into rows of A.  The routine returns at once
 *         when N <= 0.
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,*).  Column
 *         c of U starts at U + c*LDU.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the rows of this array specified by LINDXA are replaced by
 *         columns of U indicated by LINDXAU.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *
 * W0      (local input)                 const double *
 *         For i in [M0..M1), W0[i*LDW] contains (as a double) the index
 *         of the column of U into which column i of W is copied.
 *
 * W       (local input)                 const double *
 *         Source array; column i of W starts at W + i*LDW.
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W
 *         (also the stride between the indexes stored in W0).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M0 containing the
 *         local row indexes A into which columns of U are copied.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU is an array of dimension M0 that contains
 *         the local column indexes of U that should be copied into A
 *         and replaced by the columns of W.
 *
 * Notes
 * =====
 *
 * All element offsets ( index * leading dimension ) are evaluated in
 * size_t, as in the sibling HPL_dlaswp03T / HPL_dlaswp04N kernels.  A
 * product such as LINDXAU[i] * LDU evaluated in int overflows once the
 * local panel holds more than INT_MAX elements.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * w = W, * w0;
   double                     * a0, * u0;
   const int                  incA = (int)( (unsigned int)(LDA) <<
                                            HPL_LASWP04T_LOG2_DEPTH ),
                              incU = ( 1 << HPL_LASWP04T_LOG2_DEPTH );
   int                        nr, nu;
   register int               i, j;
/* ..
 * .. Executable Statements ..
 */
   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
/*
 * nu = largest multiple of the unrolling depth <= N, nr = remainder
 */
   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) <<
                          HPL_LASWP04T_LOG2_DEPTH ) );

   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU,
        w += HPL_LASWP04T_DEPTH )
   {
      for( i = 0; i < M0; i++ )
      {
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);

         *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA;
#if ( HPL_LASWP04T_DEPTH >  1 )
         *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH >  2 )
         *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA;
         *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH >  4 )
         *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA;
         *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA;
         *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA;
         *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH >  8 )
         *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA;
         *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA;
         *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA;
         *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA;
         *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA;
         *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA;
         *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA;
         *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA;
#endif
#if ( HPL_LASWP04T_DEPTH > 16 )
         *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA;
         *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA;
         *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA;
         *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA;
         *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA;
         *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA;
         *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA;
         *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA;
         *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA;
         *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA;
         *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA;
         *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA;
         *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA;
         *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA;
         *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA;
         *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA;
#endif
      }
      for( i = M0; i < M1; i++ )
      {
         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);

         u0[ 0] = w0[ 0];
#if ( HPL_LASWP04T_DEPTH >  1 )
         u0[ 1] = w0[ 1];
#endif
#if ( HPL_LASWP04T_DEPTH >  2 )
         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
#endif
#if ( HPL_LASWP04T_DEPTH >  4 )
         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
#endif
#if ( HPL_LASWP04T_DEPTH >  8 )
         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
#endif
#if ( HPL_LASWP04T_DEPTH > 16 )
         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
#endif
      }
   }
/*
 * Remaining nr < HPL_LASWP04T_DEPTH entries of every column
 */
   if( nr > 0 )
   {
      for( i = 0; i < M0; i++ )
      {
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; }
      }
      for( i = M0; i < M1; i++ )
      {
         u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
      }
   }
/*
 * End of HPL_dlaswp04T
 */
}
Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Default unrolling depth of the copy loop and its base-2 logarithm.
 * Both may be overridden at compile time; they must stay consistent
 * ( HPL_LASWP05N_DEPTH == 1 << HPL_LASWP05N_LOG2_DEPTH ).
 */
#ifndef HPL_LASWP05N_DEPTH
#define    HPL_LASWP05N_DEPTH       32
#define    HPL_LASWP05N_LOG2_DEPTH   5
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp05N
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   const double *                   U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp05N
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   const double *                   U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp05N copies M rows of U into rows of A: for i in [0..M),
 * row LINDXAU[i] of U is written over row LINDXA[i] of A, across N
 * columns.  U is only read.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         Number of rows to copy.  Nothing is done if M <= 0.
 *
 * N       (local input)                 const int
 *         Length of the copied rows.  Nothing is done if N <= 0.
 *
 * A       (local output)                double *
 *         Destination array; consecutive entries of a row are LDA
 *         apart.
 *
 * LDA     (local input)                 const int
 *         Leading dimension of A.
 *
 * U       (local input)                 const double *
 *         Source array; consecutive entries of a row are LDU apart.
 *
 * LDU     (local input)                 const int
 *         Leading dimension of U.
 *
 * LINDXA  (local input)                 const int *
 *         For i in [0..M), LINDXA[i] is the destination row index in A.
 *
 * LINDXAU (local input)                 const int *
 *         For i in [0..M), LINDXAU[i] is the source row index in U.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * ublk = U, * src;
   double                     * dst;
   const int                  stepA = (int)( (unsigned int)(LDA) <<
                                             HPL_LASWP05N_LOG2_DEPTH ),
                              stepU = (int)( (unsigned int)(LDU) <<
                                             HPL_LASWP05N_LOG2_DEPTH );
   int                        full, tail, done, row, k;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Split N into a multiple of the unrolling depth plus a remainder
 */
   full = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) <<
                 HPL_LASWP05N_LOG2_DEPTH );
   tail = N - full;
/*
 * Full chunks of columns
 */
   for( done = 0; done < full; done += HPL_LASWP05N_DEPTH )
   {
      for( row = 0; row < M; row++ )
      {
         dst = A    + (size_t)(LINDXA[row]);
         src = ublk + (size_t)(LINDXAU[row]);

         for( k = 0; k < HPL_LASWP05N_DEPTH; k++, dst += LDA, src += LDU )
         { *dst = *src; }
      }
/*
 * Advance both arrays to the next chunk of columns
 */
      A    += stepA;
      ublk += stepU;
   }
/*
 * Remaining columns (fewer than the unrolling depth)
 */
   if( tail != 0 )
   {
      for( row = 0; row < M; row++ )
      {
         dst = A    + (size_t)(LINDXA[row]);
         src = ublk + (size_t)(LINDXAU[row]);

         for( k = 0; k < tail; k++, dst += LDA, src += LDU )
         { *dst = *src; }
      }
   }
/*
 * End of HPL_dlaswp05N
 */
}
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.o new file mode 100644 index 000000000..cfedb5ffe Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.c new file mode 100644 index 000000000..0adaa102d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.c @@ -0,0 +1,196 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP05T_DEPTH +#define HPL_LASWP05T_DEPTH 32 +#define HPL_LASWP05T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05T +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05T copies columns of U of global offset LINDXAU into rows + * of A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that will + * be copied into rows of A. N must be at least zero.
+ * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) const double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied from U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local column indexes of U that should be copied in A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) << + HPL_LASWP05T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + *a0 = u0[ 0]; a0 += LDA; +#if ( HPL_LASWP05T_DEPTH > 1 ) + *a0 = u0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 2 ) + *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 4 ) + *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA; + *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 8 ) + *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA; + *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA; + *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA; + *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 16 ) + *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA; + *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA; + *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA; + *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA; + *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA; + *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA; + *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA; + *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; } + } + } +/* + * End of HPL_dlaswp05T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.o new file mode 100644 index 000000000..f995aa8f5 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp05T.o differ 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.c new file mode 100644 index 000000000..a74bae75c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.c @@ -0,0 +1,206 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06N_DEPTH +#define HPL_LASWP06N_DEPTH 32 +#define HPL_LASWP06N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06N +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06N swaps rows of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with rows of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with rows of U. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows or columns of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows of U that are to be swapped with rows + * of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP06N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) << + HPL_LASWP06N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP06N_DEPTH > 1 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 2 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 4 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 8 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = 
r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 16 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { r = *a0; *a0 = *u0; *u0 = r; } + } + } +/* + * End of HPL_dlaswp06N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.o new file mode 100644 index 000000000..ccc0984e9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.c new file mode 100644 index 000000000..fb53c2a31 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.c @@ -0,0 +1,207 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06T_DEPTH +#define HPL_LASWP06T_DEPTH 32 +#define HPL_LASWP06T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06T +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06T swaps columns of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with columns of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with columns of U. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns of U that are to be swapped with + * rows of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) << + HPL_LASWP06T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + + r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA; +#if ( HPL_LASWP06T_DEPTH > 1 ) + r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 2 ) + r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA; + r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 4 ) + r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA; + r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA; + r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA; + r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 8 ) + r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA; + r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA; + r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA; + r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA; + r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA; + r = *a0; *a0 = u0[13]; u0[13] = r; a0 += 
LDA; + r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA; + r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 16 ) + r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA; + r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA; + r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA; + r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA; + r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA; + r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA; + r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA; + r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA; + r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA; + r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA; + r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA; + r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA; + r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA; + r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA; + r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA; + r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) + { r = *a0; *a0 = u0[j]; u0[j] = r; } + } + } +/* + * End of HPL_dlaswp06T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.o new file mode 100644 index 000000000..1e2d93537 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp06T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.c new file mode 100644 index 000000000..7dbf934f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - 
December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP10N_DEPTH +#define HPL_LASWP10N_DEPTH 32 +#define HPL_LASWP10N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp10N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp10N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp10N performs a sequence of local column interchanges on a + * matrix A. One column interchange is initiated for columns 0 through + * N-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A. M + * must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. N + * must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). This + * array contains the columns onto which the interchanges should + * be applied. On exit, A contains the permuted matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * IPIV (local input) const int * + * IPIV[j] is the column of A swapped with column j, 0 <= j < N.
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * a0, * a1; + const int incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH ); + int jp, mr, mu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH ) + << HPL_LASWP10N_LOG2_DEPTH ) ); + + for( j = 0; j < N; j++ ) + { + if( j != ( jp = IPIV[j] ) ) + { + a0 = A + j * LDA; a1 = A + jp * LDA; + + for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA ) + { + r = *a0; *a0 = *a1; *a1 = r; +#if ( HPL_LASWP10N_DEPTH > 1 ) + r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 2 ) + r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r; + r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 4 ) + r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r; + r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r; + r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r; + r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 8 ) + r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r; + r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r; + r = a0[10]; a0[10] = a1[10]; a1[10] = r; + r = a0[11]; a0[11] = a1[11]; a1[11] = r; + r = a0[12]; a0[12] = a1[12]; a1[12] = r; + r = a0[13]; a0[13] = a1[13]; a1[13] = r; + r = a0[14]; a0[14] = a1[14]; a1[14] = r; + r = a0[15]; a0[15] = a1[15]; a1[15] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 16 ) + r = a0[16]; a0[16] = a1[16]; a1[16] = r; + r = a0[17]; a0[17] = a1[17]; a1[17] = r; + r = a0[18]; a0[18] = a1[18]; a1[18] = r; + r = a0[19]; a0[19] = a1[19]; a1[19] = r; + r = a0[20]; a0[20] = a1[20]; a1[20] = r; + r = a0[21]; a0[21] = a1[21]; a1[21] = r; + r = a0[22]; a0[22] = a1[22]; a1[22] = r; + r = a0[23]; a0[23] = a1[23]; a1[23] = r; + r = a0[24]; a0[24] = a1[24]; a1[24] = r; + r = a0[25]; a0[25] = a1[25]; a1[25] = r; + r = a0[26]; a0[26] = a1[26]; a1[26] = r; + r = a0[27]; a0[27] = a1[27]; a1[27] = r; + r = 
a0[28]; a0[28] = a1[28]; a1[28] = r; + r = a0[29]; a0[29] = a1[29]; a1[29] = r; + r = a0[30]; a0[30] = a1[30]; a1[30] = r; + r = a0[31]; a0[31] = a1[31]; a1[31] = r; +#endif + } + + for( i = 0; i < mr; i++ ) + { r = a0[i]; a0[i] = a1[i]; a1[i] = r; } + } + } +/* + * End of HPL_dlaswp10N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.o new file mode 100644 index 000000000..90f330e48 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_dlaswp10N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.c new file mode 100644 index 000000000..e1b5bbfac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.c @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2l +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2l +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2l computes the local index of a matrix entry pointed to by + * the global index IG. This local returned index is the same in all + * processes. 
+ * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated, in which case this routine returns IG in all + * processes. Otherwise, the value of SRCPROC is ignored. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( IG ); +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. 
+ */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + return( NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ) ); +/* + * End of HPL_indxg2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.o new file mode 100644 index 000000000..e62d9ce51 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2l.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.c new file mode 100644 index 000000000..74662f9d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.c @@ -0,0 +1,176 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_indxg2lp +( + int * IL, + int * PROC, + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +void HPL_indxg2lp +( IL, PROC, IG, INB, NB, SRCPROC, NPROCS ) + int * IL; + int * PROC; + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2lp computes the local index of a matrix entry pointed to by + * the global index IG as well as the process coordinate which posseses + * this entry. The local returned index is the same in all processes. + * + * Arguments + * ========= + * + * IL (output) int * + * On exit, IL specifies the local index corresponding to IG. IL + * is at least zero. + * + * PROC (output) int * + * On exit, PROC is the coordinate of the process owning the + * entry specified by the global index IG. PROC is at least zero + * and less than NPROCS. + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. 
NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated, in which case this routine returns IG in all + * processes. Otherwise, the value of SRCPROC is ignored. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + *IL = IG; + *PROC = SRCPROC; + } + else + { +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC and take + * the NPROCS modulo (definition of the block-cyclic data distribution). 
+ */ + *PROC = SRCPROC + 1 + i; + *PROC = MPosMod( *PROC, NPROCS ); +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + *IL = NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ); + } +/* + * End of HPL_indxg2lp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.o new file mode 100644 index 000000000..a5da4d443 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2lp.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.c new file mode 100644 index 000000000..d0e75f516 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.c @@ -0,0 +1,128 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2p +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2p +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2p computes the process coordinate which posseses the entry + * of a matrix specified by a global index IG. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int proc; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( SRCPROC ); +/* + * Otherwise, IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC + * and take the NPROCS modulo (definition of the block-cyclic data dis- + * tribution). + */ + proc = SRCPROC + 1 + ( IG - INB ) / NB; + return( MPosMod( proc, NPROCS ) ); +/* + * End of HPL_indxg2p + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.o new file mode 100644 index 000000000..59e79159d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxg2p.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.c new file mode 100644 index 000000000..7f139425a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.c @@ -0,0 +1,164 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxl2g +( + const int IL, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxl2g +( IL, INB, NB, PROC, SRCPROC, NPROCS ) + const int IL; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxl2g computes the global index of a matrix entry pointed to + * by the local index IL of the process indicated by PROC. + * + * Arguments + * ========= + * + * IL (input) const int + * On entry, IL specifies the local index of the matrix entry. + * IL must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local array row or column is to be determined. PROC must be + * at least zero and strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. 
+ */ + return( IL ); + } + else if( PROC == SRCPROC ) + { +/* + * If I am SRCPROC, my first block is of size INB + */ + if( IL < INB ) +/* + * If IL belongs to the first block, the local and global indexes are + * equal. + */ + return ( IL ); +/* + * The number of entire blocks before the one IL belongs to is + * ( IL - INB ) / NB + 1. In the other NPROCS-1 processes, there are + * thus NB*( ( IL-INB )/NB + 1 ) entries, that are globally before the + * global entry corresponding to IL. + */ + return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL ); + } + else if( PROC < SRCPROC ) + { +/* + * Otherwise, the process of coordinate MOD(SRCPROC+1, NPROCS) owns the + * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro- + * cesses between this process and PROC not included when going from + * left to right on the process line with possible wrap around. These + * IPROC processes have one more NB block than the other processes, who + * own IL / NB blocks of size NB. + */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB ); + } + else + { +/* + * Same reasoning as above with IPROC = PROC - SRCPROC - 1. 
+ */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1 )+IL+INB ); + } +/* + * End of HPL_indxl2g + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.o new file mode 100644 index 000000000..739c73a85 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_indxl2g.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.c new file mode 100644 index 000000000..2580f2ad4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.c @@ -0,0 +1,382 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_infog2l +( + int I, + int J, + const int IMB, + const int MB, + const int INB, + const int NB, + const int RSRC, + const int CSRC, + const int MYROW, + const int MYCOL, + const int NPROW, + const int NPCOL, + int * II, + int * JJ, + int * PROW, + int * PCOL +) +#else +void HPL_infog2l +( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL ) + int I; + int J; + const int IMB; + const int MB; + const int INB; + const int NB; + const int RSRC; + const int CSRC; + const int MYROW; + const int MYCOL; + const int NPROW; + const int NPCOL; + int * II; + int * JJ; + int * PROW; + int * PCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_infog2l computes the starting local index II, JJ corresponding to + * the submatrix starting globally at the entry pointed by I, J. 
This + * routine returns the coordinates in the grid of the process owning the + * matrix entry of global indexes I, J, namely PROW and PCOL. + * + * Arguments + * ========= + * + * I (global input) int + * On entry, I specifies the global row index of the matrix + * entry. I must be at least zero. + * + * J (global input) int + * On entry, J specifies the global column index of the matrix + * entry. J must be at least zero. + * + * IMB (global input) const int + * On entry, IMB specifies the size of the first row block of + * the global matrix. IMB must be at least one. + * + * MB (global input) const int + * On entry, MB specifies the blocking factor used to partition + * and distribute the rows of the matrix A. MB must be larger + * than one. + * + * INB (global input) const int + * On entry, INB specifies the size of the first column block of + * the global matrix. INB must be at least one. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the columns of the matrix A. NB must be larger + * than one. + * + * RSRC (global input) const int + * On entry, RSRC specifies the row coordinate of the process + * that possesses the row I. RSRC must be at least zero and + * strictly less than NPROW. + * + * CSRC (global input) const int + * On entry, CSRC specifies the column coordinate of the process + * that possesses the column J. CSRC must be at least zero and + * strictly less than NPCOL. + * + * MYROW (local input) const int + * On entry, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. 
+ * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. + * + * II (local output) int * + * On exit, II specifies the local starting row index of the + * submatrix. On exit, II is at least 0. + * + * JJ (local output) int * + * On exit, JJ specifies the local starting column index of the + * submatrix. On exit, JJ is at least 0. + * + * PROW (global output) int * + * On exit, PROW is the row coordinate of the process owning the + * entry specified by the global index I. PROW is at least zero + * and less than NPROW. + * + * PCOL (global output) int * + * On exit, PCOL is the column coordinate of the process owning + * the entry specified by the global index J. PCOL is at least + * zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc; +/* .. + * .. Executable Statements .. + */ + imb = IMB; + *PROW = RSRC; + + if( ( *PROW == -1 ) || ( NPROW == 1 ) ) + { +/* + * The data is not distributed, or there is just one process row in the + * grid. + */ + *II = I; + } + else if( I < imb ) + { +/* + * I refers to an entry in the first block of rows + */ + *II = ( MYROW == *PROW ? I : 0 ); + } + else + { + mb = MB; + rsrc = *PROW; +/* + * The discussion goes as follows: compute my distance from the source + * process so that within this process coordinate system, the source + * process is the process such that mydist = 0, or equivalently + * MYROW == rsrc. + * + * Find out the global coordinate of the block I belongs to (nblocks), + * as well as the minimum local number of blocks that every process has. 
+ * + * when mydist < nblocks-ilocblk*NPROCS, I own ilocblk + 1 full blocks, + * when mydist > nblocks-ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks-ilocblk*NPROCS, I own ilocblk full blocks + * but not I, or I own ilocblk + 1 blocks and the entry I refers to. + */ + if( MYROW == rsrc ) + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I - imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only + * three possible cases: + * + * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I do not own + * I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this + * case cannot happen when ilocblk is zero, since nblocks is at + * least one. + * + * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in + * which case I and II can respectively be written as IMB + + * (nblocks-1)*NB + IL and IMB + (ilocblk-1) * MB + IL. That is + * II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen + * when ilocblk is zero, since nblocks is at least one. + * + * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns + * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. + * Note that when ilocblk is zero, II is just IMB. + */ + if( nblocks < NPROW ) + { + *II = imb; + } + else + { + ilocblk = nblocks / NPROW; + if( ilocblk * NPROW >= nblocks ) + { + *II = ( ( MYROW == *PROW ) ? + I + ( ilocblk - nblocks ) * mb : + imb + ( ilocblk - 1 ) * mb ); + } + else + { + *II = imb + ilocblk * mb; + } + } + } + else + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. 
+ */ + nblocks = ( I -= imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; +/* + * When mydist < nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks + * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB. + * When mydist>=nblocks-ilocblk*NPROW and I do not own I, I own ilocblk + * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk + * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB. + */ + if( nblocks < NPROW ) + { + mydist -= nblocks; + *II = ( ( mydist < 0 ) ? mb : + ( ( MYROW == *PROW ) ? + I + ( 1 - nblocks ) * mb : 0 ) ); + } + else + { + ilocblk = nblocks / NPROW; + mydist -= nblocks - ilocblk * NPROW; + *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : + ( ( MYROW == *PROW ) ? + ( ilocblk - nblocks + 1 ) * mb + I : + ilocblk * mb ) ); + } + } + } +/* + * Idem for the columns + */ + inb = INB; + *PCOL = CSRC; + + if( ( *PCOL == -1 ) || ( NPCOL == 1 ) ) + { + *JJ = J; + } + else if( J < inb ) + { + *JJ = ( MYCOL == *PCOL ? J : 0 ); + } + else + { + nb = NB; + csrc = *PCOL; + + if( MYCOL == csrc ) + { + nblocks = ( J - inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( nblocks < NPCOL ) + { + *JJ = inb; + } + else + { + ilocblk = nblocks / NPCOL; + if( ilocblk * NPCOL >= nblocks ) + { + *JJ = ( ( MYCOL == *PCOL ) ? 
+ J + ( ilocblk - nblocks ) * nb : + inb + ( ilocblk - 1 ) * nb ); + } + else + { + *JJ = inb + ilocblk * nb; + } + } + } + else + { + nblocks = ( J -= inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; + + if( nblocks < NPCOL ) + { + mydist -= nblocks; + *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? + J + ( 1 - nblocks )*nb : 0 ) ); + } + else + { + ilocblk = nblocks / NPCOL; + mydist -= nblocks - ilocblk * NPCOL; + *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : + ( ( MYCOL == *PCOL ) ? + ( ilocblk - nblocks + 1 ) * nb + J : + ilocblk * nb ) ); + } + } + } +/* + * End of HPL_infog2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.o new file mode 100644 index 000000000..60e9f71da Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_infog2l.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.c new file mode 100644 index 000000000..39cd736d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.c @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numroc +( + const int N, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numroc +( N, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numroc returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index 0. It simply calls HPL_numrocI with I = 0. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements ..
+ */ + return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) ); +/* + * End of HPL_numroc + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.o new file mode 100644 index 000000000..5c9ee9fd6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numroc.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.c new file mode 100644 index 000000000..70f3497de --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.c @@ -0,0 +1,243 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numrocI +( + const int N, + const int I, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numrocI +( N, I, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int I; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numrocI returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index I. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * I (input) const int + * On entry, I specifies the global index of the matrix entry. + * I must be at least zero.
+ * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, inb, mydist, nblocks, srcproc; +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( N ); +/* + * Compute coordinate of process owning I and corresponding INB + */ + srcproc = SRCPROC; + + if( ( inb = INB - I ) <= 0 ) + { +/* + * I is not in the first block, find out which process has it and update + * the size of first block + */ + srcproc += ( nblocks = (-inb) / NB + 1 ); + srcproc -= ( srcproc / NPROCS ) * NPROCS; + inb += nblocks * NB; + } +/* + * Now everything is just like N, I=0, INB, NB, srcproc, NPROCS. The + * discussion goes as follows: compute my distance from the source pro- + * cess so that within this process coordinate system, the source pro- + * cess is the process such that mydist = 0, or PROC == srcproc.
+ * + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. Then remark that + * + * when mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks, + * when mydist > nblocks - ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks - ilocblk*NPROCS, either the last block is not + * full and I own it, or the last block is full and I am the first pro- + * cess owning only ilocblk full blocks. + */ + if( PROC == srcproc ) + { +/* + * I am the source process, i.e. I own I (mydist=0). When N <= INB, the + * answer is simply N. + */ + if( N <= inb ) return( N ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only + * two possible cases: + * + * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di- + * vides the global number of full blocks, then the source process + * srcproc owns one more block than the other processes; and N can + * be rewritten as N = INB + (nblocks-1) * NB + LNB with LNB >= 0 + * size of the last block. Similarly, the local value Np correspon- + * ding to N can be written as Np = INB + (ilocblk-1) * NB + LNB = + * N + ( ilocblk-1 - (nblocks-1) )*NB. Note that this case cannot + * happen when ilocblk is zero, since nblocks is at least one. + * + * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only + * owns full blocks, and therefore Np = INB + ilocblk * NB. Note + * that when ilocblk is zero, Np is just INB. + */ + if( nblocks < NPROCS ) return( inb ); + + ilocblk = nblocks / NPROCS; + return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB : + N + ( ilocblk - nblocks ) * NB ); + } + else + { +/* + * I am not the source process. When N <= INB, the answer is simply 0.
+ */ + if( N <= inb ) return( 0 ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS; +/* + * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks + * of size NB since I am not the source process, + * + * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks + * of size NB since I am not the source process, + * + * when mydist = nblocks - ilocblk*NPROCS, + * either the last block is not full and I own it, in which case + * N = INB + (nblocks - 1)*NB + LNB with LNB the size of the last + * block such that NB > LNB > 0; the local value Np corresponding to + * N is given by Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB; + * or the last block is full and I am the first process owning only + * ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and + * Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB. + */ + if( nblocks < NPROCS ) + return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 : + N - inb + NB * ( 1 - nblocks ) ) ); + + ilocblk = nblocks / NPROCS; + mydist -= nblocks - ilocblk * NPROCS; + return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : + ( ( mydist > 0 ) ?
ilocblk * NB : + N - inb + NB * ( ilocblk - nblocks + 1 ) ) ); + } +/* + * End of HPL_numrocI + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.o new file mode 100644 index 000000000..d6d0bbf65 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_numrocI.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.c new file mode 100644 index 000000000..268975fc1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.c @@ -0,0 +1,137 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pabort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pabort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pabort displays an error message on stderr and halts execution. + * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... 
is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( stderr, + "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); + + MPI_Abort( MPI_COMM_WORLD, -1 ); + exit( -1 ); +/* + * End of HPL_pabort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.o new file mode 100644 index 000000000..2bf97bc44 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pabort.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.c new file mode 100644 index 000000000..73cf649da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.c @@ -0,0 +1,143 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlamch +( + MPI_Comm COMM, + const HPL_T_MACH CMACH +) +#else +double HPL_pdlamch +( COMM, CMACH ) + MPI_Comm COMM; + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum(sfmin) such that + * 1/sfmin does not overflow, the base of the machine (base), the precision + * (prec), the number of (base) digits in the mantissa (t), whether + * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum + * exponent before (gradual) underflow (emin), the underflow threshold + * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the + * overflow threshold (rmax) - (base**emax)*(1-eps). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection.
+ * + * CMACH (global input) const HPL_T_MACH + * Specifies the value to be returned by HPL_pdlamch + * = HPL_MACH_EPS, HPL_pdlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + * = HPL_MACH_BASE, HPL_pdlamch := base + * = HPL_MACH_PREC, HPL_pdlamch := eps*base + * = HPL_MACH_MLEN, HPL_pdlamch := t + * = HPL_MACH_RND, HPL_pdlamch := rnd + * = HPL_MACH_EMIN, HPL_pdlamch := emin + * = HPL_MACH_RMIN, HPL_pdlamch := rmin + * = HPL_MACH_EMAX, HPL_pdlamch := emax + * = HPL_MACH_RMAX, HPL_pdlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double param; +/* .. + * .. Executable Statements ..
+ */ + param = HPL_dlamch( CMACH ); + + switch( CMACH ) + { + case HPL_MACH_EPS : + case HPL_MACH_SFMIN : + case HPL_MACH_EMIN : + case HPL_MACH_RMIN : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_max, COMM ); + break; /* eps, sfmin, emin, rmin: combined over COMM with HPL_max */ + case HPL_MACH_EMAX : + case HPL_MACH_RMAX : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_min, COMM ); + break; /* emax, rmax: combined over COMM with HPL_min */ + default : + break; /* any other CMACH: the local value is returned unchanged */ + } + + return( param ); +/* + * End of HPL_pdlamch + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.o new file mode 100644 index 000000000..c7731580e Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlamch.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.c new file mode 100644 index 000000000..40bdcc36b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.c @@ -0,0 +1,242 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlange +( + const HPL_T_grid * GRID, + const HPL_T_NORM NORM, + const int M, + const int N, + const int NB, + const double * A, + const int LDA +) +#else +double HPL_pdlange +( GRID, NORM, M, N, NB, A, LDA ) + const HPL_T_grid * GRID; + const HPL_T_NORM NORM; + const int M; + const int N; + const int NB; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a distributed matrix A: + * + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NORM (global input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,LocQ(N)), + * that contains the local pieces of the distributed matrix A.
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + MPI_Comm Acomm, Ccomm, Rcomm; + int ii, jj, mp, mycol, myrow, npcol, nprow, + nq; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Ccomm = GRID->col_comm; + Acomm = GRID->all_comm; + + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( Mmin( M, N ) == 0 ) { return( v0 ); } + else if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ): local maximum first, then HPL_max reduce on Acomm + */ + if( ( nq > 0 ) && ( mp > 0 ) ) + { + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - mp; /* advance to the start of the next local column */ + } + } + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Acomm ); + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ), i.e. the maximum column sum of abs( A ).
+ */ + if( nq > 0 ) + { + work = (double*)malloc( (size_t)(nq) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( jj = 0; jj < nq; jj++ ) + { + s = HPL_rzero; + for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; } + work[jj] = s; A += LDA - mp; + } +/* + * Find sum of global matrix columns, store on row 0 of process grid + */ + (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum, + 0, Ccomm ); +/* + * Find maximum sum of columns for 1-norm + */ + if( myrow == 0 ) + { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in row 0, store result in process (0,0) + */ + if( myrow == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Rcomm ); + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ), i.e. the maximum row sum of abs( A ) + */ + if( mp > 0 ) + { + work = (double*)malloc( (size_t)(mp) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; } + + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { work[ii] += Mabs( *A ); A++; } + A += LDA - mp; + } +/* + * Find sum of global matrix rows, store on column 0 of process grid + */ + (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum, + 0, Rcomm ); +/* + * Find maximum sum of rows for inf-norm + */ + if( mycol == 0 ) + { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in column 0, store result in process (0,0) + */ + if( mycol == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, + 0, Ccomm ); + } +/* + * Broadcast answer to every process in the grid + */ + (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm ); + + return( v0 ); +/* + * End of HPL_pdlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.o
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.o new file mode 100644 index 000000000..b9e697826 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlange.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.c new file mode 100644 index 000000000..20f11129a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.c @@ -0,0 +1,236 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaprnt +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int IAROW, + const int IACOL, + const char * CMATNM +) +#else +void HPL_pdlaprnt +( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int IAROW; + const int IACOL; + const char * CMATNM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaprnt prints to standard error a distributed matrix A. 
The + * local pieces of A are sent to the process of coordinates (0,0) in + * the grid and then printed. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the coefficient + * matrix A. M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the + * coefficient matrix A. N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * A (local input) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). + * This array contains the coefficient matrix to be printed. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * IAROW (global input) const int + * On entry, IAROW specifies the row process coordinate owning + * the first row of A. IAROW must be larger than or equal to + * zero and less than NPROW. + * + * IACOL (global input) const int + * On entry, IACOL specifies the column process coordinate + * owning the first column of A. IACOL must be larger than or + * equal to zero and less than NPCOL. + * + * CMATNM (global input) const char * + * On entry, CMATNM is the name of the matrix to be printed. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Acomm; + double * buf = NULL; + int h, i, ib, icurcol=IACOL, icurrow=IAROW, + ii=0, j, jb, jj=0, mycol, myrow, npcol, + nprow, src; +/* .. + * .. Executable Statements .. 
+ */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Acomm = GRID->all_comm; + if( ( myrow == 0 ) && ( mycol == 0 ) ) + buf = (double*)malloc( (size_t)(NB) * sizeof( double ) ); + + for( j = 0; j < N; j += NB ) + { + jb = N-j; jb = Mmin( jb, NB ); + for( h = 0; h < jb; h++ ) + { + (void) HPL_barrier( Acomm ); + + for( i = 0; i < M; i += NB ) + { + ib = M-i; ib = Mmin( ib, NB ); + if( ( icurrow == 0 ) && ( icurcol == 0 ) ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1, + j+h+1, LDA, CMATNM ); + } + else + { + if( ( myrow == icurrow ) && ( mycol == icurcol ) ) + { + (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0, + 9000+(j+h)*M+i, Acomm ); + } + else if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + src = HPL_pnum( GRID, icurrow, icurcol ); + (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i, + Acomm ); + if (buf != NULL) + HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM ); + } + } + if( myrow == icurrow ) ii += ib; + icurrow = MModAdd1( icurrow, nprow ); + (void) HPL_barrier( Acomm ); + } + ii = 0; icurrow = IAROW; + } + if( mycol == icurcol ) jj += jb; + icurcol = MModAdd1( icurcol, npcol ); + (void) HPL_barrier( Acomm ); + } + if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf ); +/* + * End of HPL_pdlaprnt + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.o new file mode 100644 index 000000000..f2f86a8bd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pdlaprnt.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.c new file mode 100644 index 000000000..a9f666f89 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pwarn +( + FILE * STREAM, + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pwarn( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pwarn displays an error message. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + FILE * STREAM; + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. 
+ */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); +/* + * End of HPL_pwarn + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.o new file mode 100644 index 000000000..1d409181d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pauxil/HPL_pwarn.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.c new file mode 100644 index 000000000..644641412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.c @@ -0,0 +1,149 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dlocmax +( + HPL_T_panel * PANEL, + const int N, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocmax +( PANEL, N, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int N; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocmax finds the maximum entry in the current column and packs + * the useful information in WORK[0:3]. On exit, WORK[0] contains the + * local maximum absolute value scalar, WORK[1] is the corresponding + * local row index, WORK[2] is the corresponding global row index, and + * WORK[3] is the coordinate of the process owning this max. When N is + * less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set + * to the total number of process rows. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of the column + * of A on which we operate. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 4. On exit, + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + double * A; + int kk, igindx, ilindx, myrow, nb, nprow; +/* .. + * .. Executable Statements .. + */ + if( N > 0 ) + { + A = Mptr( PANEL->A, II, JJ, PANEL->lda ); + myrow = PANEL->grid->myrow; + nprow = PANEL->grid->nprow; + nb = PANEL->nb; + kk = PANEL->ii + II + ( ilindx = HPL_idamax( N, A, 1 ) ); + Mindxl2g( igindx, kk, nb, nb, myrow, 0, nprow ); +/* + * WORK[0] := local maximum absolute value scalar, + * WORK[1] := corresponding local row index, + * WORK[2] := corresponding global row index, + * WORK[3] := coordinate of process owning this max. + */ + WORK[0] = A[ilindx]; WORK[1] = (double)(ilindx); + WORK[2] = (double)(igindx); WORK[3] = (double)(myrow); + } + else + { +/* + * If I do not have any row of A, then set the coordinate of the process + * (WORK[3]) owning this "ghost" row, such that it will never be used, + * even if there are only zeros in the current column of A. + */ + WORK[0] = WORK[1] = WORK[2] = HPL_rzero; + WORK[3] = (double)(PANEL->grid->nprow); + } +/* + * End of HPL_dlocmax + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.o new file mode 100644 index 000000000..c6f19ce77 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocmax.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.c new file mode 100644 index 000000000..a3919500a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.c @@ -0,0 +1,436 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpN +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpN +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpN performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). 
+ * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, JJ, 0, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda; + 
*L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda ) + { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current of A into L1. + */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = *A1 = Wmx[ 0]; L += n0; A1 += lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = *A1 = Wmx[ 1]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = *A1 = Wmx[ 2]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 3]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = *A1 = Wmx[ 4]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 5]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 6]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 7]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = *A1 = Wmx[ 8]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 9]; L += n0; A1 += lda; + *L = *A1 = Wmx[10]; L += n0; A1 += lda; + *L = *A1 = Wmx[11]; L += n0; A1 += lda; + *L = *A1 = Wmx[12]; L += n0; A1 += lda; + *L = *A1 = Wmx[13]; L += n0; A1 += lda; + *L = *A1 = Wmx[14]; L += n0; A1 += lda; + *L = *A1 = Wmx[15]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = *A1 = Wmx[16]; L += n0; A1 += lda; + *L = *A1 = Wmx[17]; L += n0; A1 += lda; + *L = *A1 = Wmx[18]; L += n0; A1 += lda; + *L = *A1 = Wmx[19]; L += n0; A1 += lda; + *L = *A1 = Wmx[20]; L += n0; A1 += lda; + *L = *A1 = Wmx[21]; L += n0; A1 += lda; + *L = *A1 = Wmx[22]; L += n0; A1 += lda; + *L = *A1 = Wmx[23]; L += n0; A1 += lda; + *L = *A1 = Wmx[24]; L += n0; A1 += lda; + *L = *A1 = Wmx[25]; L += n0; A1 += lda; + *L = *A1 = Wmx[26]; L += n0; A1 += lda; + *L = *A1 = Wmx[27]; L += n0; A1 += lda; + *L = *A1 = Wmx[28]; L += n0; A1 += lda; + *L = *A1 = Wmx[29]; L += n0; A1 += lda; + *L = *A1 = Wmx[30]; L += n0; A1 += lda; + *L = *A1 = Wmx[31]; L += n0; A1 += lda; +#endif + } + + for( i = 0; i < nr; i++, L += n0, A1 += lda ) + { *L = *A1 = Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1. 
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. 
+ */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. 
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wr0[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wr0[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0; + *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0; + *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0; + *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0; + *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0; + *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0; + *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0; + *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0; + *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0; + *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0; + *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0; + *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0; +#endif + } + + for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; } +/* + * set INFO. 
+ */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.o new file mode 100644 index 000000000..09d9b9dfe Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.c new file mode 100644 index 000000000..89b86e35a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.c @@ -0,0 +1,406 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor. Both macros sit under the + * same #ifndef, so a build that overrides HPL_LOCSWP_DEPTH must define + * HPL_LOCSWP_LOG2_DEPTH as well; it is used as a shift count below and + * is expected to be log2( HPL_LOCSWP_DEPTH ), as in the 32 / 5 defaults. + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpT +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpT +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpT performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information.
+ * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements .. + * + * Wmx = WORK + 4 is the max row and Wr0 = Wmx + n0 the current row. nu + * is n0 rounded down to a multiple of 2^HPL_LOCSWP_LOG2_DEPTH and bounds + * the unrolled loops; the clean-up loops handle the nr = n0 - nu rest. + */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, 0, JJ, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to be swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A.
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH, + L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda; + L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda; + L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda; + L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda; + L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda; + L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda; + L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda; + L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda; + L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda; + L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda; + L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda; + L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda; + L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda; + L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda; + L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda; + L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda; + L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda; + L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda; + L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda; + L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda; + L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda; + L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda; + L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda; + L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda; + L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda; + L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda;
A2+=lda; + L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda, A2 += lda ) + { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current row of A into L1. + */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; + L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[12]=Wmx[12]; + L[ 9]=Wmx[ 9]; L[13]=Wmx[13]; + L[10]=Wmx[10]; L[14]=Wmx[14]; + L[11]=Wmx[11]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[20]=Wmx[20]; + L[17]=Wmx[17]; L[21]=Wmx[21]; + L[18]=Wmx[18]; L[22]=Wmx[22]; + L[19]=Wmx[19]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[28]=Wmx[28]; + L[25]=Wmx[25]; L[29]=Wmx[29]; + L[26]=Wmx[26]; L[30]=Wmx[30]; + L[27]=Wmx[27]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A.
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; A1+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda; + L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda; + L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda; + L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda; + L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda; + L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda; + L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda; + L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda; + L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda; + L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda; + L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda; + L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1.
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11]; + L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19]; + L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27]; + L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. + */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28];
A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. + */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wr0[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wr0[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13]; + L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21]; + L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23]; + L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29]; + L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; } +/* + * Set INFO.
+ */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.o new file mode 100644 index 000000000..674e04044 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_dlocswpT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.c new file mode 100644 index 000000000..1d99c6e14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.c @@ -0,0 +1,141 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdfact +( + HPL_T_panel * PANEL +) +#else +void HPL_pdfact +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdfact recursively factorizes a 1-dimensional panel of columns. + * The RPFACT function pointer specifies the recursive algorithm to be + * used, either Crout, Left- or Right looking. NBMIN allows to vary the + * recursive stopping criterion in terms of the number of columns in the + * panel, and NDIV allows to specify the number of subpanels each panel + * should be divided into. Usually a value of 2 will be chosen. Finally + * PFACT is a function pointer specifying the non-recursive algorithm + * to be used on at most NBMIN columns. One can also choose here between + * Crout, Left- or Right looking.
Empirical tests seem to indicate that + * values of 4 or 8 for NBMIN give the best results. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + void * vptr = NULL; + int align, jb; +/* .. + * .. Executable Statements .. + * + * PANEL->n and PANEL->ja are advanced by jb before the ownership test, + * so the early return below still leaves them updated; PANEL->A, nq and + * jj are only advanced after the factorization call.
+ */ + jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb; + + if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif + align = PANEL->algo->align; + vptr = (void *)malloc( ( (size_t)(align) + + (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) * + sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); } +/* + * Factor the panel - Update the panel pointers. vptr was sized to hold + * align + 2 * ( 4 + 2 * jb ) doubles; rffun is handed the pointer that + * HPL_PTR derives from vptr and align * sizeof(double) (presumably vptr + * rounded up to that alignment -- confirm against the HPL_PTR macro). + */ + PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr, + ((size_t)(align) * sizeof(double) ) ) ); + if( vptr ) free( vptr ); + + PANEL->A = Mptr( PANEL->A, 0, jb, PANEL->lda ); + PANEL->nq -= jb; PANEL->jj += jb; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif +/* + * End of HPL_pdfact + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.o new file mode 100644 index 000000000..56b7bfa6c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdfact.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.c new file mode 100644 index 000000000..b14452197 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.c @@ -0,0 +1,311 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmxswp +( + HPL_T_panel * PANEL, + const int M, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_pdmxswp +( PANEL, M, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int M; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmxswp swaps and broadcasts the absolute value max row using + * bi-directional exchange. The buffer is partially set by HPL_dlocmax. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by + * + * log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + * + * where lat and bdwth are the latency and bandwidth of the network for + * double precision real elements. Communication only occurs in one + * process column. Mono-directional links will cause the communication + * cost to double. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information.
+ * + * M (local input) const int + * On entry, M specifies the local number of rows of the matrix + * column on which this function operates. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * It is assumed that HPL_dlocmax was called prior to this + * routine to initialize the first four entries of this array. + * On exit, the N0 length max row is stored in WORK[4:4+N0-1]; + * Note that this is also the JJth row (or column) of L1. The + * remaining part is used as a temporary array. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax, tmp1; + double * A0, * Wmx, * Wwork; + HPL_T_grid * grid; + MPI_Comm comm; + unsigned int hdim, ip2, ip2_, ipow, k, mask; + int Np2, cnt_, cnt0, i, icurrow, lda, mydist, + mydis_, myrow, n0, nprow, partner, rcnt, + root, scnt, size_; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif + grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow; +/* + * ip2 : the smallest power of two less than or equal to nprow; + * hdim : dimension of the hypercube made of those ip2 processes; + * Np2 : logical flag indicating whether or not nprow is a power of 2; + * + * NOTE(review): ip2 and hdim are read from grid->row_ip2 / row_hdim and + * Np2 is set to ( nprow - ip2 ) != 0, i.e. non-zero when nprow differs + * from ip2. The word smallest above presumably should read largest -- + * confirm against the grid initialization, which is not visible here. + */ + comm = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2); + hdim = (unsigned int)(grid->row_hdim); n0 = PANEL->jb; + icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 ); + mydist = MModSub( myrow, icurrow, nprow ); +/* + * Set up pointers in workspace: WORK and Wwork point to the beginning + * of the buffers of size 4 + 2*N0 to be combined.
Wmx points to the row + * owning the local (before combine) and global (after combine) absolute + * value max. A0 points to the copy of the current row of the matrix. + */ + cnt0 = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0; + Wwork = WORK + cnt0; +/* + * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is (int)(WORK[1]) (row + * with max in current column). If I am the current process row, pack in + * addition the current row of A in A0[0:N0-1]. If I do not own any row + * of A, then zero out Wmx[0:N0-1]. + */ + if( M > 0 ) + { + lda = PANEL->lda; + HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda, + Wmx, 1 ); + if( myrow == icurrow ) + { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); } + } + else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; } +/* + * Combine the results (bi-directional exchange): the process coordina- + * tes are relative to icurrow, this allows to reduce the communication + * volume when nprow is not a power of 2. + * + * When nprow is not a power of 2: proc[i-ip2] receives local data from + * proc[i] for all i in [ip2..nprow). In addition, proc[0] (icurrow) + * sends to proc[ip2] the current row of A for later broadcast in procs + * [ip2..nprow).
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + if( mydist == (int)(ip2) ) + (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + if( mydist == 0 ) + (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); } + } + } + + if( mydist < (int)(ip2) ) + { +/* + * power of 2 part of the processes collection: processes [0..ip2) are + * combining (binary exchange); proc[0] has two rows to send, but one to + * receive. At every step k in [0..hdim) of the algorithm, a process + * pair exchanging 2 rows is such that myrow >> k+1 is 0. Among those + * processes the ones that are sending one more row than what they are + * receiving are such that myrow >> k is equal to 0. + */ + k = 0; ipow = 1; + + while( k < hdim ) + { + if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 ) + { + if( ( (unsigned int)(mydist) >> k ) == 0 ) + { scnt = cnt0; rcnt = cnt_; } + else + { scnt = cnt_; rcnt = cnt0; } + } + else { scnt = rcnt = cnt_; } + + partner = (int)( (unsigned int)(mydist) ^ ipow ); + (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt, + MSGID_BEGIN_PFACT, MModAdd( partner, icurrow, + nprow ), comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { + HPL_dcopy( ( rcnt == cnt0 ?
cnt0 : cnt_ ), Wwork, 1, + WORK, 1 ); + } + else if( rcnt == cnt0 ) + { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); } + + ipow <<= 1; k++; + } + } + else if( size_ > 1 ) + { +/* + * proc[ip2] broadcast current row of A to procs [ip2+1..nprow). + */ + k = (unsigned int)(size_) - 1; ip2_ = mask = 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else if( partner < size_ ) + { + (void) HPL_send( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } + ip2_ >>= 1; + } while( ip2_ > 0 ); + } +/* + * If nprow is not a power of 2, for all i in [ip2..nprow), proc[i-ip2] + * sends the pivot row to proc[i] along with the first four entries of + * the WORK array.
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } +/* + * Save the global pivot index in pivot array + */ + (PANEL->DPIV)[JJ] = WORK[2]; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif +/* + * End of HPL_pdmxswp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.o new file mode 100644 index 000000000..f1d41539a Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdmxswp.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.c new file mode 100644 index 000000000..4ea170b73 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in no-transpose form (i.e. just like the input + * matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. 
This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj, jj+1, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, Nm1 ); + Xv1 = vsip_msubview_d( Xv0, jj, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Xv0, jj, jj+1, 1, Nm1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone, + Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj, + ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 ); +#endif + if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update dia- + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each 
current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk+1, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + vsip_mdestroy_d( Yv1 ); + vsip_mdestroy_d( Xv1 ); + vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF, + jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ), + 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.o new file mode 100644 index 000000000..4e646a182 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.c new file mode 100644 index 000000000..50ed300aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.c @@ -0,0 +1,267 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj+1, jj, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, jj+1, ICOFF, Nm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj, kk, 1 ); + Yv1 = vsip_msubview_d( Xv0, jj+1, jj, Nm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone, + Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF, + jj, n0 ), 1, HPL_rone, L1ptr, 1 ); +#endif + 
if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update dia- + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk+1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF, + n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif 
+/* + * End of HPL_pdpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.o new file mode 100644 index 000000000..02f30764d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpancrT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.c new file mode 100644 index 000000000..fa471198d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, 1 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, 1, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.o new file mode 100644 index 000000000..0bcc4417f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllN.o 
differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.c new file mode 100644 index 000000000..a6e1b67bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.o new file mode 100644 index 000000000..4fbf6ebca Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanllT.o 
differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.c new file mode 100644 index 000000000..0a3b9a542 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.c @@ -0,0 +1,250 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M; +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); +#ifdef HPL_CALL_VSIPL + if( Nm1 > 1 ) + { +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); + } +#else + if( Nm1 > 1 ) + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda ); +#endif +/* + * Same thing as above but with worse data access on y (A += x * y^T) + * + * if( Nm1 > 1 ) ) + * HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + * Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ), + * lda ); + */ + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.o new file mode 100644 index 000000000..4ccb67fe7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.c new file mode 100644 index 000000000..68c1afc02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt, * L1; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M, + n0; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + + if( Nm1 > 1 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ), + lda ); +#endif + } + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.o new file mode 100644 index 000000000..75bdb487b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdpanrlT.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.c new file mode 100644 index 000000000..348d7ebe6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.c @@ -0,0 +1,282 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrN HPL_pdrpancrN recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the 
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrN( PANEL, m, jb, ioff, WORK ); + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + Av2 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n, + jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0, + Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.o new file mode 100644 index 000000000..ca755e4a1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.c new file mode 100644 index 000000000..a1ecfac2c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.c @@ -0,0 +1,282 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrT recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. 
To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_TRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrT( PANEL, m, jb, ioff, WORK ); + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj ); + Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, 
jb ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_NTRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb, + jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.o new file mode 100644 index 000000000..2ae6cc537 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpancrT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.c new file mode 100644 index 000000000..4dbc13b44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllN recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, + jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ), + n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( 
vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllN( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.o new file mode 100644 index 000000000..330396b19 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.c new file mode 100644 index 000000000..887caeb87 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllT recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). 
+ * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. 
We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr, + jj, 0, n0 ), n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Av2 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllT( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, 
ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.o new file mode 100644 index 000000000..546461349 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanllT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.c new file mode 100644 index 000000000..22f105cf4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlN recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. 
To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 
0, ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.o new file mode 100644 index 000000000..56ede64e7 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.c new file mode 100644 index 000000000..a77301b9b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlT recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. 
To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, N ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, 
ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.o new file mode 100644 index 000000000..22cbbc0cf Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pfact/HPL_pdrpanrlT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.c new file mode 100644 index 000000000..b917a6525 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_equil +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_TRANS TRANS, + const int N, + double * U, + const int LDU, + int * IPLEN, + const int * IPMAP, + const int * IPMAPM1, + int * IWORK +) +#else +void HPL_equil +( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_TRANS TRANS; + const int N; + double * U; + const int LDU; + int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_equil equilibrates the local pieces of U, so that on exit to + * this function, pieces of U contained in every process row are of the + * same size. This phase makes the rolling phase optimal. 
In addition, + * this function probes for the column panel L and forwards it when + * possible. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be equilibrated) information. + * + * TRANS (global input) const enum HPL_TRANS + * On entry, TRANS specifies whether U is stored in transposed + * or non-transposed form. + * + * N (local input) const int + * On entry, N specifies the number of rows or columns of U. N + * must be at least 0. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]) when U is stored in + * non-transposed form, and MAX(1,N) otherwise. + * + * IPLEN (global input) int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. 
+ * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension NPROW+1. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, ip, ipU, ipcur, iprow, iptgt, lastrow, + left, npm1, nprow, ll, llU, llcur, lltgt, + right, slen, smax, smin; +/* .. + * .. Executable Statements .. + */ + if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; +/* + * If the current distribution of the pieces of U is already optimal for + * the rolling phase, then return imediately. The optimal distribution + * is such that ip processes have smax items and the remaining processes + * only have smin items. Another way to check this is to verify that all + * differences IPLEN[i+1] - IPLEN[i] are either smin or smax. + */ + smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow; + ip = slen - nprow * ( smin = slen / nprow ); + + iprow = 0; + do + { + ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++; + } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) ); + + if( iprow == nprow ) return; +/* + * Now, we are sure the distribution of the pieces of U is not optimal + * with respect to the rolling phase, thus perform equilibration. Go + * through the list of processes: Processes that have rows that do not + * belong to them with respect to the optimal mapping spread them in a + * logarithmic fashion. To simplify a little bit the implementation, and + * mainly the packing, a source process row spreads its data to its left + * first, and then to its right. 
+ */ + IWORK[nprow] = slen; + + for( iprow = 0; iprow < nprow; iprow++ ) + { + llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] ); + if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax; } + else { lltgt = smin; iptgt = iprow * smin + ip; } + + left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU ); +/* + * If I have something to spread to either the left or the right + */ + if( ( llU > 0 ) && ( left || right ) ) + { /* Figure out how much every other process should have */ + + ipcur = ipU; llcur = llU; + + for( i = 0; i < nprow; i++ ) + { + if( i < ip ) { lltgt = smax; iptgt = i * smax; } + else { lltgt = smin; iptgt = i * smin + ip; } + lastrow = iptgt + lltgt - 1; + + if( ( lastrow >= ipcur ) && ( llcur > 0 ) ) + { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; } + else { ll = 0; } + + IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur; + } +/* + * Equilibration phase + */ + if( TRANS == HplNoTrans ) + { + if( left ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + else + { + if( left ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + } + } +/* + * Finally update IPLEN with the indexes corresponding to the new dis- + * tribution of U - IPLEN[nprow] remained unchanged. + */ + for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? 
i*smax : i*smin + ip ); +/* + * End of HPL_equil + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.o new file mode 100644 index 000000000..5551089fd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_equil.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.c new file mode 100644 index 000000000..0715159bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.c @@ -0,0 +1,185 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_logsort +( + const int NPROCS, + const int ICURROC, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_logsort +( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 ) + const int NPROCS; + const int ICURROC; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_logsort computes an array IPMAP and its inverse IPMAPM1 that + * contain the logarithmic sorted processes id with repect to the local + * number of rows of U that they own. This is necessary to ensure that + * the logarithmic spreading of U is optimal in terms of number of steps + * and communication volume as well. In other words, the larget pieces + * of U will be sent a minimal number of times. 
+ * + * Arguments + * ========= + * + * NPROCS (global input) const int + * On entry, NPROCS specifies the number of process rows in the + * process grid. NPROCS is at least one. + * + * ICURROC (global input) const int + * On entry, ICURROC is the source process row. + * + * IPLEN (global input/output) int * + * On entry, IPLEN is an array of dimension NPROCS+1, such that + * IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, + * that process i-1 has. On exit, IPLEN[i] is the number of + * rows of U in the processes before process IPMAP[i] after the + * sort, with the convention that IPLEN[NPROCS] is the total + * number of rows of the panel. In other words, IPLEN[i+1] - + * IPLEN[i] is the number of rows of A that should be moved to + * the process IPMAP[i]. IPLEN is such that the number of rows + * of the source process row is IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROCS. On exit, + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myroc] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROCS. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROCS) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dist, i, ip, iplen_i, iplen_j, itmp, j, k; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the logarithmic distance between process j and process 0, as + * well as the maximum logarithmic distance. IPMAPM1 is workarray here. 
+ */ + for( j = 0, dist = 0; j < NPROCS; j++ ) + { + IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0; + do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); + IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp; + } +/* + * Shift IPLEN[1..NPROCS] of ICURROC places, so that IPLEN[1] is now + * what used to be IPLEN[ICURROC+1]. Initialize IPMAP, so that IPMAP[0] + * is ICURROC. + */ + for( j = 0; j < ICURROC; j++ ) + { + for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i]; + IPLEN[NPROCS] = itmp; + } +/* + * logarithmic sort + */ + for( k = 1; k <= dist; k++ ) + { + for( j = 1; j < NPROCS; j++ ) + { + if( IPMAPM1[j] == k ) + { + for( i = 2; i < NPROCS; i++ ) + { + if( k < IPMAPM1[i] ) + { + iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1]; + + if( iplen_j < iplen_i ) + { + IPLEN[j+1] = iplen_i; IPLEN[i+1] = iplen_j; + itmp = IPMAP[j]; IPMAP[j] = IPMAP[i]; + IPMAP[i] = itmp; + } + } + } + } + } + } +/* + * Compute IPLEN and IPMAPM1 (the inverse of IPMAP) + */ + IPLEN[0] = 0; + + for( i = 0; i < NPROCS; i++ ) + { + IPMAPM1[ IPMAP[i] ] = i; + IPLEN[i+1] += IPLEN[i]; + } +/* + * End of HPL_logsort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.o new file mode 100644 index 000000000..bc6a54df3 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_logsort.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.c new file mode 100644 index 000000000..ced74269e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with or without look-ahead. The lower triangular factor is left + * unpivoted and the pivots are not returned. The right hand side is the + * N+1 column of the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( A->n <= 0 ) return; + + A->info = 0; + + if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) ) + { + HPL_pdgesv0( GRID, ALGO, A ); + } + else + { + HPL_pdgesvK2( GRID, ALGO, A ); + } +/* + * Solve upper triangular system + */ + if( A->info == 0 ) HPL_pdtrsv( GRID, A ); +/* + * End of HPL_pdgesv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.o new file mode 100644 index 000000000..eebf1d2bd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.c new file mode 100644 index 000000000..d79b6fa55 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.c @@ -0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv0 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv0 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv0 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * without look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. 
+ * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, j, jb, n, nb, tag=MSGID_BEGIN_FACT, + test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( N = A->n ) <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + + HPL_pdupdate = ALGO->upfun; nb = A->nb; +/* + * Allocate a panel list of length 1 - Allocate panel[0] resources + */ + panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); } + + HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag, + &panel[0] ); +/* + * Loop over the columns of A + */ + for( j = 0; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Release panel resources - re-initialize panel data structure + */ + (void) HPL_pdpanel_free( panel[0] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] ); +/* + * Factor and broadcast current panel - update + */ + HPL_pdfact( panel[0] ); + (void) HPL_binit( panel[0] ); + do + { (void) HPL_bcast( panel[0], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[0] ); + HPL_pdupdate( NULL, NULL, panel[0], -1 ); +/* + * Update message id for next factorization + */ + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Release panel resources and panel list + */ + (void) HPL_pdpanel_disp( &panel[0] ); + + if( panel ) free( panel ); +/* + * End of HPL_pdgesv0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.o new file mode 100644 index 000000000..542c74bbb Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesv0.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.c new file mode 100644 index 000000000..ff1958cfc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.c @@ -0,0 +1,222 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK1 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK1 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK1 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel - use long topology for those + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-1-k panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef 
HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Allocate current panel resources - Finish latest update - Factor and + * broadcast current panel + */ + HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Release latest panel resources - circular of the panel pointers + * Go to the next process row and column - update the message ids for + * broadcast + */ + (void) HPL_pdpanel_disp( &panel[0] ); + for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + + if( panel ) free( panel ); +/* + * End of HPL_pdgesvK1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.o new file mode 100644 index 000000000..e84aa62e0 Binary files /dev/null 
and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK1.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.c new file mode 100644 index 000000000..dec506ab9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.c @@ -0,0 +1,231 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK2 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK2 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK2 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * p, * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Create last depth+1 panel + */ + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart, + jstart, tag, &panel[depth] ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-k-1 panels in front of me + */ + 
if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Initialize current panel - Finish latest update, Factor and broadcast + * current panel + */ + (void) HPL_pdpanel_free( panel[depth] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Circular of the panel pointers: + * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp; + * + * Go to next process row and column - update the message ids for broadcast + */ + p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + panel[depth] = p; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( 
&panel[k] ); + } + (void) HPL_pdpanel_disp( &panel[depth] ); + + if( panel ) free( panel ); +/* + * End of HPL_pdgesvK2 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.o new file mode 100644 index 000000000..97892453c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdgesvK2.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.c new file mode 100644 index 000000000..b4433e1be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.c @@ -0,0 +1,432 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. 
With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU jb +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb) + */ + vptr = (void*)malloc( + ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necesary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise. 
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this later case, and I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcast U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.o new file mode 100644 index 000000000..45646d165 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.c new file mode 100644 index 000000000..7a9764c09 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.c @@ -0,0 +1,433 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words.
Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU n /* leading dimension handed to every helper below that takes U */ +/* .. + * .. Executable Statements ..
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; /* usize: message length used whenever U is sent or received */ + ldW = n + 1; /* n entries plus one leading index slot per vector packed in W (see packing comment below) */ +/* + * Allocate space for temporary W (ldW * jb doubles, plus align extra doubles; W is then derived from vptr through HPL_PTR) + */ + vptr = (void*)malloc( ( (size_t)(align) + + ((size_t)(jb) * (size_t)(ldW))) * + sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; /* k = 2*jb */ + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; /* IWORK slices, in order: iflag(1), ipl(1), ipID(4*jb), lindxA(2*jb), lindxAU(2*jb), llen(nprow), llen_sv */ + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necessary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise.
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } /* icurrow packed straight into U above: nothing of its own is staged in W */ + ipW = ipA; /* ipW: index given to Mptr to locate where the next incoming W block is received */ + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); /* size_ = nprow - ip2; Np2 is 1 when size_ is non-zero, 0 otherwise */ + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); /* my process row index relative to icurrow (see next comment) */ +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen - for i in [1..size_), the row at relative index i absorbs the count of the row at relative index i+ip2 + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U.
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this latter case, if I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs (both members of every pair end up holding the combined count) + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; /* ipow starts at 1 and doubles with every step k: it is the XOR mask selecting the next partner */ +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST,
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcasts U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } /* ip2_ becomes the largest power of 2 <= size_-1, mask becomes 2*ip2_-1 */ + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); /* releases the buffer that backs W */ +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.o new file mode 100644 index 000000000..64d2a7b87 Binary files /dev/null and
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp00T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.c new file mode 100644 index 000000000..31f219840 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words.
K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; /* -1 = not read yet; set once from PANEL->algo->equil below and kept across calls */ + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU jb /* leading dimension handed to every helper below that takes U */ +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel).
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; /* k = 2*jb */ + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; /* every index array is a consecutive slice of PANEL->IWORK */ + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01N was called before: only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; /* k indexes iplen: my piece of U starts at iplen[k] and holds iplen[k+1]-iplen[k] entries */ + HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k], + 0, LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen, + ipmap, ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + *
Permute U in every process row + */ + HPL_dlaswp00N( jb, n, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.o new file mode 100644 index 000000000..eb5b938b6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01N.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.c new file mode 100644 index 000000000..0c4de2669 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory.
+ * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available.
With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; /* -1 = not read yet; set once from PANEL->algo->equil below and kept across calls */ + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU n /* leading dimension handed to every helper below that takes U */ +/* .. + * .. Executable Statements ..
+ */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; /* k = 2*jb */ + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; /* every index array is a consecutive slice of PANEL->IWORK */ + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01T was called before: only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU
); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; /* k indexes iplen: my piece of U starts at iplen[k] and holds iplen[k+1]-iplen[k] entries */ + HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0, + iplen[k], LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap, + ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * Permute U in every process row + */ + HPL_dlaswp10N( n, jb, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.o new file mode 100644 index 000000000..020b40f86 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdlaswp01T.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.c new file mode 100644 index 000000000..d2135130a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.c @@ -0,0 +1,296 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtrsv +( + HPL_T_grid * GRID, + HPL_T_pmat * AMAT +) +#else +void HPL_pdtrsv +( GRID, AMAT ) + HPL_T_grid * GRID; + HPL_T_pmat * AMAT; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtrsv solves an upper triangular system of linear equations. + * + * The rhs is the last column of the N by N+1 matrix A. The solve starts + * in the process column owning the Nth column of A, so the rhs b may + * need to be moved one process column to the left at the beginning. The + * routine therefore needs a column vector in every process column but + * the one owning b. The result is replicated in all process rows, and + * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + * + * The algorithm uses decreasing one-ring broadcast in process rows and + * columns implemented in terms of synchronous communication point to + * point primitives. The lookahead of depth 1 is used to minimize the + * critical path. This entire operation is essentially ``latency'' bound + * and an estimate of its running time is given by: + * + * (move rhs) lat + N / ( P bdwth ) + + * (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + * gam2 N^2 / ( P Q ), + * + * where gam2 is an estimate of the Level 2 BLAS rate of execution. + * There are N / NB diagonal blocks. One must exchange 2 messages of + * length NB to compute the next NB entries of the vector solution, as + * well as performing a total of N^2 floating point operations. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * AMAT (local input/output) HPL_T_pmat * + * On entry, AMAT points to the data structure containing the + * local array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. 
Local Variables .. + */ + MPI_Comm Ccomm, Rcomm; + double * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL, + * XR=NULL, * Xd=NULL, * Xdprev=NULL, + * W=NULL; + int Alcol, Alrow, Anpprev, Anp, Anq, Bcol, + Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid, + Wfr=0, colprev, kb, kbprev, lda, mycol, + myrow, n, n1, n1p, n1pprev=0, nb, npcol, + nprow, rowprev, tmp1, tmp2; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif + if( ( n = AMAT->n ) <= 0 ) return; + nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X; + + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV; + Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1; + GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 ); +/* + * Move the rhs in the process column owning the last column of A. + */ + Mnumroc( Anp, n, nb, nb, myrow, 0, nprow ); + Mnumroc( Anq, n, nb, nb, mycol, 0, npcol ); + + tmp1 = ( n - 1 ) / nb; + Alrow = tmp1 - ( tmp1 / nprow ) * nprow; + Alcol = tmp1 - ( tmp1 / npcol ) * npcol; + kb = n - tmp1 * nb; + + Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda ); + Mindxg2p( n, nb, nb, Bcol, 0, npcol ); + + if( ( Anp > 0 ) && ( Alcol != Bcol ) ) + { + if( mycol == Bcol ) + { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); } + else if( mycol == Alcol ) + { (void) HPL_recv( XC, Anp, Bcol, Rmsgid, Rcomm ); } + } + Rmsgid = ( Rmsgid + 2 > + MSGID_END_PTRSV ? 
MSGID_BEGIN_PTRSV : Rmsgid + 2 ); + if( mycol != Alcol ) + { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; } +/* + * Set up lookahead + */ + n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb ); + if( Anp > 0 ) + { + W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) ); + if( W == NULL ) + { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); } + Wfr = 1; + } + + Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda ); + tmp1 = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); } + if( mycol == Alcol ) + { + Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq ); + if( myrow == Alrow ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, Xd, 1 ); + } + } + + rowprev = Alrow; Alrow = MModSub1( Alrow, nprow ); + colprev = Alcol; Alcol = MModSub1( Alcol, npcol ); + kbprev = kb; n -= kb; + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); +/* + * Start the operations + */ + while( n > 0 ) + { + if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; } + if( myrow == Alrow ) { Anp -= kb; } +/* + * Broadcast (decreasing-ring) of previous solution block in previous + * process column, compute partial update of current block and send it + * to current process column. 
+ */ + if( mycol == colprev ) + { +/* + * Send previous solution block in process row above + */ + if( myrow == rowprev ) + { + if( GridIsNot1xQ ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else + { + (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ), + Cmsgid, Ccomm ); + } +/* + * Compute partial update of previous solution block and send it to cur- + * rent column + */ + if( n1pprev > 0 ) + { + tmp1 = Anpprev - n1pprev; + HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev, + -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone, + XC+tmp1, 1 ); + if( GridIsNotPx1 ) + (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm ); + } +/* + * Finish the (decreasing-ring) broadcast of the solution block in pre- + * vious process column + */ + if( ( myrow != rowprev ) && + ( myrow != MModAdd1( rowprev, nprow ) ) ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else if( mycol == Alcol ) + { +/* + * Current column receives and accumulates partial update of previous + * solution block + */ + if( n1pprev > 0 ) + { + (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm ); + HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 ); + } + } +/* + * Solve current diagonal block + */ + if( ( mycol == Alcol ) && ( myrow == Alrow ) ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 ); + } +/* +* Finish previous update +*/ + if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) + HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone, + Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 ); +/* +* Save info of current step and update info for the next step +*/ + if( mycol == Alcol ) { Xdprev = Xd; Aprev = Aptr; } + if( myrow == Alrow ) { Anpprev -= kb; } + rowprev = Alrow; colprev = Alcol; + n1pprev = n1p; kbprev = kb; n -= kb; + Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol ); + 
tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV : Rmsgid+2 ); + Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV+1 : Cmsgid+2 ); + } +/* + * Replicate last solution block + */ + if( mycol == colprev ) + (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev, + Ccomm ); + + if( Wfr ) free( W ); +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif +/* + * End of HPL_pdtrsv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.o new file mode 100644 index 000000000..1b4f1597b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdtrsv.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.c new file mode 100644 index 000000000..7e31ddcd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.c @@ -0,0 +1,442 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, 
Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq 
-= n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.o new file mode 100644 index 000000000..67ec6202d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.c new file mode 100644 index 000000000..faa3ef207 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNT broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. 
+ */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, 
Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU ); 
+ Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.o new file mode 100644 index 000000000..f66995f02 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateNT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.c new file mode 100644 index 000000000..a16aa26a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTN broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing + * submatrix (using the panel PANEL). 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. Otherwise + * (PBCST is NULL), IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. At most PANEL->nq columns are updated; a negative + * NN selects all PANEL->nq columns. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. 
+ */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Convert the jb pivot values stored as doubles in DPIV into ints in + * IWORK, shifted by -PANEL->ii + */ + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update at most nb columns at a time ( nn = min( nb, n - nq0 ) ) + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + * of the remaining n - nq0 columns in one step + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, 
HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, 
lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + * fswap and tswap are static: they are read from PANEL->algo only while + * fswap is still HPL_NO_SWP. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + * curr is 1 when this process row is PANEL->prow; in that case the + * update starts jb rows below Aptr ( mp = PANEL->mp - jb ). + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= 
n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + * IFLAG is only written when PBCST is not NULL. + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.o new file mode 100644 index 000000000..dbe1285f1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.c new file mode 100644 index 000000000..81e6cc4b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTT broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing + * submatrix (using the panel PANEL). 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. Otherwise + * (PBCST is NULL), IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. At most PANEL->nq columns are updated; a negative + * NN selects all PANEL->nq columns. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. 
+ */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Convert the jb pivot values stored as doubles in DPIV into ints in + * IWORK, shifted by -PANEL->ii + */ + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update at most nb columns at a time ( nn = min( nb, n - nq0 ) ) + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + * of the remaining n - nq0 columns in one step + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, 
HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, 
lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + * fswap and tswap are static: they are read from PANEL->algo only while + * fswap is still HPL_NO_SWP. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + * curr is 1 when this process row is PANEL->prow; in that case the + * update starts jb rows below Aptr ( mp = PANEL->mp - jb ). + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + * IFLAG is only written when PBCST is not NULL. + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.o new file mode 100644 index 000000000..344e0cdc3 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pdupdateTT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.c new file mode 100644 index 000000000..bf7cc4503 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_perm +( + const int N, + int * LINDXA, + int * LINDXAU, + int * IWORK +) +#else +void HPL_perm +( N, LINDXA, LINDXAU, IWORK ) + const int N; + int * LINDXA; + int * LINDXAU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_perm combines two index arrays and generate the corresponding + * permutation. First, this function computes the inverse of LINDXA, and + * then combine it with LINDXAU. 
Second, in order to be able to perform + * the permutation in place, LINDXAU is overwritten by the sequence of + * permutation producing the same result. What we ultimately want to + * achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the + * call to this function, this in place permutation can be performed by + * for i in [0..N) swap U[i] with U[LINDXAU[i]]. + * + * Arguments + * ========= + * + * N (global input) const int + * On entry, N specifies the length of the arrays LINDXA and + * LINDXAU. N should be at least zero. + * + * LINDXA (global input/output) int * + * On entry, LINDXA is an array of dimension N containing the + * source indexes. On exit, LINDXA contains the combined index + * array. + * + * LINDXAU (global input/output) int * + * On entry, LINDXAU is an array of dimension N containing the + * target indexes. On exit, LINDXAU contains the sequence of + * permutation, that should be applied in increasing order to + * permute the underlying array U in place. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension N. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j, k, fndd; +/* .. + * .. Executable Statements .. 
+ */ +/* + * Inverse LINDXA - combine LINDXA and LINDXAU - Initialize IWORK + */ + for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; } + for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; } + + for( i = 0; i < N; i++ ) + { + /* search LINDXA such that LINDXA[j] == i */ + j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--; + /* search IWORK such that IWORK[k] == j */ + k = 0; do { fndd = ( IWORK[k] == j ); k++; } while( !fndd ); k--; + /* swap IWORK[i] and IWORK[k]; LINDXAU[i] = k */ + j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j; + LINDXAU[i] = k; + } +/* + * End of HPL_perm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.o new file mode 100644 index 000000000..6e8f33ec4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_perm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.c new file mode 100644 index 000000000..ab5ef949f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.c @@ -0,0 +1,187 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pipid +( + HPL_T_panel * PANEL, + int * K, + int * IPID +) +#else +void HPL_pipid +( PANEL, K, IPID ) + HPL_T_panel * PANEL; + int * K; + int * IPID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pipid computes an array IPID that contains the source and final + * destination of matrix rows resulting from the application of N + * interchanges as computed by the LU factorization with row partial + * pivoting. The array IPID is such that the row of global index IPID(i) + * should be mapped onto the row of global index IPID(i+1). Note that we + * cannot really know the length of IPID a priori. However, we know that + * this array is at least 2*N long, since there are N rows to swap and + * broadcast. The length of this array must be smaller than or equal to + * 4*N, since every row is swapped with at most a single distinct remote + * row. The algorithm constructing IPID goes as follows: Let IA be the + * global index of the first row to be swapped. + * + * For every row src IA + i with i in [0..N) to be swapped with row dst + * such that dst is given by DPIV[i]: + * + * Is row src the destination of a previous row of the current block, + * that is, is there k odd such that IPID(k) is equal to src ? + * Yes: update this destination with dst. For example, if the + * pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, + * we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it + * was thought so far ... + * No : add the pair (src,dst) at the end of IPID; row src has not + * been moved yet. + * + * Is row dst different from src the destination of a previous row of + * the current block, i.e., is there k odd such that IPID(k) is equal to + * dst ? + * Yes: update IPID(k) with src. For example, if the pivot array + * is (0,5)(1,1)(2,5) ... 
, then when we swap rows 2 and 5, we swap in + * fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought + * so far ... + * No : add the pair (dst,src) at the end of IPID; row dst has not + * been moved yet. + * + * Note that when src is equal to dst, the pair (dst,src) should not be + * added to IPID in order to avoid duplicated entries in this array. + * During the construction of the array IPID, we make sure that the + * first N entries are such that IPID(k) with k odd is equal to IA+k/2. + * For k in [0..K/2), the row of global index IPID(2*k) should be + * mapped onto the row of global index IPID(2*k+1). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global output) int * + * On exit, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global output) int * + * On entry, IPID is an array of length 4*N. On exit, the first + * K entries of that array contain the src and final destination + * resulting from the application of the N interchanges as + * specified by DPIV. The pairs (src,dst) are contiguously + * stored and sorted so that IPID(2*i+1) is equal to IA+i with i + * in [0..N) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dst, fndd, fnds, ia, i, j, jb, lst, off, + src; + double * dpiv; +/* .. + * .. Executable Statements .. 
+ */ + dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia; + dst = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2; + if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; } + + for( i = 1; i < jb; i++ ) + { + fnds = 0; j = 1; + + if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) ) + { + do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } } + while( !( fnds ) && ( j < *K ) ); + if( !fnds ) { lst = *K; off = 2; IPID[lst] = src; } + else { lst = fnds-1; off = 0; } + IPID[lst+1] = dst; + } + else + { + fndd = 0; + do + { + if ( src == IPID[j] ) { fnds = j; } + else if( dst == IPID[j] ) { fndd = j; } + j += 2; + } + while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) ); + if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off = 2; } + else { IPID[fnds] = dst; off = 0; } + if( !fndd ) { lst = *K+off; IPID[lst ] = dst; off += 2; } + else { lst = fndd-1; } + IPID[lst+1] = src; + } +/* + * Enforce IPID(1,i) equal to src = ia + i + */ + if( lst != ( j = ( i << 1 ) ) ) + { + src = IPID[j ]; IPID[j ] = IPID[lst ]; IPID[lst ] = src; + dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst; + } + *K += off; + } +/* + * End of HPL_pipid + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.o new file mode 100644 index 000000000..13544e481 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_pipid.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.c new file mode 100644 index 000000000..be12639d0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.c @@ -0,0 +1,281 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx0 +( + HPL_T_panel * PANEL, + const int K, + int * IPID, + int * LINDXA, + int * LINDXAU, + int * LLEN +) +#else +void HPL_plindx0 +( PANEL, K, IPID, LINDXA, LINDXAU, LLEN ) + HPL_T_panel * PANEL; + const int K; + int * IPID; + int * LINDXA; + int * LINDXAU; + int * LLEN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. + * + * On entry, the array IPID of length K is such that the row of global + * index IPID(i) should be mapped onto row of global index IPID(i+1). + * Let IA be the global index of the first row to be swapped. For k in + * [0..K/2), the row of global index IPID(2*k) should be mapped onto the + * row of global index IPID(2*k+1). The question then, is to determine + * which rows should ultimately be part of U. + * + * First, some rows of the process ICURROW may be swapped locally. One + * of this row belongs to U, the other one belongs to my local piece of + * A. The other rows of the current block are swapped with remote rows + * and are thus not part of U. These rows however should be sent along, + * and grabbed by the other processes as we progress in the exchange + * phase. 
+ * + * So, assume that I am ICURROW and consider a row of index IPID(2*i) + * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less + * than N, this row is locally swapped and should be copied into U at + * the position IPID(2*i+1) - IA. No row will be exchanged for this one. + * If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be + * locally copied into my local piece of A at the position corresponding + * to the row of global index IPID(2*i+1). + * + * If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) + * is to be swapped away and strictly speaking does not belong to U, but + * to A remotely. Since this process will however send this array U, + * this row is copied into U, exactly where the row IPID(2*i+1) should + * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to + * IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position + * IPID(2*k1+1)-IA. + * + * It is thus important to put the rows that go into U, i.e., such that + * IPID(2*i+1) - IA is less than N at the begining of the array IPID. By + * doing so, U is formed, and the local copy is performed in just one + * sweep. + * + * Two lists LINDXA and LINDXAU are built. LINDXA contains the local + * index of the rows I have that should be copied. LINDXAU contains the + * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A + * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). In the process + * ICURROW, the initial packing algorithm proceeds as follows. 
+ * + * for all entries in IPID, + * if IPID(2*i) is in ICURROW, + * if IPID(2*i+1) is in ICURROW, + * if( IPID(2*i+1) - IA < N ) + * save corresponding local position + * of this row (LINDXA); + * save local position (LINDXAU) in U + * where this row goes; + * [copy row IPID(2*i) in U at position + * IPID(2*i+1)-IA; ]; + * else + * save corresponding local position of + * this row (LINDXA); + * save local position (-LINDXAU) in A + * where this row goes; + * [copy row IPID(2*i) in my piece of A + * at IPID(2*i+1);] + * end if + * else + * find k1 such that IPID(2*k1) = IPID(2*i+1); + * copy row IPID(2*i) in U at position + * IPID(2*k1+1)-IA; + * save corresponding local position of this + * row (LINDXA); + * save local position (LINDXAU) in U where + * this row goes; + * end if + * end if + * end for + * + * Second, if I am not the current row process ICURROW, all source rows + * in IPID that I own are part of U. Indeed, they are swapped with one + * row of the current block of rows, and the main factorization + * algorithm proceeds one row after each other. The processes different + * from ICURROW, should exchange and accumulate those rows until they + * receive some data previously owned by the process ICURROW. + * + * In processes different from ICURROW, the initial packing algorithm + * proceeds as follows. Consider a row of global index IPID(2*i) that I + * own. When I will be receiving data previously owned by ICURROW, i.e., + * U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, + * and this particular row of U should be first copied into my piece of + * A, at A(il,:), where il is the local row index corresponding to + * IPID(2*i). Now,initially, this row will be packed into workspace, say + * as the kth row of that work array. The following algorithm sets + * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row + * should be copied. LINDXA(k) stores the local index in A where this + * row of U should be copied, i.e il. 
+ * + * for all entries in IPID, + * if IPID(2*i) is not in ICURROW, + * copy row IPID(2*i) in work array; + * save corresponding local position + * of this row (LINDXA); + * save position (LINDXAU) in U where + * this row should be copied; + * end if + * end for + * + * Since we are at it, we also globally figure out how many rows every + * process has. That is necessary, because it would rather be cumbersome + * to figure it on the fly during the bi-directional exchange phase. + * This information is kept in the array LLEN of size NPROW. Also note + * that the arrays LINDXA and LINDXAU are of max length equal to 2*N. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * LINDXA (local output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (local output) int * + * On exit, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * LLEN (global output) int * + * On entry, LLEN is an array of length NPROW. On exit, it + * contains how many rows every process has. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int dst, dstrow, fndd, i, ia, icurrow, il, + ip=0, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + icurrow = PANEL->prow; jb = PANEL->jb; + nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; + + for( i = 0; i < nprow; i++ ) LLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; + Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++; + + if( myrow == srcrow ) + { + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; dst = IPID[i+1]; + + if( myrow == icurrow ) + { + Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( dstrow == icurrow ) + { + if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; } + else + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + } + else + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + LINDXAU[ip] = IPID[j-1] - ia; + } + } + else { LINDXAU[ip] = dst - ia; } + + ip++; + } + } +/* + * End of HPL_plindx0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.o new file mode 100644 index 000000000..b41a64031 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx0.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.c new file mode 100644 index 000000000..a24fd4c56 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.c @@ -0,0 +1,275 @@ +/* + * -- High Performance Computing 
Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx1 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPA, + int * LINDXA, + int * LINDXAU, + int * IPLEN, + int * IPMAP, + int * IPMAPM1, + int * PERMU, + int * IWORK +) +#else +void HPL_plindx1 +( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPA; + int * LINDXA; + int * LINDXAU; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; + int * PERMU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. In addition, this function computes + * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic + * mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K. 
The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * IPA (global output) int * + * On exit, IPA specifies the number of rows that the current + * process row has that either belong to U or should be swapped + * with remote rows of A. + * + * LINDXA (global output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (global output) int * + * On exit, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IPMAP[i] after the sort + * with the convention that IPLEN[nprow] is the total number of + * rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the + * local number of rows of A that should be moved to the process + * IPMAP[i]. IPLEN is such that the number of rows of the source + * process row can be computed as IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW. 
On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROCS) + * + * PERMU (global output) int * + * On entry, PERMU is an array of dimension JB. On exit, PERMU + * contains a sequence of permutations, that should be applied + * in increasing order to permute in place the row panel U. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension 2*JB. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int * iwork; + int dst, dstrow, fndd, i, ia, icurrow, il, + ip, ipU, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + */ + HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ); +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA + * elsewhere and PERMU in every process. 
+ */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; icurrow = PANEL->prow; + + iwork = IWORK + jb; + + if( myrow == icurrow ) + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; + + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + + PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + ip++; + } + } + *IPA = ip; + } + else + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); +/* + * LINDXA[i] is the local index of the row of A that belongs into U + */ + if( myrow == dstrow ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; ip++; + } +/* + * iwork[i] is the local (current) position index in U + * PERMU[i] is the local (final) destination index in U + */ + if( srcrow == icurrow ) + { + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + } + } + 
*IPA = 0; + } +/* + * Simplify iwork and PERMU, return in PERMU the sequence of permutation + * that need to be apply to U after it has been broadcast. + */ + HPL_perm( jb, iwork, PERMU, IWORK ); +/* + * Reset IPLEN to its correct value + */ + for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1]; + IPLEN[0] = 0; +/* + * End of HPL_plindx1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.o new file mode 100644 index 000000000..5196523b4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx1.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.c new file mode 100644 index 000000000..fa460fd35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.c @@ -0,0 +1,155 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx10 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_plindx10 +( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx10 computes three arrays IPLEN, IPMAP and IPMAPM1 that + * contain the logarithmic mapping information for the spreading phase. 
+ * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IMAP[i] after the sort, with + * the convention that IPLEN[nprow] is the total number of rows. + * In other words, IPLEN[i+1] - IPLEN[i] is the local number of + * rows of A that should be moved for each process. IPLEN is + * such that the number of rows of the source process row can be + * computed as IPLEN[1] - IPLEN[0], and the remaining entries of + * this array are sorted so that the quantities IPLEN[i+1] - + * IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROW) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dst, dstrow, i, ia, icurrow, jb, nb, + nprow, src, srcrow; +/* .. + * .. Executable Statements .. 
+ */ + nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; + ia = PANEL->ia; icurrow = PANEL->prow; +/* + * Compute redundantly the local number of rows that each process has + * and that belong to U in IPLEN[1 .. nprow+1] + */ + for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++; + } + } +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + * (the inverse of IPMAP) + */ + HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 ); +/* + * End of HPL_plindx10 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.o new file mode 100644 index 000000000..b6e933947 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_plindx10.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.c new file mode 100644 index 000000000..e68590a01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.c @@ -0,0 +1,225 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollN +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollN rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the number of columns of U. N must be + * at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[NPROW]). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. 
This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type[2]; + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. Executable Statements .. + */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE, + &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( 
U, ibufR, 0, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); + } + + if( lengthS > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE, + &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); } +/* + * End of HPL_rollN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.o new file mode 100644 index 000000000..fe91d1449 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.c new file mode 100644 index 000000000..0160c9412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.c @@ -0,0 +1,259 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollT +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollT rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type[2]; +#endif + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE, + &type[I_RECV] ); + else + ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE, + &type[I_RECV] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. 
+ */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU, + MPI_DOUBLE, partner, Cmsgid, comm, &request ); +#endif + } + + if( lengthS > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE, + &type[I_SEND] ); + else + ierr = MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE, + &type[I_SEND] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU, + MPI_DOUBLE, partner, Cmsgid, comm ); +#endif + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#if 0 + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); +#endif + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); } +/* + * End of HPL_rollT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.o new file mode 100644 index 000000000..c40488766 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_rollT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.c new file mode 100644 index 000000000..202611e7f --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.c @@ -0,0 +1,303 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadN +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadN spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of rows of U, that + * should be spread on any given process row. This function also probes + * for the presence of the column panel PBCST. In case of success, this + * panel will be forwarded. If PBCST is NULL on input, this probing + * mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of columns of U. N + * must be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. 
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type; + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U to the left + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) 
HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U to the right - offset the IPLEN, and IPMAP arrays + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); } +/* + * End of HPL_spreadN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.o new file mode 100644 index 000000000..566eb66ab Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadN.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.c new file mode 100644 index 000000000..1adf93507 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.c @@ -0,0 +1,372 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadT +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadT spreads the local array containing local pieces of U, so + * that on exit from this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of columns of U, + * that should be spread on any given process row. This function also + * probes for the presence of the column panel PBCST. If available, + * this panel will be forwarded.
If PBCST is NULL on input, this + * probing mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i] is the index of the first column of U + * destined to process IPMAP[i], with the convention that + * IPLEN[nprow] is the total number of columns. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of columns of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes.
In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type; +#endif + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; /* only positions 0..SRCDIST take part */ + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } /* ip2 = largest power of 2 <= npm1, mask = 2*ip2-1 */ + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; /* not read again in this branch */ + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) /* bit ip2 set: this process receives */ + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * LDU is assumed to be N here: the lbuf columns are then contiguous and move as lbuf*N doubles - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm,
&status ); +#endif + } + else if( partner < nprow ) /* bit clear: send, if the partner exists */ + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * LDU is assumed to be N here: the lbuf columns are then contiguous and move as lbuf*N doubles - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; /* only positions SRCDIST and above take part */ + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } /* ip2 = largest power of 2 <= npm1, mask = 2*ip2-1 */ + mydist2 = ( mydist -= SRCDIST ); il = ip2; +/* + * Spread to the right - offset the IPLEN and IPMAP arrays + */ + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ?
lgth : IPLEN[SRCDIST+k] ) - ibuf; /* offsets past the last process clamp to lgth */ + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) /* bit ip2 set: this process receives */ + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * LDU is assumed to be N here: the lbuf columns are then contiguous and move as lbuf*N doubles - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) /* bit clear: send, if the partner exists */ + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * LDU is assumed to be N here: the lbuf columns are then contiguous and move as lbuf*N doubles - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); } +/* + * End of HPL_spreadT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.o
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.o new file mode 100644 index 000000000..710235018 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/src/pgesv/HPL_spreadT.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.c new file mode 100644 index 000000000..c14ef0fd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dmatgen +( + const int M, + const int N, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_dmatgen +( M, N, A, LDA, ISEED ) + const int M; + const int N; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dmatgen generates (or regenerates) a random matrix A. + * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * M (input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * this array contains the coefficients of the randomly + * generated matrix.
+ * + * LDA (input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * ISEED (input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd[2], ia1[2], ic1[2], iran1[2], + jseed[2], mult[2]; + int i, incA = LDA - M, j; /* incA: entries skipped after each column of M values */ +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; /* nothing to generate, A is left untouched */ +/* + * Initialize the random sequence + */ + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; + + HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Generate an M by N matrix, one column of M consecutive entries at a time + */ + for( j = 0; j < N; A += incA, j++ ) + for( i = 0; i < M; A++, i++ ) *A = HPL_rand(); +/* + * End of HPL_dmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.o new file mode 100644 index 000000000..a2ea27c62 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_dmatgen.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.c new file mode 100644 index 000000000..4d4dc4db5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_jumpit +( + int * MULT, + int * IADD, + int * IRANN, + int * IRANM +) +#else +void HPL_jumpit +( MULT, IADD, IRANN, IRANM ) + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_jumpit jumps in the random sequence from the number X(n) encoded + * in IRANN to the number X(m) encoded in IRANM using the constants A + * and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A + * and C obviously depend on m and n, see the function HPL_xjumpm in + * order to initialize them. IRANM is also handed to HPL_setran( 0, . ). + * + * Arguments + * ========= + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 32-lower and 32-higher bits of the constant A. + * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 32-lower and 32-higher bits of the constant C. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2, that contains + * the 32-lower and 32-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 32-lower and 32-higher bits + * of the encoding of X(m).
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; /* scratch for the product IRANN * MULT */ +/* .. + * .. Executable Statements .. + */ + HPL_lmul( IRANN, MULT, j ); /* j = IRANN * MULT; */ + HPL_ladd( j, IADD, IRANM ); /* IRANM = j + IADD; */ + HPL_setran( 0, IRANM ); /* irand = IRANM */ +/* + * End of HPL_jumpit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.o new file mode 100644 index 000000000..65b616d11 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_jumpit.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.c new file mode 100644 index 000000000..0d4e4c08c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.c @@ -0,0 +1,126 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_ladd +( + int * J, + int * K, + int * I +) +#else +void HPL_ladd +( J, K, I ) + int * J; + int * K; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ladd adds modulo 2^64 two long positive integers K and J and + * puts the result into I. The long integers I, J, K are encoded on 64 + * bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second + * entry.
+ * + * Arguments + * ========= + * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + unsigned int itmp0, itmp1; /* running 16-bit limb sums, carry in the upper bits */ + unsigned int ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16; + unsigned int ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16; + unsigned int jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16; + unsigned int jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16; /* K and J are each split into four 16-bit limbs */ + +/* .. + * .. Executable Statements .. + */ +/* + * K[1] K[0] K I[0] = (K[0]+J[0]) % 2^32 + * XXXX XXXX carry = (K[0]+J[0]) / 2^32 + * + * + J[1] J[0] J I[1] = K[1] + J[1] + carry + * XXXX XXXX I[1] = I[1] % 2^32 + * ------------- + * I[1] I[0] + * 0XXX XXXX I + */ + itmp0 = ktmp0 + jtmp0; /* bits 0-15 */ + itmp1 = itmp0 >> 16; I[0] = itmp0 - (itmp1 << 16 ); + itmp1 += ktmp1 + jtmp1; I[0] |= (itmp1 & 65535) << 16; /* bits 16-31 plus carry */ + itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2; /* bits 32-47 plus carry */ + I[1] = itmp0 - ((itmp0 >> 16 ) << 16); + itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3; /* bits 48-63 plus carry */ + I[1] |= (itmp1 & 65535) << 16; /* the carry out of bit 63 is dropped */ +/* + * End of HPL_ladd + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.o new file mode 100644 index 000000000..2d0724592 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_ladd.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.c
new file mode 100644 index 000000000..254b192f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_lmul +( + int * K, + int * J, + int * I +) +#else +void HPL_lmul +( K, J, I ) + int * K; + int * J; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_lmul multiplies modulo 2^64 two long positive integers K and J + * and puts the result into I. The long integers I, J, K are encoded on + * 64 bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second entry + * of each array. For efficiency purposes, the intrinsic modulo function + * is inlined. + * + * Arguments + * ========= + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. + * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int r, c; /* r: limb of K, c: limb of the result */ + unsigned int kk[4], jj[4], res[5]; /* 16-bit limbs, least significant first */ +/* .. + * .. Executable Statements .. + */ +/* + * Addition is done with 16 bits at a time. Multiplying two 16-bit + * integers yields a 32-bit result.
The lower 16-bits of the result + * are kept in I, and the higher 16-bits are carried over to the + * next multiplication. + */ + for (c = 0; c < 2; ++c) { + kk[2*c] = K[c] & 65535; + kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535; + jj[2*c] = J[c] & 65535; + jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535; + } + + res[0] = 0; + for (c = 0; c < 4; ++c) { + res[c+1] = (res[c] >> 16) & 65535; /* seed the next limb with the pending carry */ + res[c] &= 65535; + for (r = 0; r < c+1; ++r) { + res[c] = kk[r] * jj[c-r] + (res[c] & 65535); + res[c+1] += (res[c] >> 16) & 65535; + } + } + + for (c = 0; c < 2; ++c) /* res[4], the carry out of bit 63, is not used */ + I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535)); +/* + * End of HPL_lmul + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.o new file mode 100644 index 000000000..af6abfe4c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_lmul.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.c new file mode 100644 index 000000000..fe4e12f5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.c @@ -0,0 +1,94 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_rand( void ) +#else +double HPL_rand() +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rand generates the next number in the random sequence. This + * function ensures that this number lies in the interval (-0.5, 0.5]. 
+ * + * The static array irand contains the information (2 integers) required + * to generate the next number in the sequence X(n). This number is + * computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the + * constant d is the largest 64 bit positive unsigned integer. The array + * irand is then updated for the generation of the next number X(n+1) + * in the random sequence as follows X(n+1) = a * X(n) + c. The + * constants a and c should have been preliminarily stored in the arrays + * ias and ics as 2 pairs of integers. The initialization of ias, ics + * and irand is performed by the function HPL_setran. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + HPL_setran( 3, j ); +/* + * return number between -0.5 and 0.5 + */ + return( HPL_HALF - + (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF + + (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF ); +/* + * End of HPL_rand + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.o new file mode 100644 index 000000000..99981cf0e Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_rand.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.c new file mode 100644 index 000000000..1a3ca73aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.c @@ -0,0 +1,115 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int ias[2], ics[2], irand[2]; + +#ifdef STDC_HEADERS +void HPL_setran +( + const int OPTION, + int * IRAN +) +#else +void HPL_setran +( OPTION, IRAN ) + const int OPTION; + int * IRAN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_setran initializes the random generator with the encoding of the + * first number X(0) in the sequence, and the constants a and c used to + * compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), + * a and c are stored in the static variables irand, ias and ics. When + * OPTION is 0 (resp. 1 and 2), irand (resp. ia and ic) is set to the + * values of the input array IRAN. When OPTION is 3, IRAN is set to the + * current value of irand, and irand is then incremented. + * + * Arguments + * ========= + * + * OPTION (local input) const int + * On entry, OPTION is an integer that specifies the operations + * to be performed on the random generator as specified above. + * + * IRAN (local input/output) int * + * On entry, IRAN is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of a random number. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + if( OPTION == 3 ) + { /* return current value */ + IRAN[0] = irand[0]; IRAN[1] = irand[1]; + HPL_lmul( irand, ias, j ); /* j = irand * ias; */ + HPL_ladd( j, ics, irand ); /* irand = j + ics; */ + } + else if( OPTION == 0 ) { irand[0] = IRAN[0]; irand[1] = IRAN[1]; } + else if( OPTION == 1 ) { ias [0] = IRAN[0]; ias [1] = IRAN[1]; } + else if( OPTION == 2 ) { ics [0] = IRAN[0]; ics [1] = IRAN[1]; } +/* + * End of HPL_setran + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.o new file mode 100644 index 000000000..5c8c2451b Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_setran.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.c new file mode 100644 index 000000000..ae70bbc16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.c @@ -0,0 +1,158 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_xjumpm +( + const int JUMPM, + int * MULT, + int * IADD, + int * IRANN, + int * IRANM, + int * IAM, + int * ICM +) +#else +void HPL_xjumpm +( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) + const int JUMPM; + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; + int * IAM; + int * ICM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_xjumpm computes the constants A and C to jump JUMPM numbers in + * the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in + * MULT and IADD specify how to jump from one entry in the sequence to + * the next. + * + * Arguments + * ========= + * + * JUMPM (local input) const int + * On entry, JUMPM specifies the number of entries in the + * sequence to jump over. When JUMPM is less or equal than zero, + * A and C are not computed, IRANM is set to IRANN corresponding + * to a jump of size zero. + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant a to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant c to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2. that contains the + * 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(n+JUMPM). + * + * IAM (local output) int * + * On entry, IAM is an array of dimension 2. 
On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant A to jump from X(n) to X(n+JUMPM) in the random + * sequence. IAM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant A. When JUMPM is less or + * equal than zero, this array is not referenced. + * + * ICM (local output) int * + * On entry, ICM is an array of dimension 2. On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant C to jump from X(n) to X(n+JUMPM) in the random + * sequence. ICM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant C. When JUMPM is less or + * equal than zero, this array is not referenced. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2], k; +/* .. + * .. Executable Statements .. + */ + if( JUMPM > 0 ) + { + IAM[0] = MULT[0]; IAM[1] = MULT[1]; /* IAM = MULT; */ + ICM[0] = IADD[0]; ICM[1] = IADD[1]; /* ICM = IADD; */ + for( k = 1; k <= JUMPM-1; k++ ) + { + HPL_lmul( IAM, MULT, j ); /* j = IAM * MULT; */ + IAM[0] = j[0]; IAM[1] = j[1]; /* IAM = j; */ + HPL_lmul( ICM, MULT, j ); /* j = ICM * MULT; */ + HPL_ladd( IADD, j, ICM ); /* ICM = IADD + j; */ + } + HPL_lmul( IRANN, IAM, j ); /* j = IRANN * IAM; */ + HPL_ladd( j, ICM, IRANM ); /* IRANM = j + ICM; */ + } + else + { /* IRANM = IRANN */ + IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1]; + } +/* + * End of HPL_xjumpm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.o new file mode 100644 index 000000000..0fbb4ec34 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/matgen/HPL_xjumpm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.c new file mode 100644 index 000000000..2d129c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmatgen +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_pdmatgen +( GRID, M, N, NB, A, LDA, ISEED ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmatgen generates (or regenerates) a parallel random matrix A. + * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). 
+ * On exit, this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * ISEED (global input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd [2], ia1 [2], ia2 [2], ia3 [2], + ia4 [2], ia5 [2], ib1 [2], ib2 [2], + ib3 [2], ic1 [2], ic2 [2], ic3 [2], + ic4 [2], ic5 [2], iran1[2], iran2[2], + iran3[2], iran4[2], itmp1[2], itmp2[2], + itmp3[2], jseed[2], mult [2]; + int ib, iblk, ik, jb, jblk, jk, jump1, jump2, + jump3, jump4, jump5, jump6, jump7, lmb, + lnb, mblks, mp, mycol, myrow, nblks, + npcol, nprow, nq; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; +/* + * Generate an M by N matrix starting in process (0,0) + */ + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( ( mp <= 0 ) || ( nq <= 0 ) ) return; +/* + * Local number of blocks and size of the last one + */ + mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB; + nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB; +/* + * Compute multiplier/adder for various jumps in random sequence + */ + jump1 = 1; jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB; + jump5 = NB; jump6 = mycol; jump7 = myrow * NB; + + HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2, ic2 ); + HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3, ic3 ); + HPL_xjumpm( jump4, ia3, ic3, iran1, itmp1, ia4, ic4 ); + HPL_xjumpm( jump5, ia3, ic3, iran1, itmp1, ia5, ic5 ); + 
HPL_xjumpm( jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2 ); + HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Save value of first number in sequence + */ + ib1[0] = iran1[0]; ib1[1] = iran1[1]; + ib2[0] = iran1[0]; ib2[1] = iran1[1]; + ib3[0] = iran1[0]; ib3[1] = iran1[1]; + + for( jblk = 0; jblk < nblks; jblk++ ) + { + jb = ( jblk == nblks - 1 ? lnb : NB ); + for( jk = 0; jk < jb; jk++ ) + { + for( iblk = 0; iblk < mblks; iblk++ ) + { + ib = ( iblk == mblks - 1 ? lmb : NB ); + for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand(); + HPL_jumpit( ia2, ic2, ib1, iran2 ); + ib1[0] = iran2[0]; ib1[1] = iran2[1]; + } + A += LDA - mp; + HPL_jumpit( ia3, ic3, ib2, iran3 ); + ib1[0] = iran3[0]; ib1[1] = iran3[1]; + ib2[0] = iran3[0]; ib2[1] = iran3[1]; + } + HPL_jumpit( ia4, ic4, ib3, iran4 ); + ib1[0] = iran4[0]; ib1[1] = iran4[1]; + ib2[0] = iran4[0]; ib2[1] = iran4[1]; + ib3[0] = iran4[0]; ib3[1] = iran4[1]; + } +/* + * End of HPL_pdmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.o new file mode 100644 index 000000000..1965382af Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/pmatgen/HPL_pdmatgen.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.c new file mode 100644 index 000000000..5e4050f48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.c @@ -0,0 +1,293 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int main +( + int ARGC, + char * * ARGV +) +#else +int main( ARGC, ARGV ) +/* + * .. Scalar Arguments .. + */ + int ARGC; +/* + * .. Array Arguments .. + */ + char * * ARGV; +#endif +{ +/* + * Purpose + * ======= + * + * main is the main driver program for testing the HPL routines. + * This program is driven by a short data file named "HPL.dat". + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int nval [HPL_MAX_PARAM], + nbval [HPL_MAX_PARAM], + pval [HPL_MAX_PARAM], + qval [HPL_MAX_PARAM], + nbmval[HPL_MAX_PARAM], + ndvval[HPL_MAX_PARAM], + ndhval[HPL_MAX_PARAM]; + + HPL_T_FACT pfaval[HPL_MAX_PARAM], + rfaval[HPL_MAX_PARAM]; + + HPL_T_TOP topval[HPL_MAX_PARAM]; + + HPL_T_grid grid; + HPL_T_palg algo; + HPL_T_test test; + int L1notran, Unotran, align, equil, in, inb, + inbm, indh, indv, ipfa, ipq, irfa, itop, + mycol, myrow, ns, nbs, nbms, ndhs, ndvs, + npcol, npfs, npqs, nprow, nrfs, ntps, + rank, size, tswap; + HPL_T_ORDER pmapping; + HPL_T_FACT rpfa; + HPL_T_SWAP fswap; +/* .. + * .. Executable Statements .. 
+ */ + MPI_Init( &ARGC, &ARGV ); +#ifdef HPL_CALL_VSIPL + vsip_init((void*)0); +#endif + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Read and check validity of test parameters from input file + * + * HPL Version 1.0, Linpack benchmark input file + * Your message here + * HPL.out output file name (if any) + * 6 device out (6=stdout,7=stderr,file) + * 4 # of problems sizes (N) + * 29 30 34 35 Ns + * 4 # of NBs + * 1 2 3 4 NBs + * 0 PMAP process mapping (0=Row-,1=Column-major) + * 3 # of process grids (P x Q) + * 2 1 4 Ps + * 2 4 1 Qs + * 16.0 threshold + * 3 # of panel fact + * 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + * 2 # of recursive stopping criterium + * 2 4 NBMINs (>= 1) + * 1 # of panels in recursion + * 2 NDIVs + * 3 # of recursive panel fact. + * 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + * 1 # of broadcast + * 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + * 1 # of lookahead depth + * 0 DEPTHs (>=0) + * 2 SWAP (0=bin-exch,1=long,2=mix) + * 4 swapping threshold + * 0 L1 in (0=transposed,1=no-transposed) form + * 0 U in (0=transposed,1=no-transposed) form + * 1 Equilibration (0=no,1=yes) + * 8 memory alignment in double (> 0) + */ + HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval, + &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval, + &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran, + &Unotran, &equil, &align ); +/* + * Loop over different process grids - Define process grid. Go to bottom + * of process grid loop if this case does not use my process. 
+ */ + for( ipq = 0; ipq < npqs; ipq++ ) + { + (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq], + &grid ); + (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol ); + + if( ( myrow < 0 ) || ( myrow >= nprow ) || + ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs; + + for( in = 0; in < ns; in++ ) + { /* Loop over various problem sizes */ + for( inb = 0; inb < nbs; inb++ ) + { /* Loop over various blocking factors */ + for( indh = 0; indh < ndhs; indh++ ) + { /* Loop over various lookahead depths */ + for( itop = 0; itop < ntps; itop++ ) + { /* Loop over various broadcast topologies */ + for( irfa = 0; irfa < nrfs; irfa++ ) + { /* Loop over various recursive factorizations */ + for( ipfa = 0; ipfa < npfs; ipfa++ ) + { /* Loop over various panel factorizations */ + for( inbm = 0; inbm < nbms; inbm++ ) + { /* Loop over various recursive stopping criteria */ + for( indv = 0; indv < ndvs; indv++ ) + { /* Loop over various # of panels in recursion */ +/* + * Set up the algorithm parameters + */ + algo.btopo = topval[itop]; algo.depth = ndhval[indh]; + algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv]; + + algo.pfact = rpfa = pfaval[ipfa]; + + if( L1notran != 0 ) + { + if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrN; + else algo.pffun = HPL_pdpanrlN; + + algo.rfact = rpfa = rfaval[irfa]; + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrN; + else algo.rffun = HPL_pdrpanrlN; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN; + else algo.upfun = HPL_pdupdateNT; + } + else + { + if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrT; + else algo.pffun = HPL_pdpanrlT; + + algo.rfact = rpfa = rfaval[irfa]; + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrT; + else algo.rffun = 
HPL_pdrpanrlT; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN; + else algo.upfun = HPL_pdupdateTT; + } + + algo.fswap = fswap; algo.fsthr = tswap; + algo.equil = equil; algo.align = align; + + HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] ); + + } + } + } + } + } + } + } + } + (void) HPL_grid_exit( &grid ); +label_end_of_npqs: ; + } +/* + * Print ending messages, close output file, exit. + */ + if( rank == 0 ) + { + test.ktest = test.kpass + test.kfail + test.kskip; +#ifndef HPL_DETAILED_TIMING + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#else + if( test.thrsh > HPL_rzero ) + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#endif + + HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest, + "tests with the following results:" ); + if( test.thrsh > HPL_rzero ) + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed and passed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kfail, + "tests completed and failed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." ); + } + else + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed without checking," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." 
); + } + + HPL_fprintf( test.outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( test.outfp, "\nEnd of Tests.\n" ); + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); + + if( ( test.outfp != stdout ) && ( test.outfp != stderr ) ) + (void) fclose( test.outfp ); + } +#ifdef HPL_CALL_VSIPL + vsip_finalize((void*)0); +#endif + MPI_Finalize(); + exit( 0 ); + + return( 0 ); +/* + * End of main + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.o new file mode 100644 index 000000000..f087c3d97 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pddriver.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.c new file mode 100644 index 000000000..4ede45be6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.c @@ -0,0 +1,1182 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdinfo +( + HPL_T_test * TEST, + int * NS, + int * N, + int * NBS, + int * NB, + HPL_T_ORDER * PMAPPIN, + int * NPQS, + int * P, + int * Q, + int * NPFS, + HPL_T_FACT * PF, + int * NBMS, + int * NBM, + int * NDVS, + int * NDV, + int * NRFS, + HPL_T_FACT * RF, + int * NTPS, + HPL_T_TOP * TP, + int * NDHS, + int * DH, + HPL_T_SWAP * FSWAP, + int * TSWAP, + int * L1NOTRAN, + int * UNOTRAN, + int * EQUIL, + int * ALIGN +) +#else +void HPL_pdinfo +( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN ) + HPL_T_test * TEST; + int * NS; + int * N; + int * NBS; + int * NB; + HPL_T_ORDER * PMAPPIN; + int * NPQS; + int * P; + int * Q; + int * NPFS; + HPL_T_FACT * PF; + int * NBMS; + int * NBM; + int * NDVS; + int * NDV; + int * NRFS; + HPL_T_FACT * RF; + int * NTPS; + HPL_T_TOP * TP; + int * NDHS; + int * DH; + HPL_T_SWAP * FSWAP; + int * TSWAP; + int * L1NOTRAN; + int * UNOTRAN; + int * EQUIL; + int * ALIGN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdinfo reads the startup information for the various tests and + * transmits it to all processes. + * + * Arguments + * ========= + * + * TEST (global output) HPL_T_test * + * On entry, TEST points to a testing data structure. On exit, + * the fields of this data structure are initialized as follows: + * TEST->outfp specifies the output file where the results will + * be printed. It is only defined and used by the process 0 of + * the grid. TEST->thrsh specifies the threshhold value for the + * test ratio. TEST->epsil is the relative machine precision of + * the distributed computer. Finally the test counters, kfail, + * kpass, kskip, ktest are initialized to zero. 
+ * + * NS (global output) int * + * On exit, NS specifies the number of different problem sizes + * to be tested. NS is less than or equal to HPL_MAX_PARAM. + * + * N (global output) int * + * On entry, N is an array of dimension HPL_MAX_PARAM. On exit, + * the first NS entries of this array contain the problem sizes + * to run the code with. + * + * NBS (global output) int * + * On exit, NBS specifies the number of different distribution + * blocking factors to be tested. NBS must be less than or equal + * to HPL_MAX_PARAM. + * + * NB (global output) int * + * On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, + * the first NBS entries of this array contain the values of the + * various distribution blocking factors, to run the code with. + * + * PMAPPIN (global output) HPL_T_ORDER * + * On exit, PMAPPIN specifies the process mapping onto the no- + * des of the MPI machine configuration. PMAPPIN defaults to + * row-major ordering. + * + * NPQS (global output) int * + * On exit, NPQS specifies the number of different values that + * can be used for P and Q, i.e., the number of process grids to + * run the code with. NPQS must be less than or equal to + * HPL_MAX_PARAM. + * + * P (global output) int * + * On entry, P is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of P, + * the number of process rows of the NPQS grids to run the code + * with. + * + * Q (global output) int * + * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of Q, + * the number of process columns of the NPQS grids to run the + * code with. + * + * NPFS (global output) int * + * On exit, NPFS specifies the number of different values that + * can be used for PF : the panel factorization algorithm to run + * the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ * + * PF (global output) HPL_T_FACT * + * On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPFS entries of this array contain the various + * panel factorization algorithms to run the code with. + * + * NBMS (global output) int * + * On exit, NBMS specifies the number of various recursive + * stopping criteria to be tested. NBMS must be less than or + * equal to HPL_MAX_PARAM. + * + * NBM (global output) int * + * On entry, NBM is an array of dimension HPL_MAX_PARAM. On + * exit, the first NBMS entries of this array contain the values + * of the various recursive stopping criteria to be tested. + * + * NDVS (global output) int * + * On exit, NDVS specifies the number of various numbers of + * panels in recursion to be tested. NDVS is less than or equal + * to HPL_MAX_PARAM. + * + * NDV (global output) int * + * On entry, NDV is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDVS entries of this array contain the values + * of the various numbers of panels in recursion to be tested. + * + * NRFS (global output) int * + * On exit, NRFS specifies the number of different values that + * can be used for RF : the recursive factorization algorithm to + * be tested. NRFS is less than or equal to HPL_MAX_PARAM. + * + * RF (global output) HPL_T_FACT * + * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NRFS entries of this array contain the various + * recursive factorization algorithms to run the code with. + * + * NTPS (global output) int * + * On exit, NTPS specifies the number of different values that + * can be used for the broadcast topologies to be tested. NTPS + * is less than or equal to HPL_MAX_PARAM. + * + * TP (global output) HPL_T_TOP * + * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, + * the first NTPS entries of this array contain the various + * broadcast (along rows) topologies to run the code with. 
+ * + * NDHS (global output) int * + * On exit, NDHS specifies the number of different values that + * can be used for the lookahead depths to be tested. NDHS is + * less than or equal to HPL_MAX_PARAM. + * + * DH (global output) int * + * On entry, DH is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDHS entries of this array contain the values + * of lookahead depths to run the code with. Such a value is at + * least 0 (no-lookahead) or greater than zero. + * + * FSWAP (global output) HPL_T_SWAP * + * On exit, FSWAP specifies the swapping algorithm to be used in + * all tests. + * + * TSWAP (global output) int * + * On exit, TSWAP specifies the swapping threshold as a number + * of columns when the mixed swapping algorithm was chosen. + * + * L1NOTRAN (global output) int * + * On exit, L1NOTRAN specifies whether the upper triangle of the + * panels of columns should be stored in no-transposed form + * (L1NOTRAN=1) or in transposed form (L1NOTRAN=0). + * + * UNOTRAN (global output) int * + * On exit, UNOTRAN specifies whether the panels of rows should + * be stored in no-transposed form (UNOTRAN=1) or transposed + * form (UNOTRAN=0) during their broadcast. + * + * EQUIL (global output) int * + * On exit, EQUIL specifies whether equilibration during the + * swap-broadcast of the panel of rows should be performed + * (EQUIL=1) or not (EQUIL=0). + * + * ALIGN (global output) int * + * On exit, ALIGN specifies the alignment of the dynamically + * allocated buffers in double precision words. ALIGN is greater + * than zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + char file[HPL_LINE_MAX], line[HPL_LINE_MAX], + auth[HPL_LINE_MAX], num [HPL_LINE_MAX]; + FILE * infp; + int * iwork = NULL; + char * lineptr; + int error=0, fid, i, j, lwork, maxp, nprocs, + rank, size; +/* .. + * .. Executable Statements ..
+ */ + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Initialize the TEST data structure with default values + */ + TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0; + TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0; +/* + * Process 0 reads the input data, broadcasts to other processes and + * writes needed information to TEST->outfp. + */ + if( rank == 0 ) + { +/* + * Open file and skip data file header + */ + if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "cannot open file HPL.dat" ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) fgets( auth, HPL_LINE_MAX - 2, infp ); +/* + * Read name and unit number for summary output file + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", file ); + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + fid = atoi( num ); + if ( fid == 6 ) TEST->outfp = stdout; + else if( fid == 7 ) TEST->outfp = stderr; + else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.", + file ); + error = 1; goto label_error; + } +/* + * Read and check the parameter values for the tests. 
+ * + * Problem size (>=0) (N) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NS = atoi( num ); + if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d", + "Number of values of N is less than 1 or greater than", + HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( N[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of N less than 0" ); + error = 1; goto label_error; + } + } +/* + * Block size (>=1) (NB) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBS = atoi( num ); + if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NB is less than 1 or", + "greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NB[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NB less than 1" ); + error = 1; goto label_error; + } + } +/* + * Process grids, mapping, (>=1) (P, Q) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + *PMAPPIN = ( atoi( num ) == 1 ? 
HPL_COLUMN_MAJOR : HPL_ROW_MAJOR ); + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPQS = atoi( num ); + if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of grids is less", + "than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( P[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of P less than 1" ); + error = 1; goto label_error; + } + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( Q[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of Q less than 1" ); + error = 1; goto label_error; + } + } +/* + * Check for enough processes in machine configuration + */ + maxp = 0; + for( i = 0; i < *NPQS; i++ ) + { nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); } + if( maxp > size ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Need at least %d processes for these tests", maxp ); + error = 1; goto label_error; + } +/* + * Checking threshold value (TEST->thrsh) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num ); +/* + * Panel factorization algorithm (PF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPFS = atoi( num ); + if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "number of values of PFACT", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPFS; i++ ) + { + 
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) PF[ i ] = HPL_CROUT; + else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING; + else PF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Recursive stopping criterium (>=1) (NBM) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBMS = atoi( num ); + if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NBMIN", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBMS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NBM[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NBMIN less than 1" ); + error = 1; goto label_error; + } + } +/* + * Number of panels in recursion (>=2) (NDV) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDVS = atoi( num ); + if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NDIV", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDVS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NDV[ i ] = atoi( num ) ) < 2 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NDIV less than 2" ); + error = 1; goto label_error; + } + } +/* + * Recursive panel factorization (RF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NRFS = atoi( num ); + if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of RFACT", + "is 
less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NRFS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) RF[ i ] = HPL_CROUT; + else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING; + else RF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Broadcast topology (TP) (0=1ring, 1=1ringM, 2=2ring, 3=2ringM, 4=Blong, 5=BlongM) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NTPS = atoi( num ); + if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of BCAST", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NTPS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) TP[ i ] = HPL_1RING; + else if( j == 1 ) TP[ i ] = HPL_1RING_M; + else if( j == 2 ) TP[ i ] = HPL_2RING; + else if( j == 3 ) TP[ i ] = HPL_2RING_M; + else if( j == 4 ) TP[ i ] = HPL_BLONG; + else if( j == 5 ) TP[ i ] = HPL_BLONG_M; + else TP[ i ] = HPL_1RING_M; + } +/* + * Lookahead depth (>=0) (NDH) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDHS = atoi( num ); + if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of DEPTH", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDHS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); + lineptr += strlen( num ) + 1; + if( ( DH[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of DEPTH less than 0" ); + error = 1;
goto label_error; + } + } +/* + * Swapping algorithm (0,1 or 2) (FSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); j = atoi( num ); + if( j == 0 ) *FSWAP = HPL_SWAP00; + else if( j == 1 ) *FSWAP = HPL_SWAP01; + else if( j == 2 ) *FSWAP = HPL_SW_MIX; + else *FSWAP = HPL_SWAP01; +/* + * Swapping threshold (>=0) (TSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *TSWAP = atoi( num ); + if( *TSWAP <= 0 ) *TSWAP = 0; +/* + * L1 in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num ); + if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; +/* + * U in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num ); + if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0; +/* + * Equilibration (0=no, 1=yes) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *EQUIL = atoi( num ); + if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1; +/* + * Memory alignment in double precision words (> 0) (ALIGN) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *ALIGN = atoi( num ); + if( *ALIGN <= 0 ) *ALIGN = 4; +/* + * Close input file + */ +label_error: + if (infp != NULL) + (void) fclose( infp ); + } + else { TEST->outfp = NULL; } +/* + * Check for error on reading input file + */ + (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max, + MPI_COMM_WORLD ); + if( error ) + { + if( rank == 0 ) + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Illegal input in file HPL.dat. Exiting ..."
); + MPI_Finalize(); +#ifdef HPL_CALL_VSIPL + (void) vsip_finalize( NULL ); +#endif + exit( 1 ); + } +/* + * Compute and broadcast machine epsilon + */ + TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS ); +/* + * Pack information arrays and broadcast + */ + (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0, + MPI_COMM_WORLD ); +/* + * Broadcast array sizes + */ + iwork = (int *)malloc( (size_t)(15) * sizeof( int ) ); + if( rank == 0 ) + { + iwork[ 0] = *NS; iwork[ 1] = *NBS; + iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 ); + iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS; + iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS; + iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN; + iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN; + } + (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD ); + if( rank != 0 ) + { + *NS = iwork[ 0]; *NBS = iwork[ 1]; + *PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR ); + *NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5]; + *NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8]; + *NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11]; + *UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14]; + } + if( iwork ) free( iwork ); +/* + * Pack information arrays and broadcast + */ + lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + + (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1; + + if (lwork < 0) + exit(EXIT_FAILURE); + + + iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + if( rank == 0 ) + { + j = 0; + for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; } + for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; } + for( i = 0; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( PF[i] == HPL_CROUT ) iwork[j] = 1; + else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + 
} + for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; } + for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( RF[i] == HPL_CROUT ) iwork[j] = 1; + else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) iwork[j] = 0; + else if( TP[i] == HPL_1RING_M ) iwork[j] = 1; + else if( TP[i] == HPL_2RING ) iwork[j] = 2; + else if( TP[i] == HPL_2RING_M ) iwork[j] = 3; + else if( TP[i] == HPL_BLONG ) iwork[j] = 4; + else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5; + j++; + } + for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; } + + if( *FSWAP == HPL_SWAP00 ) iwork[j] = 0; + else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1; + else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2; + j++; + } + (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0, + MPI_COMM_WORLD ); + if ((rank != 0) && (iwork != NULL)) + { + j = 0; + for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; } + for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; } + + for( i = 0; i < *NPFS; i++ ) + { + if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) PF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; } + for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) RF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( iwork[j] == 0 ) TP[i] = HPL_1RING; + else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M; + else if( iwork[j] == 2 ) TP[i] = HPL_2RING; + else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M; + else if( iwork[j] == 4 ) TP[i] = HPL_BLONG; + else if( iwork[j] == 
5 ) TP[i] = HPL_BLONG_M; + j++; + } + for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; } + + if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00; + else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01; + else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX; + j++; + + if( iwork ) free( iwork ); + } +/* + * regurgitate input + */ + if( rank == 0 ) + { + + if (TEST->outfp != NULL){ + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "HPLinpack 2.3 -- High-Performance Linpack benchmark -- ", + " December 2, 2018" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Written by A. Petitet and R. Clint Whaley, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Piotr Luszczek, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Julien Langou, ", + "University of Colorado Denver"); + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + + HPL_fprintf( TEST->outfp, "\n%s\n", + "An explanation of the input/output parameters follows:" ); + HPL_fprintf( TEST->outfp, "%s\n", + "T/V : Wall time / encoded variant." ); + HPL_fprintf( TEST->outfp, "%s\n", + "N : The order of the coefficient matrix A." ); + HPL_fprintf( TEST->outfp, "%s\n", + "NB : The partitioning blocking factor." ); + HPL_fprintf( TEST->outfp, "%s\n", + "P : The number of process rows." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Q : The number of process columns." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Time : Time in seconds to solve the linear system." ); + HPL_fprintf( TEST->outfp, "%s\n\n", + "Gflops : Rate of execution for solving the linear system." 
); + HPL_fprintf( TEST->outfp, "%s\n", + "The following parameter values will be used:" ); +/* + * Problem size + */ + HPL_fprintf( TEST->outfp, "\nN :" ); + for( i = 0; i < Mmin( 8, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + } + } +/* + * Distribution blocking factor + */ + HPL_fprintf( TEST->outfp, "\nNB :" ); + for( i = 0; i < Mmin( 8, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + } + } +/* + * Process mapping + */ + HPL_fprintf( TEST->outfp, "\nPMAP :" ); + if( *PMAPPIN == HPL_ROW_MAJOR ) + HPL_fprintf( TEST->outfp, " Row-major process mapping" ); + else if( *PMAPPIN == HPL_COLUMN_MAJOR ) + HPL_fprintf( TEST->outfp, " Column-major process mapping" ); +/* + * Process grid + */ + HPL_fprintf( TEST->outfp, "\nP :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + } + } + HPL_fprintf( TEST->outfp, "\nQ :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( 
TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + } + } +/* + * Panel Factorization + */ + HPL_fprintf( TEST->outfp, "\nPFACT :" ); + for( i = 0; i < Mmin( 8, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Recursive stopping criterium + */ + HPL_fprintf( TEST->outfp, "\nNBMIN :" ); + for( i = 0; i < Mmin( 8, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBMS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + } + } +/* + * Number of panels in recursion + */ + HPL_fprintf( TEST->outfp, "\nNDIV :" ); + for( i = 0; i < Mmin( 8, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i 
< *NDVS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + } + } +/* + * Recursive Factorization + */ + HPL_fprintf( TEST->outfp, "\nRFACT :" ); + for( i = 0; i < Mmin( 8, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Broadcast topology + */ + HPL_fprintf( TEST->outfp, "\nBCAST :" ); + for( i = 0; i < Mmin( 8, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else 
if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + } + } +/* + * Lookahead depths + */ + HPL_fprintf( TEST->outfp, "\nDEPTH :" ); + for( i = 0; i < Mmin( 8, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NDHS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + } + } +/* + * Swapping algorithm + */ + HPL_fprintf( TEST->outfp, "\nSWAP :" ); + if( *FSWAP == HPL_SWAP00 ) + HPL_fprintf( TEST->outfp, " Binary-exchange" ); + else if( *FSWAP == HPL_SWAP01 ) + HPL_fprintf( TEST->outfp, " Spread-roll (long)" ); + else if( *FSWAP == HPL_SW_MIX ) + HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP ); +/* + * L1 storage form + */ + HPL_fprintf( TEST->outfp, "\nL1 :" ); + if( *L1NOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * U storage form + */ + HPL_fprintf( TEST->outfp, "\nU :" ); + if( *UNOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * Equilibration + */ + 
HPL_fprintf( TEST->outfp, "\nEQUIL :" ); + if( *EQUIL != 0 ) + HPL_fprintf( TEST->outfp, " yes" ); + else + HPL_fprintf( TEST->outfp, " no" ); +/* + * Alignment + */ + HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words", + *ALIGN ); + + HPL_fprintf( TEST->outfp, "\n\n" ); +/* + * For testing only + */ + if( TEST->thrsh > HPL_rzero ) + { + HPL_fprintf( TEST->outfp, "%s%s\n\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The matrix A is randomly generated for each test." ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The following scaled residual check will be computed:" ); + HPL_fprintf( TEST->outfp, "%s\n", + " ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" ); + HPL_fprintf( TEST->outfp, "%s %21.6e\n", + "- The relative machine precision (eps) is taken to be ", + TEST->epsil ); + HPL_fprintf( TEST->outfp, "%s %11.1f\n\n", + "- Computational tests pass if scaled residuals are less than ", + TEST->thrsh ); + } + } + } +/* + * End of HPL_pdinfo + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.o new file mode 100644 index 000000000..6926b4095 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdinfo.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.c new file mode 100644 index 000000000..73a62a7ff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.c @@ -0,0 +1,438 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtest +( + HPL_T_test * TEST, + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int N, + const int NB +) +#else +void HPL_pdtest +( TEST, GRID, ALGO, N, NB ) + HPL_T_test * TEST; + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int N; + const int NB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtest performs one test given a set of parameters such as the + * process grid, the problem size, the distribution blocking factor ... + * This function generates the data, calls and times the linear system + * solver, checks the accuracy of the obtained vector solution and + * writes this information to the file pointed to by TEST->outfp. + * + * Arguments + * ========= + * + * TEST (global input) HPL_T_test * + * On entry, TEST points to a testing data structure: outfp + * specifies the output file where the results will be printed. + * It is only defined and used by the process 0 of the grid. + * thrsh specifies the threshhold value for the test ratio. + * Concretely, a test is declared "PASSED" if and only if the + * following inequality is satisfied: + * ||Ax-b||_oo / ( epsil * + * ( || x ||_oo * || A ||_oo + || b ||_oo ) * + * N ) < thrsh. + * epsil is the relative machine precision of the distributed + * computer. 
Finally the test counters, kfail, kpass, kskip and + * ktest are updated as follows: if the test passes, kpass is + * incremented by one; if the test fails, kfail is incremented + * by one; if the test is skipped, kskip is incremented by one. + * ktest is left unchanged. + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters to be used for this test. + * + * N (global input) const int + * On entry, N specifies the order of the coefficient matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_DETAILED_TIMING + double HPL_w[HPL_TIMING_N]; +#endif + HPL_T_pmat mat; + double wtime[1]; + int info[3]; + double Anorm1, AnormI, Gflops, Xnorm1, XnormI, + BnormI, resid0, resid1; + double * Bptr; + void * vptr = NULL; + static int first=1; + int ii, ip2, mycol, myrow, npcol, nprow, nq; + char ctop, cpfact, crfact; + time_t current_time_start, current_time_end; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mat.n = N; mat.nb = NB; mat.info = 0; + mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow ); + nq = HPL_numroc( N, NB, NB, mycol, 0, npcol ); + mat.nq = nq + 1; +/* + * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is + * N by N+1. One column is added in every process column for the solve. + * The result however is stored in a 1 x N vector replicated in every + * process row. In every process, A is lda * (nq+1), x is 1 * nq and the + * workspace is mp. 
+ * + * Ensure that lda is a multiple of ALIGN and not a power of 2 + */ + mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align; + do + { + ii = ( mat.ld += ALGO->align ); ip2 = 1; + while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } + } + while( mat.ld == ip2 ); +/* + * Allocate dynamic memory + */ + vptr = (void*)malloc( ( (size_t)(ALGO->align) + + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * + sizeof(double) ); + info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol; + (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max, + GRID->all_comm ); + if( info[0] != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", + "[%d,%d] %s", info[1], info[2], + "Memory allocation failed for A, x and b. Skip." ); + (TEST->kskip)++; + /* some processes might have succeeded with allocation */ + if (vptr) free(vptr); + return; + } +/* + * generate matrix and right-hand-side, [ A | b ] which is N by N+1. + */ + mat.A = (double *)HPL_PTR( vptr, + ((size_t)(ALGO->align) * sizeof(double) ) ); + mat.X = Mptr( mat.A, 0, mat.nq, mat.ld ); + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); +#ifdef HPL_CALL_VSIPL + mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A), + (vsip_length)(mat.ld * mat.nq), + VSIP_MEM_NONE ); +#endif +/* + * Solve linear system + */ + HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm ); + time( &current_time_start ); + HPL_ptimer( 0 ); + HPL_pdgesv( GRID, ALGO, &mat ); + HPL_ptimer( 0 ); + time( &current_time_end ); +#ifdef HPL_CALL_VSIPL + (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); + vsip_blockdestroy_d( mat.block ); +#endif +/* + * Gather max of all CPU and WALL clock timings and print timing results + */ + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + 1, 0, wtime ); + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + if( first ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + 
HPL_fprintf( TEST->outfp, "%s%s\n", + "T/V N NB P Q", + " Time Gflops" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + if( TEST->thrsh <= HPL_rzero ) first = 0; + } +/* + * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve. + * Print WALL time + */ + Gflops = ( ( (double)(N) / 1.0e+9 ) * + ( (double)(N) / wtime[0] ) ) * + ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) ); + + cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + + if( ALGO->btopo == HPL_1RING ) ctop = '0'; + else if( ALGO->btopo == HPL_1RING_M ) ctop = '1'; + else if( ALGO->btopo == HPL_2RING ) ctop = '2'; + else if( ALGO->btopo == HPL_2RING_M ) ctop = '3'; + else if( ALGO->btopo == HPL_BLONG ) ctop = '4'; + else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5'; + + if( wtime[0] > HPL_rzero ) { + HPL_fprintf( TEST->outfp, + "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f %19.4e\n", + ( GRID->order == HPL_ROW_MAJOR ? 
'R' : 'C' ), + ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin, + N, NB, nprow, npcol, wtime[0], Gflops ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() end time %s\n", ctime( &current_time_end ) ); + } + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + HPL_TIMING_N, HPL_TIMING_BEG, HPL_w ); + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V", + "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" ); +/* + * Recursive panel factorization + */ + if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time rfact . . . : %18.2f\n", + HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization + */ + if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time pfact . . : %18.2f\n", + HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization (swap) + */ + if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time mxswp . . : %18.2f\n", + HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] ); +/* + * Update + */ + if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time update . . : %18.2f\n", + HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] ); +/* + * Update (swap) + */ + if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time laswp . . : %18.2f\n", + HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] ); +/* + * Upper triangular system solve + */ + if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time up tr sv . 
: %18.2f\n", + HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] ); + + if( TEST->thrsh <= HPL_rzero ) + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + } +#endif +/* + * Quick return, if I am not interested in checking the computations + */ + if( TEST->thrsh <= HPL_rzero ) + { (TEST->kpass)++; if( vptr ) free( vptr ); return; } +/* + * Check info returned by solve + */ + if( mat.info != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", + "Error code returned by solve is", mat.info, "skip" ); + (TEST->kskip)++; + if( vptr ) free( vptr ); return; + } +/* + * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x, + * and norm inf of b - A x. Display residual checks. + */ + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); + Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld ); + AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld ); +/* + * Because x is distributed in process rows, switch the norms + */ + XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 ); + Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 ); +/* + * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to + * find the max (in the col). Then (3) broadcast along the rows so that every + * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5] + * for the entries of B, it is very likely that BnormI (<=,~) 0.5. 
+ */ + Bptr = Mptr( mat.A, 0, nq, mat.ld ); + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){ + if( mat.mp > 0 ) + { + BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI ); + } + else + { + BnormI = HPL_rzero; + } + (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max, + GRID->col_comm ); + } + (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE, + HPL_indxg2p( N, NB, NB, 0, npcol ), + GRID->row_comm ); +/* + * If I own b, compute ( b - A x ) and ( - A x ) otherwise + */ + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 ); + } + else if( nq > 0 ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 ); + } + else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } +/* + * Reduce the distributed residual in process column 0 + */ + if( mat.mp > 0 ) + (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0, + GRID->row_comm ); +/* + * Compute || b - A x ||_oo + */ + resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld ); +/* + * Computes and displays norms, residuals ... + */ + if( N <= 0 ) + { + resid1 = HPL_rzero; + } + else + { + resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) ); + } + + if( resid1 < TEST->thrsh ) (TEST->kpass)++; + else (TEST->kfail)++; + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n", + "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1, + " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) ); + + if(resid1 >= TEST->thrsh ) + { + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||Ax-b||_oo . . . . . . . . . . . . . . . . . = ", resid0 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_oo . . . . . . . . . . . 
. . . . . . . . = ", AnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_1 . . . . . . . . . . . . . . . . . . . = ", Anorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_1 . . . . . . . . . . . . . . . . . . . = ", Xnorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI ); + } + } + if( vptr ) free( vptr ); +/* + * End of HPL_pdtest + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.o new file mode 100644 index 000000000..2d3e0fcc4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptest/HPL_pdtest.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.c new file mode 100644 index 000000000..202416079 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.c @@ -0,0 +1,358 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_ptimer_disabled; +static double HPL_ptimer_cpusec [HPL_NPTIMER], + HPL_ptimer_cpustart [HPL_NPTIMER], + HPL_ptimer_wallsec [HPL_NPTIMER], + HPL_ptimer_wallstart[HPL_NPTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_ptimer_boot( void ) +#else +void HPL_ptimer_boot() +#endif +{ +/* + * HPL_ptimer_boot (re)sets all timers to 0, and enables HPL_ptimer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 0; + + for( i = 0; i < HPL_NPTIMER; i++ ) + { + HPL_ptimer_cpusec [i] = HPL_ptimer_wallsec [i] = HPL_rzero; + HPL_ptimer_cpustart[i] = HPL_ptimer_wallstart[i] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer( const int I ) +#else +void HPL_ptimer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer provides a "stopwatch" functionality cpu/wall timer in + * seconds. Up to 64 separate timers can be functioning at once. The + * first call starts the timer, and the second stops it. This routine + * can be disenabled by calling HPL_ptimer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable + * the timer functionality. 
One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + * + * where I is the timer index in [0..64). To inititialize the timer + * functionality, one must have called HPL_ptimer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_ptimer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_ptimer_wallstart[I] == HPL_PTIMER_STARTFLAG ) + { + HPL_ptimer_wallstart[I] = HPL_ptimer_walltime(); + HPL_ptimer_cpustart [I] = HPL_ptimer_cputime (); + } + else + { + HPL_ptimer_cpusec [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I]; + HPL_ptimer_wallsec [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I]; + HPL_ptimer_wallstart[I] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_enable( void ) +#else +void HPL_ptimer_enable() +#endif +{ +/* + * HPL_ptimer_enable sets it so calls to HPL_ptimer are not ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 0; + return; +/* + * End of HPL_ptimer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_disable( void ) +#else +void HPL_ptimer_disable() +#endif +{ +/* + * HPL_ptimer_disable sets it so calls to HPL_ptimer are ignored. + */ +/* .. + * .. Executable Statements .. 
+ */ + HPL_ptimer_disabled = 1; + return; +/* + * End of HPL_ptimer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_ptimer_inquire +( + const HPL_T_PTIME TMTYPE, + const int I +) +#else +double HPL_ptimer_inquire( TMTYPE, I ) + const int I; + const HPL_T_PTIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. + */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_PTIMER_ERROR + */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_wallsec[I]; + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_cpusec [I]; + } + return( time ); +/* + * End of HPL_ptimer_inquire + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_combine +( + MPI_Comm COMM, + const HPL_T_PTIME_OP OPE, + const HPL_T_PTIME TMTYPE, + const int N, + const int IBEG, + double * TIMES +) +#else +void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES ) + const int IBEG, N; + const HPL_T_PTIME_OP OPE; + const HPL_T_PTIME TMTYPE; + MPI_Comm COMM; + double * TIMES; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_combine combines the timing information stored on a scope + * of processes into the user TIMES array. 
+ * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection on + * which the timings are taken. + * + * OPE (global input) const HPL_T_PTIME_OP + * On entry, OP specifies what combine operation should be done + * as follows: + * = HPL_AMAX_PTIME get max. time on any process (default), + * = HPL_AMIN_PTIME get min. time on any process, + * = HPL_SUM_PTIME get sum of times across processes. + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * N (global input) const int + * On entry, N specifies the number of timers to combine. + * + * IBEG (global input) const int + * On entry, IBEG specifies the first timer to be combined. + * + * TIMES (global output) double * + * On entry, TIMES is an array of dimension at least N. On exit, + * this array contains the requested timing information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, tmpdis; +/* .. + * .. Executable Statements .. + */ + tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1; +/* + * Timer has been disabled for combine operation - copy timing informa- + * tion into user times array. If wall- or cpu-time are not available + * on this machine, fill in times with HPL_PTIMER_ERROR flag and return. 
+ */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; } + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i]; } + } +/* + * Combine all nodes information, restore HPL_ptimer_disabled, and return + */ + for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] ); + + if( OPE == HPL_AMAX_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + else if( OPE == HPL_AMIN_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM ); + else if( OPE == HPL_SUM_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM ); + else + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + + HPL_ptimer_disabled = tmpdis; +/* + * End of HPL_ptimer_combine + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.o new file mode 100644 index 000000000..8c41fc9d1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.c new file mode 100644 index 000000000..711ef185d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.c @@ -0,0 +1,146 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + struct rusage ruse; + + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + return( HPL_PTIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_ptimer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.o new file mode 100644 index 000000000..0ed678ecb Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_cputime.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.c new file mode 100644 index 
000000000..96cbd300f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_GETTIMEOFDAY ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} + +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + return( MPI_Wtime() ); +} + +#endif +/* + * End of HPL_ptimer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.o new file mode 100644 index 000000000..b00e05dc8 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/ptimer/HPL_ptimer_walltime.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.c new file mode 100644 index 000000000..3be9665f7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_timer_disabled; +static double HPL_timer_cpusec [HPL_NTIMER], + HPL_timer_cpustart [HPL_NTIMER], + HPL_timer_wallsec [HPL_NTIMER], + HPL_timer_wallstart[HPL_NTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_timer_boot( void ) +#else +void HPL_timer_boot() +#endif +{ +/* + * HPL_timer_boot (re)sets all timers to 0, and enables HPL_timer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + + for( i = 0; i < HPL_NTIMER; i++ ) + { + HPL_timer_cpusec [i] = HPL_timer_wallsec [i] = HPL_rzero; + HPL_timer_cpustart[i] = HPL_timer_wallstart[i] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_timer( const int I ) +#else +void HPL_timer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer provides a "stopwatch" functionality cpu/wall timer in + * seconds. Up to 64 separate timers can be functioning at once. 
The + * first call starts the timer, and the second stops it. This routine + * can be disenabled by calling HPL_timer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_timer calls in them. HPL_timer_enable() will re-enable + * the timer functionality. One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + * + * where I is the timer index in [0..64). To initialize the timer + * functionality, one must have called HPL_timer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_timer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG ) + { + HPL_timer_wallstart[I] = HPL_timer_walltime(); + HPL_timer_cpustart [I] = HPL_timer_cputime (); + } + else + { + HPL_timer_cpusec [I] += HPL_timer_cputime () - HPL_timer_cpustart [I]; + HPL_timer_wallsec [I] += HPL_timer_walltime() - HPL_timer_wallstart[I]; + HPL_timer_wallstart[I] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_enable( void ) +#else +void HPL_timer_enable() +#endif +{ +/* + * HPL_timer_enable sets it so calls to HPL_timer are not ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + return; +/* + * End of HPL_timer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_disable( void ) +#else +void HPL_timer_disable() +#endif +{ +/* + * HPL_timer_disable sets it so calls to HPL_timer are ignored. + */ +/* .. + * .. Executable Statements .. 
+ */ + HPL_timer_disabled = 1; + return; +/* + * End of HPL_timer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_timer_inquire +( + const HPL_T_TIME TMTYPE, + const int I +) +#else +double HPL_timer_inquire( TMTYPE, I ) + const int I; + const HPL_T_TIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_TIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_TIME : wall clock time is returned, + * = HPL_CPU_TIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. + */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_TIMER_ERROR + */ + if( TMTYPE == HPL_WALL_TIME ) + { + if( HPL_timer_walltime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_wallsec[I]; + } + else + { + if( HPL_timer_cputime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_cpusec [I]; + } + return( time ); +/* + * End of HPL_timer_inquire + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.o new file mode 100644 index 000000000..fe3540ab4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.c new file mode 100644 index 000000000..4a7f9dfef --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.c @@ -0,0 +1,145 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + struct rusage ruse; + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + return( HPL_TIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_timer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.o new file mode 100644 index 000000000..3b221b80d Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_cputime.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.c new file mode 100644 index 000000000..f4f44f202 --- 
/dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.c @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_walltime( void ) +#else +double HPL_timer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} +/* + * End of HPL_timer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.o new file mode 100644 index 000000000..e73b5e8a6 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/temp/testing/timer/HPL_timer_walltime.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.am new file mode 100644 index 000000000..452ea5f06 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.am @@ -0,0 +1,13 @@ + +AM_CPPFLAGS = -I$(top_srcdir)/include + +xhpl_LDADD = ../src/libhpl.a + +bin_PROGRAMS = xhpl + +xhpl_SOURCES = \ +matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \ +matgen/HPL_lmul.c matgen/HPL_ladd.c \ +pmatgen/HPL_pdmatgen.c \ +ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \ +ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.in new file mode 100644 index 000000000..034564545 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/Makefile.in @@ -0,0 +1,698 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +bin_PROGRAMS = xhpl$(EXEEXT) +subdir = testing +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) 
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am__dirstamp = $(am__leading_dot)dirstamp +am_xhpl_OBJECTS = matgen/HPL_jumpit.$(OBJEXT) \ + matgen/HPL_rand.$(OBJEXT) matgen/HPL_setran.$(OBJEXT) \ + matgen/HPL_xjumpm.$(OBJEXT) matgen/HPL_lmul.$(OBJEXT) \ + matgen/HPL_ladd.$(OBJEXT) pmatgen/HPL_pdmatgen.$(OBJEXT) \ + ptest/HPL_pddriver.$(OBJEXT) ptest/HPL_pdinfo.$(OBJEXT) \ + ptest/HPL_pdtest.$(OBJEXT) ptimer/HPL_ptimer.$(OBJEXT) \ + ptimer/HPL_ptimer_cputime.$(OBJEXT) \ + ptimer/HPL_ptimer_walltime.$(OBJEXT) +xhpl_OBJECTS = $(am_xhpl_OBJECTS) +xhpl_DEPENDENCIES = ../src/libhpl.a +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = matgen/$(DEPDIR)/HPL_jumpit.Po \ + matgen/$(DEPDIR)/HPL_ladd.Po matgen/$(DEPDIR)/HPL_lmul.Po \ + matgen/$(DEPDIR)/HPL_rand.Po matgen/$(DEPDIR)/HPL_setran.Po \ + matgen/$(DEPDIR)/HPL_xjumpm.Po \ + pmatgen/$(DEPDIR)/HPL_pdmatgen.Po \ + ptest/$(DEPDIR)/HPL_pddriver.Po ptest/$(DEPDIR)/HPL_pdinfo.Po \ + ptest/$(DEPDIR)/HPL_pdtest.Po ptimer/$(DEPDIR)/HPL_ptimer.Po \ + ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po \ + ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = 
$(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(xhpl_SOURCES) +DIST_SOURCES = $(xhpl_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ 
+LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/include +xhpl_LDADD = ../src/libhpl.a +xhpl_SOURCES = \ +matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \ +matgen/HPL_lmul.c matgen/HPL_ladd.c \ +pmatgen/HPL_pdmatgen.c \ +ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \ +ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: 
.c .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu testing/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu testing/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) 
print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) +matgen/$(am__dirstamp): + @$(MKDIR_P) matgen + @: > matgen/$(am__dirstamp) +matgen/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) matgen/$(DEPDIR) + @: > matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_jumpit.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_rand.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_setran.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_xjumpm.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_lmul.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_ladd.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +pmatgen/$(am__dirstamp): + @$(MKDIR_P) pmatgen + @: > pmatgen/$(am__dirstamp) +pmatgen/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pmatgen/$(DEPDIR) + @: > pmatgen/$(DEPDIR)/$(am__dirstamp) +pmatgen/HPL_pdmatgen.$(OBJEXT): pmatgen/$(am__dirstamp) \ + pmatgen/$(DEPDIR)/$(am__dirstamp) +ptest/$(am__dirstamp): + @$(MKDIR_P) ptest + @: > ptest/$(am__dirstamp) +ptest/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ptest/$(DEPDIR) + @: > ptest/$(DEPDIR)/$(am__dirstamp) 
+ptest/HPL_pddriver.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptest/HPL_pdinfo.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptest/HPL_pdtest.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptimer/$(am__dirstamp): + @$(MKDIR_P) ptimer + @: > ptimer/$(am__dirstamp) +ptimer/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ptimer/$(DEPDIR) + @: > ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer_cputime.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer_walltime.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) + +xhpl$(EXEEXT): $(xhpl_OBJECTS) $(xhpl_DEPENDENCIES) $(EXTRA_xhpl_DEPENDENCIES) + @rm -f xhpl$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(xhpl_OBJECTS) $(xhpl_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f matgen/*.$(OBJEXT) + -rm -f pmatgen/*.$(OBJEXT) + -rm -f ptest/*.$(OBJEXT) + -rm -f ptimer/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_jumpit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_ladd.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_lmul.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_rand.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_setran.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_xjumpm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pmatgen/$(DEPDIR)/HPL_pdmatgen.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pddriver.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdinfo.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdtest.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) 
$(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f matgen/$(DEPDIR)/$(am__dirstamp) + -rm -f matgen/$(am__dirstamp) + -rm -f pmatgen/$(DEPDIR)/$(am__dirstamp) + -rm -f pmatgen/$(am__dirstamp) + -rm -f ptest/$(DEPDIR)/$(am__dirstamp) + -rm -f ptest/$(am__dirstamp) + -rm -f ptimer/$(DEPDIR)/$(am__dirstamp) + -rm -f ptimer/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f matgen/$(DEPDIR)/HPL_jumpit.Po + -rm -f matgen/$(DEPDIR)/HPL_ladd.Po + -rm -f matgen/$(DEPDIR)/HPL_lmul.Po + -rm -f matgen/$(DEPDIR)/HPL_rand.Po + -rm -f matgen/$(DEPDIR)/HPL_setran.Po + -rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po + -rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po + -rm -f ptest/$(DEPDIR)/HPL_pddriver.Po + -rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po + -rm -f ptest/$(DEPDIR)/HPL_pdtest.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f matgen/$(DEPDIR)/HPL_jumpit.Po + -rm -f matgen/$(DEPDIR)/HPL_ladd.Po + -rm -f matgen/$(DEPDIR)/HPL_lmul.Po + -rm -f matgen/$(DEPDIR)/HPL_rand.Po + -rm -f matgen/$(DEPDIR)/HPL_setran.Po + -rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po + -rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po + -rm -f ptest/$(DEPDIR)/HPL_pddriver.Po + -rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po + -rm -f ptest/$(DEPDIR)/HPL_pdtest.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + 
+.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-binPROGRAMS clean-generic cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic distclean-tags \ + distdir dvi dvi-am html html-am info info-am install \ + install-am install-binPROGRAMS install-data install-data-am \ + install-dvi install-dvi-am install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-binPROGRAMS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c new file mode 100644 index 000000000..c14ef0fd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_dmatgen.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dmatgen +( + const int M, + const int N, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_dmatgen +( M, N, A, LDA, ISEED ) + const int M; + const int N; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dmatgen generates (or regenerates) a random matrix A. 
+ * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * M (input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * ISEED (input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd[2], ia1[2], ic1[2], iran1[2], + jseed[2], mult[2]; + int i, incA = LDA - M, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; +/* + * Initialize the random sequence + */ + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; + + HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Generate an M by N matrix + */ + for( j = 0; j < N; A += incA, j++ ) + for( i = 0; i < M; A++, i++ ) *A = HPL_rand(); +/* + * End of HPL_dmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c new file mode 100644 index 000000000..4d4dc4db5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_jumpit.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_jumpit +( + int * MULT, + int * IADD, + int * IRANN, + int * IRANM +) +#else +void HPL_jumpit +( MULT, IADD, IRANN, IRANM ) + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_jumpit jumps in the random sequence from the number X(n) encoded + * in IRANN to the number X(m) encoded in IRANM using the constants A + * and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A + * and C obviously depend on m and n, see the function HPL_xjumpm in + * order to initialize them. + * + * Arguments + * ========= + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant A. 
+ * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant C. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2, that contains + * the 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(m). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + HPL_lmul( IRANN, MULT, j ); /* j = IRANN * MULT; */ + HPL_ladd( j, IADD, IRANM ); /* IRANM = j + IADD; */ + HPL_setran( 0, IRANM ); /* irand = IRANM */ +/* + * End of HPL_jumpit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c new file mode 100644 index 000000000..0d4e4c08c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_ladd.c @@ -0,0 +1,126 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_ladd +( + int * J, + int * K, + int * I +) +#else +void HPL_ladd +( J, K, I ) + int * J; + int * K; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ladd adds without carry two long positive integers K and J and + * puts the result into I. 
The long integers I, J, K are encoded on 64 + * bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second + * entry. + * + * Arguments + * ========= + * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + unsigned int itmp0, itmp1; + unsigned int ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16; + unsigned int ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16; + unsigned int jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16; + unsigned int jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16; + +/* .. + * .. Executable Statements .. 
+ */ +/* + * K[1] K[0] K I[0] = (K[0]+J[0]) % 2^32 + * XXXX XXXX carry = (K[0]+J[0]) / 2^32 + * + * + J[1] J[0] J I[1] = K[1] + J[1] + carry + * XXXX XXXX I[1] = I[1] % 2^32 + * ------------- + * I[1] I[0] + * 0XXX XXXX I + */ + itmp0 = ktmp0 + jtmp0; + itmp1 = itmp0 >> 16; I[0] = itmp0 - (itmp1 << 16 ); + itmp1 += ktmp1 + jtmp1; I[0] |= (itmp1 & 65535) << 16; + itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2; + I[1] = itmp0 - ((itmp0 >> 16 ) << 16); + itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3; + I[1] |= (itmp1 & 65535) << 16; +/* + * End of HPL_ladd + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c new file mode 100644 index 000000000..254b192f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_lmul.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_lmul +( + int * K, + int * J, + int * I +) +#else +void HPL_lmul +( K, J, I ) + int * K; + int * J; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_lmul multiplies without carry two long positive integers K and J + * and puts the result into I. The long integers I, J, K are encoded on + * 64 bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second entry + * of each array. For efficiency purposes, the intrisic modulo function + * is inlined. + * + * Arguments + * ========= + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. 
+ * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int r, c; + unsigned int kk[4], jj[4], res[5]; +/* .. + * .. Executable Statements .. + */ +/* + * Addition is done with 16 bits at a time. Multiplying two 16-bit + * integers yields a 32-bit result. The lower 16-bits of the result + * are kept in I, and the higher 16-bits are carried over to the + * next multiplication. + */ + for (c = 0; c < 2; ++c) { + kk[2*c] = K[c] & 65535; + kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535; + jj[2*c] = J[c] & 65535; + jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535; + } + + res[0] = 0; + for (c = 0; c < 4; ++c) { + res[c+1] = (res[c] >> 16) & 65535; + res[c] &= 65535; + for (r = 0; r < c+1; ++r) { + res[c] = kk[r] * jj[c-r] + (res[c] & 65535); + res[c+1] += (res[c] >> 16) & 65535; + } + } + + for (c = 0; c < 2; ++c) + I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535)); +/* + * End of HPL_lmul + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c new file mode 100644 index 000000000..fe4e12f5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_rand.c @@ -0,0 +1,94 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_rand( void ) +#else +double HPL_rand() +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rand generates the next number in the random sequence. This + * function ensures that this number lies in the interval (-0.5, 0.5]. + * + * The static array irand contains the information (2 integers) required + * to generate the next number in the sequence X(n). This number is + * computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the + * constant d is the largest 64 bit positive unsigned integer. The array + * irand is then updated for the generation of the next number X(n+1) + * in the random sequence as follows X(n+1) = a * X(n) + c. The + * constants a and c should have been preliminarily stored in the arrays + * ias and ics as 2 pairs of integers. The initialization of ias, ics + * and irand is performed by the function HPL_setran. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. 
+ */ + HPL_setran( 3, j ); +/* + * return number between -0.5 and 0.5 + */ + return( HPL_HALF - + (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF + + (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF ); +/* + * End of HPL_rand + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c new file mode 100644 index 000000000..1a3ca73aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_setran.c @@ -0,0 +1,115 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int ias[2], ics[2], irand[2]; + +#ifdef STDC_HEADERS +void HPL_setran +( + const int OPTION, + int * IRAN +) +#else +void HPL_setran +( OPTION, IRAN ) + const int OPTION; + int * IRAN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_setran initializes the random generator with the encoding of the + * first number X(0) in the sequence, and the constants a and c used to + * compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), + * a and c are stored in the static variables irand, ias and ics. When + * OPTION is 0 (resp. 1 and 2), irand (resp. ia and ic) is set to the + * values of the input array IRAN. When OPTION is 3, IRAN is set to the + * current value of irand, and irand is then incremented. 
+ * + * Arguments + * ========= + * + * OPTION (local input) const int + * On entry, OPTION is an integer that specifies the operations + * to be performed on the random generator as specified above. + * + * IRAN (local input/output) int * + * On entry, IRAN is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of a random number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + if( OPTION == 3 ) + { /* return current value */ + IRAN[0] = irand[0]; IRAN[1] = irand[1]; + HPL_lmul( irand, ias, j ); /* j = irand * ias; */ + HPL_ladd( j, ics, irand ); /* irand = j + ics; */ + } + else if( OPTION == 0 ) { irand[0] = IRAN[0]; irand[1] = IRAN[1]; } + else if( OPTION == 1 ) { ias [0] = IRAN[0]; ias [1] = IRAN[1]; } + else if( OPTION == 2 ) { ics [0] = IRAN[0]; ics [1] = IRAN[1]; } +/* + * End of HPL_setran + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c new file mode 100644 index 000000000..ae70bbc16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/HPL_xjumpm.c @@ -0,0 +1,158 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_xjumpm +( + const int JUMPM, + int * MULT, + int * IADD, + int * IRANN, + int * IRANM, + int * IAM, + int * ICM +) +#else +void HPL_xjumpm +( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) + const int JUMPM; + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; + int * IAM; + int * ICM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_xjumpm computes the constants A and C to jump JUMPM numbers in + * the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in + * MULT and IADD specify how to jump from one entry in the sequence to + * the next. + * + * Arguments + * ========= + * + * JUMPM (local input) const int + * On entry, JUMPM specifies the number of entries in the + * sequence to jump over. When JUMPM is less or equal than zero, + * A and C are not computed, IRANM is set to IRANN corresponding + * to a jump of size zero. + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant a to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant c to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2. that contains the + * 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(n+JUMPM). + * + * IAM (local output) int * + * On entry, IAM is an array of dimension 2. 
On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant A to jump from X(n) to X(n+JUMPM) in the random + * sequence. IAM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant A. When JUMPM is less or + * equal than zero, this array is not referenced. + * + * ICM (local output) int * + * On entry, ICM is an array of dimension 2. On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant C to jump from X(n) to X(n+JUMPM) in the random + * sequence. ICM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant C. When JUMPM is less or + * equal than zero, this array is not referenced. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2], k; +/* .. + * .. Executable Statements .. + */ + if( JUMPM > 0 ) + { + IAM[0] = MULT[0]; IAM[1] = MULT[1]; /* IAM = MULT; */ + ICM[0] = IADD[0]; ICM[1] = IADD[1]; /* ICM = IADD; */ + for( k = 1; k <= JUMPM-1; k++ ) + { + HPL_lmul( IAM, MULT, j ); /* j = IAM * MULT; */ + IAM[0] = j[0]; IAM[1] = j[1]; /* IAM = j; */ + HPL_lmul( ICM, MULT, j ); /* j = ICM * MULT; */ + HPL_ladd( IADD, j, ICM ); /* ICM = IADD + j; */ + } + HPL_lmul( IRANN, IAM, j ); /* j = IRANN * IAM; */ + HPL_ladd( j, ICM, IRANM ); /* IRANM = j + ICM; */ + } + else + { /* IRANM = IRANN */ + IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1]; + } +/* + * End of HPL_xjumpm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_dmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_dmatgen.o new file mode 100644 index 000000000..f2887d460 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_dmatgen.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_jumpit.o 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_jumpit.o new file mode 100644 index 000000000..65b616d11 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_jumpit.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_ladd.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_ladd.o new file mode 100644 index 000000000..cb47dddff Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_ladd.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_lmul.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_lmul.o new file mode 100644 index 000000000..10dc8eedf Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_lmul.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_rand.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_rand.o new file mode 100644 index 000000000..dd2332dee Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_rand.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_setran.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_setran.o new file mode 100644 index 000000000..dd58ebfaf Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_setran.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_xjumpm.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_xjumpm.o 
new file mode 100644 index 000000000..e740f38d1 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/HPL_xjumpm.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Makefile new file mode 100644 index 000000000..f027fbc06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/Makefile @@ -0,0 +1,95 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_matgen.h +# +## Object files ######################################################## +# +HPL_matobj = \ + HPL_dmatgen.o HPL_ladd.o HPL_lmul.o \ + HPL_xjumpm.o HPL_jumpit.o HPL_rand.o \ + HPL_setran.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_matobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c +HPL_ladd.o : ../HPL_ladd.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c +HPL_lmul.o : ../HPL_lmul.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c +HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c +HPL_jumpit.o : ../HPL_jumpit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c +HPL_rand.o : ../HPL_rand.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c +HPL_setran.o : ../HPL_setran.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/matgen/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c new file 
mode 100644 index 000000000..2d129c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmatgen +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_pdmatgen +( GRID, M, N, NB, A, LDA, ISEED ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmatgen generates (or regenerates) a parallel random matrix A. + * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). 
+ * On exit, this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * ISEED (global input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd [2], ia1 [2], ia2 [2], ia3 [2], + ia4 [2], ia5 [2], ib1 [2], ib2 [2], + ib3 [2], ic1 [2], ic2 [2], ic3 [2], + ic4 [2], ic5 [2], iran1[2], iran2[2], + iran3[2], iran4[2], itmp1[2], itmp2[2], + itmp3[2], jseed[2], mult [2]; + int ib, iblk, ik, jb, jblk, jk, jump1, jump2, + jump3, jump4, jump5, jump6, jump7, lmb, + lnb, mblks, mp, mycol, myrow, nblks, + npcol, nprow, nq; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; +/* + * Generate an M by N matrix starting in process (0,0) + */ + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( ( mp <= 0 ) || ( nq <= 0 ) ) return; +/* + * Local number of blocks and size of the last one + */ + mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB; + nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB; +/* + * Compute multiplier/adder for various jumps in random sequence + */ + jump1 = 1; jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB; + jump5 = NB; jump6 = mycol; jump7 = myrow * NB; + + HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2, ic2 ); + HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3, ic3 ); + HPL_xjumpm( jump4, ia3, ic3, iran1, itmp1, ia4, ic4 ); + HPL_xjumpm( jump5, ia3, ic3, iran1, itmp1, ia5, ic5 ); + 
HPL_xjumpm( jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2 ); + HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Save value of first number in sequence + */ + ib1[0] = iran1[0]; ib1[1] = iran1[1]; + ib2[0] = iran1[0]; ib2[1] = iran1[1]; + ib3[0] = iran1[0]; ib3[1] = iran1[1]; + + for( jblk = 0; jblk < nblks; jblk++ ) + { + jb = ( jblk == nblks - 1 ? lnb : NB ); + for( jk = 0; jk < jb; jk++ ) + { + for( iblk = 0; iblk < mblks; iblk++ ) + { + ib = ( iblk == mblks - 1 ? lmb : NB ); + for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand(); + HPL_jumpit( ia2, ic2, ib1, iran2 ); + ib1[0] = iran2[0]; ib1[1] = iran2[1]; + } + A += LDA - mp; + HPL_jumpit( ia3, ic3, ib2, iran3 ); + ib1[0] = iran3[0]; ib1[1] = iran3[1]; + ib2[0] = iran3[0]; ib2[1] = iran3[1]; + } + HPL_jumpit( ia4, ic4, ib3, iran4 ); + ib1[0] = iran4[0]; ib1[1] = iran4[1]; + ib2[0] = iran4[0]; ib2[1] = iran4[1]; + ib3[0] = iran4[0]; ib3[1] = iran4[1]; + } +/* + * End of HPL_pdmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/HPL_pdmatgen.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/HPL_pdmatgen.o new file mode 100644 index 000000000..3e89d607f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/HPL_pdmatgen.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Makefile new file mode 100644 index 000000000..bf33fcd7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/Makefile @@ -0,0 +1,81 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h +# +## Object files ######################################################## +# +HPL_pmaobj = \ + HPL_pdmatgen.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pmaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmatgen.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/pmatgen/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL.dat new file mode 100644 index 
000000000..47aee883e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL.dat @@ -0,0 +1,31 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out (6=stdout,7=stderr,file) +4 # of problems sizes (N) +29 30 34 35 Ns +4 # of NBs +1 2 3 4 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +3 # of process grids (P x Q) +2 1 4 Ps +2 4 1 Qs +16.0 threshold +3 # of panel fact +0 1 2 PFACTs (0=left, 1=Crout, 2=Right) +2 # of recursive stopping criterium +2 4 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +3 # of recursive panel fact. +0 1 2 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +0 DEPTHs (>=0) +2 SWAP (0=bin-exch,1=long,2=mix) +64 swapping threshold +0 L1 in (0=transposed,1=no-transposed) form +0 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c new file mode 100644 index 000000000..5e4050f48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pddriver.c @@ -0,0 +1,293 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int main +( + int ARGC, + char * * ARGV +) +#else +int main( ARGC, ARGV ) +/* + * .. Scalar Arguments .. + */ + int ARGC; +/* + * .. Array Arguments .. + */ + char * * ARGV; +#endif +{ +/* + * Purpose + * ======= + * + * main is the main driver program for testing the HPL routines. 
+ * This program is driven by a short data file named "HPL.dat". Every parameter combination read from it is handed to HPL_pdtest on each process grid that contains this process, and rank 0 prints the final summary. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int nval [HPL_MAX_PARAM], + nbval [HPL_MAX_PARAM], + pval [HPL_MAX_PARAM], + qval [HPL_MAX_PARAM], + nbmval[HPL_MAX_PARAM], + ndvval[HPL_MAX_PARAM], + ndhval[HPL_MAX_PARAM]; + + HPL_T_FACT pfaval[HPL_MAX_PARAM], + rfaval[HPL_MAX_PARAM]; + + HPL_T_TOP topval[HPL_MAX_PARAM]; + + HPL_T_grid grid; + HPL_T_palg algo; + HPL_T_test test; + int L1notran, Unotran, align, equil, in, inb, + inbm, indh, indv, ipfa, ipq, irfa, itop, + mycol, myrow, ns, nbs, nbms, ndhs, ndvs, + npcol, npfs, npqs, nprow, nrfs, ntps, + rank, size, tswap; + HPL_T_ORDER pmapping; + HPL_T_FACT rpfa; + HPL_T_SWAP fswap; +/* .. + * .. Executable Statements .. + */ + MPI_Init( &ARGC, &ARGV ); +#ifdef HPL_CALL_VSIPL + vsip_init((void*)0); +#endif + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Read and check validity of test parameters from input file + * + * HPL Version 1.0, Linpack benchmark input file + * Your message here + * HPL.out output file name (if any) + * 6 device out (6=stdout,7=stderr,file) + * 4 # of problems sizes (N) + * 29 30 34 35 Ns + * 4 # of NBs + * 1 2 3 4 NBs + * 0 PMAP process mapping (0=Row-,1=Column-major) + * 3 # of process grids (P x Q) + * 2 1 4 Ps + * 2 4 1 Qs + * 16.0 threshold + * 3 # of panel fact + * 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + * 2 # of recursive stopping criterium + * 2 4 NBMINs (>= 1) + * 1 # of panels in recursion + * 2 NDIVs + * 3 # of recursive panel fact. 
+ * 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + * 1 # of broadcast + * 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + * 1 # of lookahead depth + * 0 DEPTHs (>=0) + * 2 SWAP (0=bin-exch,1=long,2=mix) + * 4 swapping threshold + * 0 L1 in (0=transposed,1=no-transposed) form + * 0 U in (0=transposed,1=no-transposed) form + * 1 Equilibration (0=no,1=yes) + * 8 memory alignment in double (> 0) + */ + HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval, + &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval, + &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran, + &Unotran, &equil, &align ); +/* + * Loop over different process grids - Define process grid. Go to bottom + * of process grid loop if this case does not use my process. + */ + for( ipq = 0; ipq < npqs; ipq++ ) + { + (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq], + &grid ); + (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol ); + + if( ( myrow < 0 ) || ( myrow >= nprow ) || + ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs; /* the jump also bypasses HPL_grid_exit below */ + + for( in = 0; in < ns; in++ ) + { /* Loop over various problem sizes */ + for( inb = 0; inb < nbs; inb++ ) + { /* Loop over various blocking factors */ + for( indh = 0; indh < ndhs; indh++ ) + { /* Loop over various lookahead depths */ + for( itop = 0; itop < ntps; itop++ ) + { /* Loop over various broadcast topologies */ + for( irfa = 0; irfa < nrfs; irfa++ ) + { /* Loop over various recursive factorizations */ + for( ipfa = 0; ipfa < npfs; ipfa++ ) + { /* Loop over various panel factorizations */ + for( inbm = 0; inbm < nbms; inbm++ ) + { /* Loop over various recursive stopping criteria */ + for( indv = 0; indv < ndvs; indv++ ) + { /* Loop over various # of panels in recursion */ +/* + * Set up the algorithm parameters + */ + algo.btopo = topval[itop]; algo.depth = ndhval[indh]; + algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv]; + + algo.pfact = rpfa = pfaval[ipfa]; /* 
rpfa first selects the panel routine (pffun) */ + + if( L1notran != 0 ) /* L1 in no-transposed form: pick the ...N routines */ + { + if( rpfa == 
HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrN; + else algo.pffun = HPL_pdpanrlN; + + algo.rfact = rpfa = rfaval[irfa]; /* rpfa is reused to select the recursive routine (rffun) */ + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrN; + else algo.rffun = HPL_pdrpanrlN; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN; + else algo.upfun = HPL_pdupdateNT; /* upfun suffix: first letter follows L1notran, second follows Unotran (N when nonzero, T otherwise) */ + } + else /* L1 in transposed form: pick the ...T routines */ + { + if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrT; + else algo.pffun = HPL_pdpanrlT; + + algo.rfact = rpfa = rfaval[irfa]; /* rpfa is reused to select the recursive routine (rffun) */ + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrT; + else algo.rffun = HPL_pdrpanrlT; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN; + else algo.upfun = HPL_pdupdateTT; + } + + algo.fswap = fswap; algo.fsthr = tswap; + algo.equil = equil; algo.align = align; + + HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] ); + + } + } + } + } + } + } + } + } + (void) HPL_grid_exit( &grid ); +label_end_of_npqs: ; + } +/* + * Print ending messages, close output file, exit. 
+ */ + if( rank == 0 ) /* only rank 0 writes the summary */ + { + test.ktest = test.kpass + test.kfail + test.kskip; +#ifndef HPL_DETAILED_TIMING /* separator always printed here; with HPL_DETAILED_TIMING only when thrsh > HPL_rzero */ + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#else + if( test.thrsh > HPL_rzero ) + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#endif + + HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest, + "tests with the following results:" ); + if( test.thrsh > HPL_rzero ) /* presumably a non-positive threshold means residuals were not checked, cf. the else branch -- TODO confirm in HPL_pdtest */ + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed and passed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kfail, + "tests completed and failed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." ); + } + else + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed without checking," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." 
); + } + + HPL_fprintf( test.outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( test.outfp, "\nEnd of Tests.\n" ); + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); + + if( ( test.outfp != stdout ) && ( test.outfp != stderr ) ) + (void) fclose( test.outfp ); /* stdout and stderr are never closed here */ + } +#ifdef HPL_CALL_VSIPL + vsip_finalize((void*)0); +#endif + MPI_Finalize(); + exit( 0 ); + + return( 0 ); /* not reached: exit( 0 ) above does not return */ +/* + * End of main + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c new file mode 100644 index 000000000..4ede45be6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdinfo.c @@ -0,0 +1,1182 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdinfo +( + HPL_T_test * TEST, + int * NS, + int * N, + int * NBS, + int * NB, + HPL_T_ORDER * PMAPPIN, + int * NPQS, + int * P, + int * Q, + int * NPFS, + HPL_T_FACT * PF, + int * NBMS, + int * NBM, + int * NDVS, + int * NDV, + int * NRFS, + HPL_T_FACT * RF, + int * NTPS, + HPL_T_TOP * TP, + int * NDHS, + int * DH, + HPL_T_SWAP * FSWAP, + int * TSWAP, + int * L1NOTRAN, + int * UNOTRAN, + int * EQUIL, + int * ALIGN +) +#else +void HPL_pdinfo +( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN ) + HPL_T_test * TEST; + int * NS; + int * N; + int * NBS; + int * NB; + HPL_T_ORDER * PMAPPIN; + int * NPQS; + int * P; + int * Q; + int * NPFS; + HPL_T_FACT * PF; + int * NBMS; + int * NBM; + int * NDVS; + int * NDV; + int * NRFS; + HPL_T_FACT * RF; + int * NTPS; + HPL_T_TOP * TP; + int * NDHS; + int * DH; + HPL_T_SWAP * FSWAP; + int * TSWAP; + int * L1NOTRAN; + int * UNOTRAN; + int * EQUIL; + int * ALIGN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdinfo reads the startup information for the various tests and + * transmits it to all processes. + * + * Arguments + * ========= + * + * TEST (global output) HPL_T_test * + * On entry, TEST points to a testing data structure. On exit, + * the fields of this data structure are initialized as follows: + * TEST->outfp specifies the output file where the results will + * be printed. It is only defined and used by the process 0 of + * the grid. TEST->thrsh specifies the threshhold value for the + * test ratio. TEST->epsil is the relative machine precision of + * the distributed computer. Finally the test counters, kfail, + * kpass, kskip, ktest are initialized to zero. 
+ * + * NS (global output) int * + * On exit, NS specifies the number of different problem sizes + * to be tested. NS is less than or equal to HPL_MAX_PARAM. + * + * N (global output) int * + * On entry, N is an array of dimension HPL_MAX_PARAM. On exit, + * the first NS entries of this array contain the problem sizes + * to run the code with. + * + * NBS (global output) int * + * On exit, NBS specifies the number of different distribution + * blocking factors to be tested. NBS must be less than or equal + * to HPL_MAX_PARAM. + * + * NB (global output) int * + * On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, + * the first NBS entries of this array contain the values of the + * various distribution blocking factors, to run the code with. + * + * PMAPPIN (global output) HPL_T_ORDER * + * On exit, PMAPPIN specifies the process mapping onto the nodes + * of the MPI machine configuration. PMAPPIN defaults to + * row-major ordering. + * + * NPQS (global output) int * + * On exit, NPQS specifies the number of different values that + * can be used for P and Q, i.e., the number of process grids to + * run the code with. NPQS must be less than or equal to + * HPL_MAX_PARAM. + * + * P (global output) int * + * On entry, P is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of P, + * the number of process rows of the NPQS grids to run the code + * with. + * + * Q (global output) int * + * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of Q, + * the number of process columns of the NPQS grids to run the + * code with. + * + * NPFS (global output) int * + * On exit, NPFS specifies the number of different values that + * can be used for PF : the panel factorization algorithm to run + * the code with. NPFS is less than or equal to HPL_MAX_PARAM. 
+ * + * PF (global output) HPL_T_FACT * + * On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPFS entries of this array contain the various + * panel factorization algorithms to run the code with. + * + * NBMS (global output) int * + * On exit, NBMS specifies the number of various recursive + * stopping criteria to be tested. NBMS must be less than or + * equal to HPL_MAX_PARAM. + * + * NBM (global output) int * + * On entry, NBM is an array of dimension HPL_MAX_PARAM. On + * exit, the first NBMS entries of this array contain the values + * of the various recursive stopping criteria to be tested. + * + * NDVS (global output) int * + * On exit, NDVS specifies the number of various numbers of + * panels in recursion to be tested. NDVS is less than or equal + * to HPL_MAX_PARAM. + * + * NDV (global output) int * + * On entry, NDV is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDVS entries of this array contain the values + * of the various numbers of panels in recursion to be tested. + * + * NRFS (global output) int * + * On exit, NRFS specifies the number of different values that + * can be used for RF : the recursive factorization algorithm to + * be tested. NRFS is less than or equal to HPL_MAX_PARAM. + * + * RF (global output) HPL_T_FACT * + * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NRFS entries of this array contain the various + * recursive factorization algorithms to run the code with. + * + * NTPS (global output) int * + * On exit, NTPS specifies the number of different values that + * can be used for the broadcast topologies to be tested. NTPS + * is less than or equal to HPL_MAX_PARAM. + * + * TP (global output) HPL_T_TOP * + * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, + * the first NTPS entries of this array contain the various + * broadcast (along rows) topologies to run the code with. 
+ * + * NDHS (global output) int * + * On exit, NDHS specifies the number of different values that + * can be used for the lookahead depths to be tested. NDHS is + * less than or equal to HPL_MAX_PARAM. + * + * DH (global output) int * + * On entry, DH is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDHS entries of this array contain the values + * of lookahead depths to run the code with. Such a value is at + * least 0 (no-lookahead) or greater than zero. + * + * FSWAP (global output) HPL_T_SWAP * + * On exit, FSWAP specifies the swapping algorithm to be used in + * all tests. + * + * TSWAP (global output) int * + * On exit, TSWAP specifies the swapping threshold as a number + * of columns when the mixed swapping algorithm was chosen. + * + * L1NOTRA (global output) int * + * On exit, L1NOTRAN specifies whether the upper triangle of the + * panels of columns should be stored in no-transposed form + * (L1NOTRAN=1) or in transposed form (L1NOTRAN=0). + * + * UNOTRAN (global output) int * + * On exit, UNOTRAN specifies whether the panels of rows should + * be stored in no-transposed form (UNOTRAN=1) or transposed + * form (UNOTRAN=0) during their broadcast. + * + * EQUIL (global output) int * + * On exit, EQUIL specifies whether equilibration during the + * swap-broadcast of the panel of rows should be performed + * (EQUIL=1) or not (EQUIL=0). + * + * ALIGN (global output) int * + * On exit, ALIGN specifies the alignment of the dynamically + * allocated buffers in double precision words. ALIGN is greater + * than zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + char file[HPL_LINE_MAX], line[HPL_LINE_MAX], + auth[HPL_LINE_MAX], num [HPL_LINE_MAX]; + FILE * infp; + int * iwork = NULL; + char * lineptr; + int error=0, fid, i, j, lwork, maxp, nprocs, + rank, size; +/* .. + * .. Executable Statements .. 
+ */ + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Initialize the TEST data structure with default values + */ + TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0; + TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0; +/* + * Process 0 reads the input data, broadcasts to other processes and + * writes needed information to TEST->outfp. + */ + if( rank == 0 ) + { +/* + * Open file and skip data file header + */ + if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "cannot open file HPL.dat" ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) fgets( auth, HPL_LINE_MAX - 2, infp ); +/* + * Read name and unit number for summary output file + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", file ); + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + fid = atoi( num ); + if ( fid == 6 ) TEST->outfp = stdout; + else if( fid == 7 ) TEST->outfp = stderr; + else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.", + file ); + error = 1; goto label_error; + } +/* + * Read and check the parameter values for the tests. 
+ * + * Problem size (>=0) (N) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NS = atoi( num ); + if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d", + "Number of values of N is less than 1 or greater than", + HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( N[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of N less than 0" ); + error = 1; goto label_error; + } + } +/* + * Block size (>=1) (NB) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBS = atoi( num ); + if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NB is less than 1 or", + "greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NB[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NB less than 1" ); + error = 1; goto label_error; + } + } +/* + * Process grids, mapping, (>=1) (P, Q) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + *PMAPPIN = ( atoi( num ) == 1 ? 
HPL_COLUMN_MAJOR : HPL_ROW_MAJOR ); + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPQS = atoi( num ); + if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of grids is less", + "than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( P[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of P less than 1" ); + error = 1; goto label_error; + } + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( Q[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of Q less than 1" ); + error = 1; goto label_error; + } + } +/* + * Check for enough processes in machine configuration + */ + maxp = 0; + for( i = 0; i < *NPQS; i++ ) + { nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); } + if( maxp > size ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Need at least %d processes for these tests", maxp ); + error = 1; goto label_error; + } +/* + * Checking threshold value (TEST->thrsh) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num ); +/* + * Panel factorization algorithm (PF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPFS = atoi( num ); + if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "number of values of PFACT", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPFS; i++ ) + { + 
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) PF[ i ] = HPL_CROUT; + else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING; + else PF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Recursive stopping criterium (>=1) (NBM) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBMS = atoi( num ); + if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NBMIN", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBMS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NBM[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NBMIN less than 1" ); + error = 1; goto label_error; + } + } +/* + * Number of panels in recursion (>=2) (NDV) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDVS = atoi( num ); + if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NDIV", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDVS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NDV[ i ] = atoi( num ) ) < 2 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NDIV less than 2" ); + error = 1; goto label_error; + } + } +/* + * Recursive panel factorization (RF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NRFS = atoi( num ); + if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of RFACT", + "is 
less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NRFS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) RF[ i ] = HPL_CROUT; + else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING; + else RF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Broadcast topology (TP) (0=rg, 1=2rg, 2=rgM, 3=2rgM, 4=L) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NTPS = atoi( num ); + if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of BCAST", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NTPS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) TP[ i ] = HPL_1RING; + else if( j == 1 ) TP[ i ] = HPL_1RING_M; + else if( j == 2 ) TP[ i ] = HPL_2RING; + else if( j == 3 ) TP[ i ] = HPL_2RING_M; + else if( j == 4 ) TP[ i ] = HPL_BLONG; + else if( j == 5 ) TP[ i ] = HPL_BLONG_M; + else TP[ i ] = HPL_1RING_M; + } +/* + * Lookahead depth (>=0) (NDH) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDHS = atoi( num ); + if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of DEPTH", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDHS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); + lineptr += strlen( num ) + 1; + if( ( DH[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of DEPTH less than 0" ); + error = 1; 
goto label_error; + } + } +/* + * Swapping algorithm (0,1 or 2) (FSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); j = atoi( num ); + if( j == 0 ) *FSWAP = HPL_SWAP00; + else if( j == 1 ) *FSWAP = HPL_SWAP01; + else if( j == 2 ) *FSWAP = HPL_SW_MIX; + else *FSWAP = HPL_SWAP01; +/* + * Swapping threshold (>=0) (TSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *TSWAP = atoi( num ); + if( *TSWAP <= 0 ) *TSWAP = 0; +/* + * L1 in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num ); + if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; +/* + * U in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num ); + if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0; +/* + * Equilibration (0=no, 1=yes) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *EQUIL = atoi( num ); + if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1; +/* + * Memory alignment in bytes (> 0) (ALIGN) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *ALIGN = atoi( num ); + if( *ALIGN <= 0 ) *ALIGN = 4; +/* + * Close input file + */ +label_error: + if (infp != NULL) + (void) fclose( infp ); + } + else { TEST->outfp = NULL; } +/* + * Check for error on reading input file + */ + (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max, + MPI_COMM_WORLD ); + if( error ) + { + if( rank == 0 ) + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Illegal input in file HPL.dat. Exiting ..." 
); + MPI_Finalize(); +#ifdef HPL_CALL_VSIPL + (void) vsip_finalize( NULL ); +#endif + exit( 1 ); + } +/* + * Compute and broadcast machine epsilon + */ + TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS ); +/* + * Pack information arrays and broadcast + */ + (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0, + MPI_COMM_WORLD ); +/* + * Broadcast array sizes + */ + iwork = (int *)malloc( (size_t)(15) * sizeof( int ) ); + if( rank == 0 ) + { + iwork[ 0] = *NS; iwork[ 1] = *NBS; + iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 ); + iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS; + iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS; + iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN; + iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN; + } + (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD ); + if( rank != 0 ) + { + *NS = iwork[ 0]; *NBS = iwork[ 1]; + *PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR ); + *NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5]; + *NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8]; + *NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11]; + *UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14]; + } + if( iwork ) free( iwork ); +/* + * Pack information arrays and broadcast + */ + lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + + (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1; + + if (lwork < 0) + exit(EXIT_FAILURE); + + + iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + if( rank == 0 ) + { + j = 0; + for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; } + for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; } + for( i = 0; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( PF[i] == HPL_CROUT ) iwork[j] = 1; + else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + 
} + for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; } + for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( RF[i] == HPL_CROUT ) iwork[j] = 1; + else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) iwork[j] = 0; + else if( TP[i] == HPL_1RING_M ) iwork[j] = 1; + else if( TP[i] == HPL_2RING ) iwork[j] = 2; + else if( TP[i] == HPL_2RING_M ) iwork[j] = 3; + else if( TP[i] == HPL_BLONG ) iwork[j] = 4; + else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5; + j++; + } + for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; } + + if( *FSWAP == HPL_SWAP00 ) iwork[j] = 0; + else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1; + else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2; + j++; + } + (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0, + MPI_COMM_WORLD ); + if ((rank != 0) && (iwork != NULL)) + { + j = 0; + for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; } + for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; } + + for( i = 0; i < *NPFS; i++ ) + { + if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) PF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; } + for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) RF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( iwork[j] == 0 ) TP[i] = HPL_1RING; + else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M; + else if( iwork[j] == 2 ) TP[i] = HPL_2RING; + else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M; + else if( iwork[j] == 4 ) TP[i] = HPL_BLONG; + else if( iwork[j] == 
5 ) TP[i] = HPL_BLONG_M; + j++; + } + for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; } + + if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00; + else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01; + else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX; + j++; + + if( iwork ) free( iwork ); + } +/* + * regurgitate input + */ + if( rank == 0 ) + { + + if (TEST->outfp != NULL){ + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "HPLinpack 2.3 -- High-Performance Linpack benchmark -- ", + " December 2, 2018" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Written by A. Petitet and R. Clint Whaley, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Piotr Luszczek, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Julien Langou, ", + "University of Colorado Denver"); + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + + HPL_fprintf( TEST->outfp, "\n%s\n", + "An explanation of the input/output parameters follows:" ); + HPL_fprintf( TEST->outfp, "%s\n", + "T/V : Wall time / encoded variant." ); + HPL_fprintf( TEST->outfp, "%s\n", + "N : The order of the coefficient matrix A." ); + HPL_fprintf( TEST->outfp, "%s\n", + "NB : The partitioning blocking factor." ); + HPL_fprintf( TEST->outfp, "%s\n", + "P : The number of process rows." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Q : The number of process columns." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Time : Time in seconds to solve the linear system." ); + HPL_fprintf( TEST->outfp, "%s\n\n", + "Gflops : Rate of execution for solving the linear system." 
); + HPL_fprintf( TEST->outfp, "%s\n", + "The following parameter values will be used:" ); +/* + * Problem size + */ + HPL_fprintf( TEST->outfp, "\nN :" ); + for( i = 0; i < Mmin( 8, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + } + } +/* + * Distribution blocking factor + */ + HPL_fprintf( TEST->outfp, "\nNB :" ); + for( i = 0; i < Mmin( 8, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + } + } +/* + * Process mapping + */ + HPL_fprintf( TEST->outfp, "\nPMAP :" ); + if( *PMAPPIN == HPL_ROW_MAJOR ) + HPL_fprintf( TEST->outfp, " Row-major process mapping" ); + else if( *PMAPPIN == HPL_COLUMN_MAJOR ) + HPL_fprintf( TEST->outfp, " Column-major process mapping" ); +/* + * Process grid + */ + HPL_fprintf( TEST->outfp, "\nP :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + } + } + HPL_fprintf( TEST->outfp, "\nQ :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( 
TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + } + } +/* + * Panel Factorization + */ + HPL_fprintf( TEST->outfp, "\nPFACT :" ); + for( i = 0; i < Mmin( 8, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Recursive stopping criterium + */ + HPL_fprintf( TEST->outfp, "\nNBMIN :" ); + for( i = 0; i < Mmin( 8, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBMS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + } + } +/* + * Number of panels in recursion + */ + HPL_fprintf( TEST->outfp, "\nNDIV :" ); + for( i = 0; i < Mmin( 8, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i 
< *NDVS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + } + } +/* + * Recursive Factorization + */ + HPL_fprintf( TEST->outfp, "\nRFACT :" ); + for( i = 0; i < Mmin( 8, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Broadcast topology + */ + HPL_fprintf( TEST->outfp, "\nBCAST :" ); + for( i = 0; i < Mmin( 8, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else 
if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + } + } +/* + * Lookahead depths + */ + HPL_fprintf( TEST->outfp, "\nDEPTH :" ); + for( i = 0; i < Mmin( 8, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NDHS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + } + } +/* + * Swapping algorithm + */ + HPL_fprintf( TEST->outfp, "\nSWAP :" ); + if( *FSWAP == HPL_SWAP00 ) + HPL_fprintf( TEST->outfp, " Binary-exchange" ); + else if( *FSWAP == HPL_SWAP01 ) + HPL_fprintf( TEST->outfp, " Spread-roll (long)" ); + else if( *FSWAP == HPL_SW_MIX ) + HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP ); +/* + * L1 storage form + */ + HPL_fprintf( TEST->outfp, "\nL1 :" ); + if( *L1NOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * U storage form + */ + HPL_fprintf( TEST->outfp, "\nU :" ); + if( *UNOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * Equilibration + */ + 
HPL_fprintf( TEST->outfp, "\nEQUIL :" ); + if( *EQUIL != 0 ) + HPL_fprintf( TEST->outfp, " yes" ); + else + HPL_fprintf( TEST->outfp, " no" ); +/* + * Alignment + */ + HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words", + *ALIGN ); + + HPL_fprintf( TEST->outfp, "\n\n" ); +/* + * For testing only + */ + if( TEST->thrsh > HPL_rzero ) + { + HPL_fprintf( TEST->outfp, "%s%s\n\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The matrix A is randomly generated for each test." ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The following scaled residual check will be computed:" ); + HPL_fprintf( TEST->outfp, "%s\n", + " ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" ); + HPL_fprintf( TEST->outfp, "%s %21.6e\n", + "- The relative machine precision (eps) is taken to be ", + TEST->epsil ); + HPL_fprintf( TEST->outfp, "%s %11.1f\n\n", + "- Computational tests pass if scaled residuals are less than ", + TEST->thrsh ); + } + } + } +/* + * End of HPL_pdinfo + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c new file mode 100644 index 000000000..73a62a7ff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/HPL_pdtest.c @@ -0,0 +1,438 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtest +( + HPL_T_test * TEST, + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int N, + const int NB +) +#else +void HPL_pdtest +( TEST, GRID, ALGO, N, NB ) + HPL_T_test * TEST; + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int N; + const int NB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtest performs one test given a set of parameters such as the + * process grid, the problem size, the distribution blocking factor ... + * This function generates the data, calls and times the linear system + * solver, checks the accuracy of the obtained vector solution and + * writes this information to the file pointed to by TEST->outfp. + * + * Arguments + * ========= + * + * TEST (global input) HPL_T_test * + * On entry, TEST points to a testing data structure: outfp + * specifies the output file where the results will be printed. + * It is only defined and used by the process 0 of the grid. + * thrsh specifies the threshhold value for the test ratio. + * Concretely, a test is declared "PASSED" if and only if the + * following inequality is satisfied: + * ||Ax-b||_oo / ( epsil * + * ( || x ||_oo * || A ||_oo + || b ||_oo ) * + * N ) < thrsh. + * epsil is the relative machine precision of the distributed + * computer. Finally the test counters, kfail, kpass, kskip and + * ktest are updated as follows: if the test passes, kpass is + * incremented by one; if the test fails, kfail is incremented + * by one; if the test is skipped, kskip is incremented by one. + * ktest is left unchanged. + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters to be used for this test. 
+ * + * N (global input) const int + * On entry, N specifies the order of the coefficient matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_DETAILED_TIMING + double HPL_w[HPL_TIMING_N]; +#endif + HPL_T_pmat mat; + double wtime[1]; + int info[3]; + double Anorm1, AnormI, Gflops, Xnorm1, XnormI, + BnormI, resid0, resid1; + double * Bptr; + void * vptr = NULL; + static int first=1; + int ii, ip2, mycol, myrow, npcol, nprow, nq; + char ctop, cpfact, crfact; + time_t current_time_start, current_time_end; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mat.n = N; mat.nb = NB; mat.info = 0; + mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow ); + nq = HPL_numroc( N, NB, NB, mycol, 0, npcol ); + mat.nq = nq + 1; +/* + * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is + * N by N+1. One column is added in every process column for the solve. + * The result however is stored in a 1 x N vector replicated in every + * process row. In every process, A is lda * (nq+1), x is 1 * nq and the + * workspace is mp. 
+ * + * Ensure that lda is a multiple of ALIGN and not a power of 2 + */ + mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align; + do + { + ii = ( mat.ld += ALGO->align ); ip2 = 1; + while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } + } + while( mat.ld == ip2 ); +/* + * Allocate dynamic memory + */ + vptr = (void*)malloc( ( (size_t)(ALGO->align) + + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * + sizeof(double) ); + info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol; + (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max, + GRID->all_comm ); + if( info[0] != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", + "[%d,%d] %s", info[1], info[2], + "Memory allocation failed for A, x and b. Skip." ); + (TEST->kskip)++; + /* some processes might have succeeded with allocation */ + if (vptr) free(vptr); + return; + } +/* + * generate matrix and right-hand-side, [ A | b ] which is N by N+1. + */ + mat.A = (double *)HPL_PTR( vptr, + ((size_t)(ALGO->align) * sizeof(double) ) ); + mat.X = Mptr( mat.A, 0, mat.nq, mat.ld ); + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); +#ifdef HPL_CALL_VSIPL + mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A), + (vsip_length)(mat.ld * mat.nq), + VSIP_MEM_NONE ); +#endif +/* + * Solve linear system + */ + HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm ); + time( &current_time_start ); + HPL_ptimer( 0 ); + HPL_pdgesv( GRID, ALGO, &mat ); + HPL_ptimer( 0 ); + time( &current_time_end ); +#ifdef HPL_CALL_VSIPL + (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); + vsip_blockdestroy_d( mat.block ); +#endif +/* + * Gather max of all CPU and WALL clock timings and print timing results + */ + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + 1, 0, wtime ); + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + if( first ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); +
HPL_fprintf( TEST->outfp, "%s%s\n", + "T/V N NB P Q", + " Time Gflops" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + if( TEST->thrsh <= HPL_rzero ) first = 0; + } +/* + * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve. + * Print WALL time + */ + Gflops = ( ( (double)(N) / 1.0e+9 ) * + ( (double)(N) / wtime[0] ) ) * + ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) ); + + cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + + if( ALGO->btopo == HPL_1RING ) ctop = '0'; + else if( ALGO->btopo == HPL_1RING_M ) ctop = '1'; + else if( ALGO->btopo == HPL_2RING ) ctop = '2'; + else if( ALGO->btopo == HPL_2RING_M ) ctop = '3'; + else if( ALGO->btopo == HPL_BLONG ) ctop = '4'; + else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5'; + + if( wtime[0] > HPL_rzero ) { + HPL_fprintf( TEST->outfp, + "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f %19.4e\n", + ( GRID->order == HPL_ROW_MAJOR ? 
'R' : 'C' ), + ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin, + N, NB, nprow, npcol, wtime[0], Gflops ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() end time %s\n", ctime( &current_time_end ) ); + } + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + HPL_TIMING_N, HPL_TIMING_BEG, HPL_w ); + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V", + "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" ); +/* + * Recursive panel factorization + */ + if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time rfact . . . : %18.2f\n", + HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization + */ + if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time pfact . . : %18.2f\n", + HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization (swap) + */ + if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time mxswp . . : %18.2f\n", + HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] ); +/* + * Update + */ + if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time update . . : %18.2f\n", + HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] ); +/* + * Update (swap) + */ + if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time laswp . . : %18.2f\n", + HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] ); +/* + * Upper triangular system solve + */ + if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time up tr sv .
: %18.2f\n", + HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] ); + + if( TEST->thrsh <= HPL_rzero ) + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + } +#endif +/* + * Quick return, if I am not interested in checking the computations + */ + if( TEST->thrsh <= HPL_rzero ) + { (TEST->kpass)++; if( vptr ) free( vptr ); return; } +/* + * Check info returned by solve + */ + if( mat.info != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", + "Error code returned by solve is", mat.info, "skip" ); + (TEST->kskip)++; + if( vptr ) free( vptr ); return; + } +/* + * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x, + * and norm inf of b - A x. Display residual checks. + */ + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); + Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld ); + AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld ); +/* + * Because x is distributed in process rows, switch the norms + */ + XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 ); + Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 ); +/* + * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to + * find the max (in the col). Then (3) broadcast along the rows so that every + * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5] + * for the entries of B, it is very likely that BnormI (<=,~) 0.5. 
+ */ + Bptr = Mptr( mat.A, 0, nq, mat.ld ); + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){ + if( mat.mp > 0 ) + { + BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI ); + } + else + { + BnormI = HPL_rzero; + } + (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max, + GRID->col_comm ); + } + (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE, + HPL_indxg2p( N, NB, NB, 0, npcol ), + GRID->row_comm ); +/* + * If I own b, compute ( b - A x ) and ( - A x ) otherwise + */ + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 ); + } + else if( nq > 0 ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 ); + } + else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } +/* + * Reduce the distributed residual in process column 0 + */ + if( mat.mp > 0 ) + (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0, + GRID->row_comm ); +/* + * Compute || b - A x ||_oo + */ + resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld ); +/* + * Computes and displays norms, residuals ... + */ + if( N <= 0 ) + { + resid1 = HPL_rzero; + } + else + { + resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) ); + } + + if( resid1 < TEST->thrsh ) (TEST->kpass)++; + else (TEST->kfail)++; + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n", + "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1, + " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) ); + + if(resid1 >= TEST->thrsh ) + { + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||Ax-b||_oo . . . . . . . . . . . . . . . . . = ", resid0 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_oo . . . . . . . . . . . 
. . . . . . . . = ", AnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_1 . . . . . . . . . . . . . . . . . . . = ", Anorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_1 . . . . . . . . . . . . . . . . . . . = ", Xnorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI ); + } + } + if( vptr ) free( vptr ); +/* + * End of HPL_pdtest + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pddriver.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pddriver.o new file mode 100644 index 000000000..2f493afd2 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pddriver.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdinfo.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdinfo.o new file mode 100644 index 000000000..619588240 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdinfo.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdtest.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdtest.o new file mode 100644 index 000000000..3460f7cbd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/HPL_pdtest.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Makefile new file mode 100644 index 000000000..cfc96e667 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/Makefile @@ -0,0 +1,94 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \ + $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h +# +## Executable names #################################################### +# +xhpl = $(BINdir)/xhpl +# +## Object files ######################################################## +# +HPL_pteobj = \ + HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/HPL.dat : ../HPL.dat + ( $(CP) ../HPL.dat $(BINdir) ) +# +dexe.grd: $(HPL_pteobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/HPL.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_pddriver.o : ../HPL_pddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pddriver.c 
+HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdinfo.c +HPL_pdtest.o : ../HPL_pdtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtest.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/dexe.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptest/intel64/dexe.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c new file mode 100644 index 000000000..202416079 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer.c @@ -0,0 +1,358 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_ptimer_disabled; +static double HPL_ptimer_cpusec [HPL_NPTIMER], + HPL_ptimer_cpustart [HPL_NPTIMER], + HPL_ptimer_wallsec [HPL_NPTIMER], + HPL_ptimer_wallstart[HPL_NPTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_ptimer_boot( void ) +#else +void HPL_ptimer_boot() +#endif +{ +/* + * HPL_ptimer_boot (re)sets all timers to 0, and enables HPL_ptimer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 0; + + for( i = 0; i < HPL_NPTIMER; i++ ) + { + HPL_ptimer_cpusec [i] = HPL_ptimer_wallsec [i] = HPL_rzero; + HPL_ptimer_cpustart[i] = HPL_ptimer_wallstart[i] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer( const int I ) +#else +void HPL_ptimer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer provides a "stopwatch" functionality cpu/wall timer in + * seconds. Up to 64 separate timers can be functioning at once. The + * first call starts the timer, and the second stops it. This routine + * can be disenabled by calling HPL_ptimer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable + * the timer functionality. 
One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_ptimer_inquire( HPL_WALL_PTIME | HPL_CPU_PTIME, I ) + * + * where I is the timer index in [0..64). To initialize the timer + * functionality, one must have called HPL_ptimer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_ptimer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_ptimer_wallstart[I] == HPL_PTIMER_STARTFLAG ) + { + HPL_ptimer_wallstart[I] = HPL_ptimer_walltime(); + HPL_ptimer_cpustart [I] = HPL_ptimer_cputime (); + } + else + { + HPL_ptimer_cpusec [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I]; + HPL_ptimer_wallsec [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I]; + HPL_ptimer_wallstart[I] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_enable( void ) +#else +void HPL_ptimer_enable() +#endif +{ +/* + * HPL_ptimer_enable sets it so calls to HPL_ptimer are not ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 0; + return; +/* + * End of HPL_ptimer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_disable( void ) +#else +void HPL_ptimer_disable() +#endif +{ +/* + * HPL_ptimer_disable sets it so calls to HPL_ptimer are ignored. + */ +/* .. + * .. Executable Statements .. 
+ */ + HPL_ptimer_disabled = 1; + return; +/* + * End of HPL_ptimer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_ptimer_inquire +( + const HPL_T_PTIME TMTYPE, + const int I +) +#else +double HPL_ptimer_inquire( TMTYPE, I ) + const int I; + const HPL_T_PTIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. + */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_PTIMER_ERROR + */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_wallsec[I]; + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_cpusec [I]; + } + return( time ); +/* + * End of HPL_ptimer_inquire + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_combine +( + MPI_Comm COMM, + const HPL_T_PTIME_OP OPE, + const HPL_T_PTIME TMTYPE, + const int N, + const int IBEG, + double * TIMES +) +#else +void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES ) + const int IBEG, N; + const HPL_T_PTIME_OP OPE; + const HPL_T_PTIME TMTYPE; + MPI_Comm COMM; + double * TIMES; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_combine combines the timing information stored on a scope + * of processes into the user TIMES array. 
+ * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection on + * which the timings are taken. + * + * OPE (global input) const HPL_T_PTIME_OP + * On entry, OPE specifies what combine operation should be done + * as follows: + * = HPL_AMAX_PTIME get max. time on any process (default), + * = HPL_AMIN_PTIME get min. time on any process, + * = HPL_SUM_PTIME get sum of times across processes. + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * N (global input) const int + * On entry, N specifies the number of timers to combine. + * + * IBEG (global input) const int + * On entry, IBEG specifies the first timer to be combined. + * + * TIMES (global output) double * + * On entry, TIMES is an array of dimension at least N. On exit, + * this array contains the requested timing information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, tmpdis; +/* .. + * .. Executable Statements .. + */ + tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1; +/* + * Timer has been disabled for combine operation - copy timing informa- + * tion into user times array. If wall- or cpu-time are not available + * on this machine, fill in times with HPL_PTIMER_ERROR flag and return. 
+ */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; } + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i]; } + } +/* + * Combine all nodes information, restore HPL_ptimer_disabled, and return + */ + for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] ); + + if( OPE == HPL_AMAX_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + else if( OPE == HPL_AMIN_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM ); + else if( OPE == HPL_SUM_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM ); + else + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + + HPL_ptimer_disabled = tmpdis; +/* + * End of HPL_ptimer_combine + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c new file mode 100644 index 000000000..711ef185d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c @@ -0,0 +1,146 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. 
The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. + * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include <time.h> + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include <sys/times.h> +#include <unistd.h> + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include <sys/time.h> +#include <sys/resource.h> + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + struct rusage ruse; + + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + return( HPL_PTIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_ptimer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c new file mode 100644 index 000000000..96cbd300f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_GETTIMEOFDAY ) + +#include <sys/time.h> +#include <sys/resource.h> + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} + +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + return( MPI_Wtime() ); +} + +#endif +/* + * End of HPL_ptimer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer.o new file mode 100644 index 000000000..da32dc1af Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer.o differ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_cputime.o new file mode 100644 index 000000000..0ed678ecb Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_cputime.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_walltime.o new file mode 100644 index 000000000..b00e05dc8 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/HPL_ptimer_walltime.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Makefile new file mode 100644 index 000000000..971500764 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/Makefile @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_ptimer.h +# +## Object files ######################################################## +# +HPL_ptiobj = \ + HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_ptiobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_ptimer.o : ../HPL_ptimer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer.c +HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_cputime.c +HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/ptimer/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c new file mode 100644 index 000000000..3be9665f7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_timer_disabled; +static double HPL_timer_cpusec [HPL_NTIMER], + HPL_timer_cpustart [HPL_NTIMER], + HPL_timer_wallsec [HPL_NTIMER], + HPL_timer_wallstart[HPL_NTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_timer_boot( void ) +#else +void HPL_timer_boot() +#endif +{ +/* + * HPL_timer_boot (re)sets all timers to 0, and enables HPL_timer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + + for( i = 0; i < HPL_NTIMER; i++ ) + { + HPL_timer_cpusec [i] = HPL_timer_wallsec [i] = HPL_rzero; + HPL_timer_cpustart[i] = HPL_timer_wallstart[i] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_timer( const int I ) +#else +void HPL_timer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer provides a "stopwatch" functionality cpu/wall timer in + * seconds. Up to 64 separate timers can be functioning at once. 
The + * first call starts the timer, and the second stops it. This routine + * can be disabled by calling HPL_timer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_timer calls in them. HPL_timer_enable() will re-enable + * the timer functionality. One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + * + * where I is the timer index in [0..64). To initialize the timer + * functionality, one must have called HPL_timer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_timer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG ) + { + HPL_timer_wallstart[I] = HPL_timer_walltime(); + HPL_timer_cpustart [I] = HPL_timer_cputime (); + } + else + { + HPL_timer_cpusec [I] += HPL_timer_cputime () - HPL_timer_cpustart [I]; + HPL_timer_wallsec [I] += HPL_timer_walltime() - HPL_timer_wallstart[I]; + HPL_timer_wallstart[I] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_enable( void ) +#else +void HPL_timer_enable() +#endif +{ +/* + * HPL_timer_enable sets it so calls to HPL_timer are not ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + return; +/* + * End of HPL_timer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_disable( void ) +#else +void HPL_timer_disable() +#endif +{ +/* + * HPL_timer_disable sets it so calls to HPL_timer are ignored. + */ +/* .. + * .. Executable Statements .. 
+ */ + HPL_timer_disabled = 1; + return; +/* + * End of HPL_timer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_timer_inquire +( + const HPL_T_TIME TMTYPE, + const int I +) +#else +double HPL_timer_inquire( TMTYPE, I ) + const int I; + const HPL_T_TIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_TIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_TIME : wall clock time is returned, + * = HPL_CPU_TIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. + */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_TIMER_ERROR + */ + if( TMTYPE == HPL_WALL_TIME ) + { + if( HPL_timer_walltime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_wallsec[I]; + } + else + { + if( HPL_timer_cputime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_cpusec [I]; + } + return( time ); +/* + * End of HPL_timer_inquire + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c new file mode 100644 index 000000000..4a7f9dfef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_cputime.c @@ -0,0 +1,145 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + struct rusage ruse; + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + return( HPL_TIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_timer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c new file mode 100644 index 000000000..f4f44f202 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/HPL_timer_walltime.c @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_walltime( void ) +#else +double HPL_timer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} +/* + * End of HPL_timer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer.o new file mode 100644 index 000000000..874d5ee26 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_cputime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_cputime.o new file mode 100644 index 000000000..3b221b80d Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_cputime.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_walltime.o b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_walltime.o new file mode 100644 index 000000000..4ec1ce1b4 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/HPL_timer_walltime.o differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Make.inc new file mode 120000 index 000000000..8547ec814 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Make.inc @@ -0,0 +1 @@ +/home/chenshe1/sandbox/Velocity-Bench/hplinpack/cuda/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Makefile new file mode 100644 index 000000000..b8009e88a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/Makefile @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h +# +## Object files ######################################################## +# +HPL_timobj = \ + HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_timobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_timer.o : ../HPL_timer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer.c +HPL_timer_cputime.o : ../HPL_timer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_cputime.c +HPL_timer_walltime.o : ../HPL_timer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/testing/timer/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1rinM.jpg new file mode 100755 index 000000000..9af78f844 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1rinM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1ring.jpg new file mode 100755 index 000000000..73e4391cf Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/1ring.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2-273x48.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2-273x48.jpg new file mode 100755 index 000000000..23795f8b9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2-273x48.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2rinM.jpg new file mode 100755 index 000000000..c294e0d07 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2rinM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2ring.jpg new file mode 100755 index 000000000..f37187f13 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/2ring.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_abort.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_abort.html new file mode 100755 index 000000000..49a4bd318 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_abort.html @@ -0,0 +1,67 @@ + + +HPL_abort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_abort halts execution. + +

Synopsis

+#include "hpl.h"

+void +HPL_abort( +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_abort +displays an error message on stderr and halts execution. + +

Arguments

+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_abort( __LINE__, __FILE__, "Halt.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+
HPL_fprintf, +HPL_warn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_all_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_all_reduce.html new file mode 100755 index 000000000..591cdd596 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_all_reduce.html @@ -0,0 +1,67 @@ + + +HPL_all_reduce HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_all_reduce All reduce operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_all_reduce( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const HPL_T_OP +OP, +MPI_Comm +COMM +); + +

Description

+HPL_all_reduce +performs a global reduce operation across all +processes of a group leaving the results on all processes. + +

Arguments

+
+BUFFER  (local input/global output)   void *
+        On entry,  BUFFER  points to  the  buffer to be combined.  On
+        exit, this array contains the combined data and  is identical
+        on all processes in the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer  operands.
+
+
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_barrier.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_barrier.html new file mode 100755 index 000000000..86ae426ad --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_barrier.html @@ -0,0 +1,41 @@ + + +HPL_barrier HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_barrier Barrier operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_barrier( +MPI_Comm +COMM +); + +

Description

+HPL_barrier +blocks the caller until all process members have called it. +The call returns at any process only after all group members have +entered the call. + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bcast.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bcast.html new file mode 100755 index 000000000..079325ed7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bcast.html @@ -0,0 +1,46 @@ + + +HPL_bcast HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_bcast Perform the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_bcast( +HPL_T_panel * +PANEL, +int * +IFLAG +); + +

Description

+HPL_bcast +broadcasts the current panel. Successful completion is +indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to +HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was +not completed, in which case this function should be called again. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+
+IFLAG   (output)                      int *
+        On exit,  IFLAG  indicates  whether  or not the broadcast has
+        occurred.
+
+ +

See Also

+HPL_binit, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_binit.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_binit.html new file mode 100755 index 000000000..0f9a9e1ae --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_binit.html @@ -0,0 +1,37 @@ + + +HPL_binit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_binit Initialize the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_binit( +HPL_T_panel * +PANEL +); + +

Description

+HPL_binit +initializes a row broadcast. Successful completion is +indicated by the returned error code HPL_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_broadcast.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_broadcast.html new file mode 100755 index 000000000..6e24b2c2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_broadcast.html @@ -0,0 +1,67 @@ + + +HPL_broadcast HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_broadcast Broadcast operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_broadcast( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const int +ROOT, +MPI_Comm +COMM +); + +

Description

+HPL_broadcast +broadcasts a message from the process with rank ROOT to +all processes in the group. + +

Arguments

+
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be broadcast. On
+        exit, this array contains the broadcast data and is identical
+        on all processes in the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer  operands.
+
+
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the source process.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bwait.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bwait.html new file mode 100755 index 000000000..f1dd51e7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_bwait.html @@ -0,0 +1,38 @@ + + +HPL_bwait HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_bwait Finalize the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_bwait( +HPL_T_panel * +PANEL +); + +

Description

+HPL_bwait +waits for the row broadcast of the current panel to +terminate. Successful completion is indicated by the returned error +code HPL_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_binit, +HPL_bcast. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_copyL.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_copyL.html new file mode 100755 index 000000000..4b98963ac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_copyL.html @@ -0,0 +1,42 @@ + + +HPL_copyL HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_copyL Copy the current panel into a contiguous workspace. + +

Synopsis

+#include "hpl.h"

+void +HPL_copyL( +HPL_T_panel * +PANEL +); + +

Description

+HPL_copyL +copies the panel of columns, the L1 replicated submatrix, +the pivot array and the info scalar into a contiguous workspace for +later broadcast. + +The copy of this panel into a contiguous buffer can be enforced by +specifying -DHPL_COPY_L in the architecture specific Makefile. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_binit, +HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_daxpy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_daxpy.html new file mode 100755 index 000000000..c34d0b2e8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_daxpy.html @@ -0,0 +1,89 @@ + + +HPL_daxpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_daxpy y := y + alpha * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_daxpy( +const int +N, +const double +ALPHA, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_daxpy +scales the vector x by alpha and adds it to y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the scaled entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dcopy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dcopy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dcopy.html new file mode 100755 index 000000000..2a4a485b5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dcopy.html @@ -0,0 +1,81 @@ + + +HPL_dcopy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dcopy y := x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dcopy( +const int +N, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_dcopy +copies the vector x into the vector y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dcopy( 3, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemm.html new file mode 100755 index 000000000..667c0ff01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemm.html @@ -0,0 +1,178 @@ + + +HPL_dgemm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dgemm C := alpha * op(A) * op(B) + beta * C. + +

Synopsis

+#include "hpl.h"

+void +HPL_dgemm( +const enum HPL_ORDER +ORDER, +const enum HPL_TRANS +TRANSA, +const enum HPL_TRANS +TRANSB, +const int +M, +const int +N, +const int +K, +const double +ALPHA, +const double * +A, +const int +LDA, +const double * +B, +const int +LDB, +const double +BETA, +double * +C, +const int +LDC +); + +

Description

+HPL_dgemm +performs one of the matrix-matrix operations + + C := alpha * op( A ) * op( B ) + beta * C + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +Alpha and beta are scalars, and A, B and C are matrices, with op(A) +an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+TRANSA  (local input)                 const enum HPL_TRANS
+        On entry, TRANSA  specifies the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSA==HplNoTrans    : op( A ) = A,                     
+           TRANSA==HplTrans      : op( A ) = A^T,                   
+           TRANSA==HplConjTrans  : op( A ) = A^T.                   
+
+
+TRANSB  (local input)                 const enum HPL_TRANS
+        On entry, TRANSB  specifies the form of  op(B)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSB==HplNoTrans    : op( B ) = B,                     
+           TRANSB==HplTrans      : op( B ) = B^T,                   
+           TRANSB==HplConjTrans  : op( B ) = B^T.                   
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the  number  of rows  of the  matrix
+        op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the number  of columns of the matrix
+        op(B)  and  the number of columns of the matrix  C. N must be
+        at least zero.
+
+
+K       (local input)                 const int
+        On entry,  K  specifies  the  number of columns of the matrix
+        op(A) and the number of rows of the matrix op(B).  K  must be
+        at least  zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero  then the elements of the matrices A and B
+        need not be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  is an array of dimension (LDA,ka),  where ka is
+        k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+        entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+        the array  A must contain the matrix A, otherwise the leading
+        k  by  m  part of the array  A  must  contain the  matrix  A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA  specifies the first dimension of A as declared
+        in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+        LDA must be at least max(1,m), otherwise LDA must be at least
+        max(1,k).
+
+
+B       (local input)                 const double *
+        On entry, B is an array of dimension (LDB,kb),  where  kb  is
+        n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+        entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+        the array  B must contain the matrix B, otherwise the leading
+        n  by  k  part of the array  B  must  contain  the matrix  B.
+
+
+LDB     (local input)                 const int
+        On entry, LDB  specifies the first dimension of B as declared
+        in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+        LDB must be at least max(1,k), otherwise LDB must be at least
+        max(1,n).
+
+
+BETA    (local input)                 const double
+        On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+        supplied  as  zero  then  the  elements of the matrix C  need
+        not be set on input.
+
+
+C       (local input/output)          double *
+        On entry,  C  is an array of dimension (LDC,n). Before entry,
+        the  leading m by n part  of  the  array  C  must contain the
+        matrix C,  except when beta is zero, in which case C need not
+        be set on entry. On exit, the array  C  is overwritten by the
+        m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+
+
+LDC     (local input)                 const int
+        On entry, LDC  specifies the first dimension of C as declared
+        in  the   calling  (sub)  program.   LDC  must  be  at  least
+        max(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2], c[2*2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+   printf("  [%f,%f]\n", c[0], c[2]);
+   printf("c=[%f,%f]\n", c[1], c[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dtrsm. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemv.html new file mode 100755 index 000000000..d5921a9b2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dgemv.html @@ -0,0 +1,146 @@ + + +HPL_dgemv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dgemv y := beta * y + alpha * op(A) * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dgemv( +const enum HPL_ORDER +ORDER, +const enum HPL_TRANS +TRANS, +const int +M, +const int +N, +const double +ALPHA, +const double * +A, +const int +LDA, +const double * +X, +const int +INCX, +const double +BETA, +double * +Y, +const int +INCY +); + +

Description

+HPL_dgemv +performs one of the matrix-vector operations + + y := alpha * op( A ) * x + beta * y, + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +where alpha and beta are scalars, x and y are vectors and A is an m +by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies the  operation to be performed as
+        follows:   
+           TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+           TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  A and X  need not be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+BETA    (local input)                 const double
+        On entry, BETA  specifies the scalar beta.    When  BETA   is
+        supplied as zero then  Y  need not be set on input.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        Before entry with BETA non-zero, the incremented array Y must
+        contain the vector  y.  On exit,  Y  is  overwritten  by  the
+        updated vector y.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+              a, 2, x, 1, -1.0, y, 1 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dger, +HPL_dtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dger.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dger.html new file mode 100755 index 000000000..e4ea948ed --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dger.html @@ -0,0 +1,124 @@ + + +HPL_dger HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dger A := alpha * x * y^T + A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dger( +const enum HPL_ORDER +ORDER, +const int +M, +const int +N, +const double +ALPHA, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY, +double * +A, +const int +LDA +); + +

Description

+HPL_dger +performs the rank 1 operation + + A := alpha * x * y^T + A, + +where alpha is a scalar, x is an m-element vector, y is an n-element +vector and A is an m by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  X and Y  need not be set on input.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input)                 double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+
+A       (local input/output)          double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients. On exit, A is
+        overwritten by the updated matrix.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+             a, 2 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dgemv, +HPL_dtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlacpy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlacpy.html new file mode 100755 index 000000000..b64d34e0c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlacpy.html @@ -0,0 +1,84 @@ + + +HPL_dlacpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlacpy B := A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlacpy( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dlacpy +copies an array A into an array B. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M specifies the number of rows of the arrays A and
+        B. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies  the number of columns of the arrays A
+        and B. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N).
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with A.
+
+
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0);
+   return(0);
+}
+
+ +

See Also

+HPL_dlatcpy. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlamch.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlamch.html new file mode 100755 index 000000000..cb87a90ba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlamch.html @@ -0,0 +1,86 @@ + + +HPL_dlamch HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlamch determines machine-specific arithmetic constants. + +

Synopsis

+#include "hpl.h"

+double +HPL_dlamch( +const HPL_T_MACH +CMACH +); + +

Description

+HPL_dlamch +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such +that 1 / sfmin does not overflow, the base of the machine (base), the +precision (prec), the number of (base) digits in the mantissa (t), +whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the +minimum exponent before (gradual) underflow (emin), the underflow +threshold (rmin) base**(emin-1), the largest exponent before overflow +(emax), the overflow threshold (rmax) (base**emax)*(1-eps). + +

Arguments

+
+CMACH   (local input)                 const HPL_T_MACH
+        Specifies the value to be returned by HPL_dlamch             
+           = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+           = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+           = HPL_MACH_BASE,  HPL_dlamch := base                      
+           = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+           = HPL_MACH_MLEN,  HPL_dlamch := t                         
+           = HPL_MACH_RND,   HPL_dlamch := rnd                       
+           = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+           = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+           = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+           = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double eps;
+   eps = HPL_dlamch( HPL_MACH_EPS );
+   printf("eps=%18.8e\n", eps);
+   exit(0); return(0);
+}
+
+ +

References

+This function has been manually translated from the Fortran 77 LAPACK +auxiliary function dlamch.f (version 2.0 -- 1992), that was itself +based on the function ENVRON by Malcolm and incorporated suggestions +by Gentleman and Marovich. See + +Malcolm M. A., Algorithms to reveal properties of floating-point +arithmetic., Comms. of the ACM, 15, 949-951 (1972). + +Gentleman W. M. and Marovich S. B., More on algorithms that reveal +properties of floating point arithmetic units., Comms. of the ACM, +17, 276-277 (1974). + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlange.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlange.html new file mode 100755 index 000000000..ce276e257 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlange.html @@ -0,0 +1,86 @@ + + +HPL_dlange HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlange Compute ||A||. + +

Synopsis

+#include "hpl.h"

+double +HPL_dlange( +const HPL_T_NORM +NORM, +const int +M, +const int +N, +const double * +A, +const int +LDA +); + +

Description

+HPL_dlange +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a matrix A: + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. + +

Arguments

+
+NORM    (local input)                 const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry,  A  points to an  array of dimension  (LDA,N), that
+        contains the matrix A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], norm;
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+   printf("norm=%f\n", norm);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dlaprnt, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaprnt.html new file mode 100755 index 000000000..f589ee2bb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaprnt.html @@ -0,0 +1,86 @@ + + +HPL_dlaprnt HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaprnt Print the matrix A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaprnt( +const int +M, +const int +N, +double * +A, +const int +IA, +const int +JA, +const int +LDA, +const char * +CMATNM +); + +

Description

+HPL_dlaprnt +prints to standard error an M-by-N matrix A. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies the number of rows of A. M must be at
+        least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies the number of columns of A. N must be
+        at least zero.
+
+
+A       (local input)                 double *
+        On entry, A  points to an array of dimension (LDA,N).
+
+
+IA      (local input)                 const int
+        On entry, IA specifies the starting row index to be printed.
+
+
+JA      (local input)                 const int
+        On entry,  JA  specifies  the  starting  column index  to be
+        printed.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+
+CMATNM  (local input)                 const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp00N.html new file mode 100755 index 000000000..8e36cf6c6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp00N.html @@ -0,0 +1,78 @@ + + +HPL_dlaswp00N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp00N performs a series of row interchanges. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp00N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int * +IPIV +); + +

Description

+HPL_dlaswp00N +performs a series of local row interchanges on a matrix +A. One row interchange is initiated for rows 0 through M-1 of A. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M specifies the number of rows of the array A to be
+        interchanged. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies  the number of columns of the array A.
+        N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A  points to an array of dimension (LDA,N) to which
+        the row interchanges will be  applied.  On exit, the permuted
+        matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+IPIV    (local input)                 const int *
+        On entry,  IPIV  is  an  array of size  M  that  contains the
+        pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+        implies that local rows k and l are to be interchanged.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01N.html new file mode 100755 index 000000000..aa8861d10 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01N.html @@ -0,0 +1,109 @@ + + +HPL_dlaswp01N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp01N copies rows of A into itself and into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp01N( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp01N +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the positions indicated by positive values of LINDXAU.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01T.html new file mode 100755 index 000000000..9697471c5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp01T.html @@ -0,0 +1,110 @@ + + +HPL_dlaswp01T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp01T copies rows of A into itself and into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp01T( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp01T +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. Rows of A are stored as columns in U. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the  positions indicated by positive values of LINDXAU.  The
+        rows of A are stored as columns in U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp02N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp02N.html new file mode 100755 index 000000000..d4e1a0cf8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp02N.html @@ -0,0 +1,107 @@ + + +HPL_dlaswp02N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp02N pack rows of A into columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp02N( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +W0, +double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp02N +packs scattered rows of an array A into workspace W. +The row offsets in A are specified by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        copied into W. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        copied into W. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be copied into W.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+W0      (local input/output)          double *
+        On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local output)                double *
+        On entry, W  is an array of size (LDW,M). On exit, W contains
+        the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+        in W(:,i).
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied into W.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M  that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the rows of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03N.html new file mode 100755 index 000000000..f5c4127b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03N.html @@ -0,0 +1,95 @@ + + +HPL_dlaswp03N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp03N copy rows of W into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp03N( +const int +M, +const int +N, +double * +U, +const int +LDU, +const double * +W0, +const double * +W, +const int +LDW +); + +

Description

+HPL_dlaswp03N +copies columns of W into rows of an array U. The +destination in U of these columns contained in W is stored within W0. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N).  Columns
+        of W are copied as rows within this array U at  the positions
+        specified in W0.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03T.html new file mode 100755 index 000000000..010175313 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp03T.html @@ -0,0 +1,95 @@ + + +HPL_dlaswp03T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp03T copy columns of W into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp03T( +const int +M, +const int +N, +double * +U, +const int +LDU, +const double * +W0, +const double * +W, +const int +LDW +); + +

Description

+HPL_dlaswp03T +copies columns of W into an array U. The destination +in U of these columns contained in W is stored within W0. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M).  Columns
+        of W are copied within the array U at the positions specified
+        in W0.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04N.html new file mode 100755 index 000000000..bb6cab0a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04N.html @@ -0,0 +1,131 @@ + + +HPL_dlaswp04N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp04N copy rows of U in A and replace them with columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp04N( +const int +M0, +const int +M1, +const int +N, +double * +U, +const int +LDU, +double * +A, +const int +LDA, +const double * +W0, +const double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp04N +copies M0 rows of U into A and replaces those rows of U +with columns of W. In addition M1 - M0 columns of W are copied into +rows of U. + +

Arguments

+
+M0      (local input)                 const int
+        On entry, M0 specifies the number of rows of U that should be
+        copied into  A  and replaced by columns of  W.  M0 must be at
+        least zero.
+
+
+M1      (local input)                 const int
+        On entry, M1 specifies the number of columns of W that should
+        be copied into rows of U. M1 must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  points to  an array of dimension (LDU,N).  This
+        array contains the rows that are to be copied into A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M1).
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the row W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes of A into which rows of U are copied.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the columns of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04T.html new file mode 100755 index 000000000..0209a3689 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp04T.html @@ -0,0 +1,132 @@ + + +HPL_dlaswp04T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp04T copy columns of U in rows of A and replace them with columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp04T( +const int +M0, +const int +M1, +const int +N, +double * +U, +const int +LDU, +double * +A, +const int +LDA, +const double * +W0, +const double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp04T +copies M0 columns of U into rows of A and replaces those +columns of U with columns of W. In addition M1 - M0 columns of W are +copied into U. + +

Arguments

+
+M0      (local input)                 const int
+        On entry, M0 specifies the number of columns of U that should
+        be copied into A and replaced by columns of W.  M0 must be at
+        least zero.
+
+
+M1      (local input)                 const int
+        On entry, M1 specifies  the number of columns of W that will
+        be copied into U. M1 must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies the length of the columns of  U  that
+        will be copied into rows of A. N must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes of A into which columns of U are copied.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the  local column indexes of  U  that should be copied into A
+        and replaced by the columns of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05N.html new file mode 100755 index 000000000..f428b7354 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05N.html @@ -0,0 +1,98 @@ + + +HPL_dlaswp05N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp05N copy rows of U into A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp05N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp05N +copies rows of U of global offset LINDXAU into rows of +A at positions indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of U that should be
+        copied into A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input)                 const double *
+        On entry,  U  points to an array of dimension  (LDU,N).  This
+        array contains the rows that are to be copied into A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local row indexes of U that should be copied in A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05T.html new file mode 100755 index 000000000..fffb9f320 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp05T.html @@ -0,0 +1,98 @@ + + +HPL_dlaswp05T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp05T copy columns of U into rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp05T( +const int +M, +const int +N, +double * +A, +const int +LDA, +const double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp05T +copies columns of U of global offset LINDXAU into rows +of A at positions indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies the number of columns of U that should
+        be copied into A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the columns of U that will
+        be copied into rows of A. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input)                 const double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local column indexes of U that should be copied in A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06N.html new file mode 100755 index 000000000..f28ab48c6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06N.html @@ -0,0 +1,92 @@ + + +HPL_dlaswp06N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp06N swap rows of U with rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp06N( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA +); + +

Description

+HPL_dlaswp06N +swaps rows of U with rows of A at positions +indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with rows of U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with rows of U. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows or columns of U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,N).  This
+        array contains the rows of U that are to be swapped with rows
+        of A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06T.html new file mode 100755 index 000000000..86032a9f4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp06T.html @@ -0,0 +1,92 @@ + + +HPL_dlaswp06T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp06T swap rows or columns of U with rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp06T( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA +); + +

Description

+HPL_dlaswp06T +swaps columns of U with rows of A at positions +indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with columns of U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with columns of U. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns of  U  that are to be swapped with
+        rows of A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp10N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp10N.html new file mode 100755 index 000000000..84403ca79 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlaswp10N.html @@ -0,0 +1,77 @@ + + +HPL_dlaswp10N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp10N performs a series of column interchanges. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp10N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int * +IPIV +); + +

Description

+HPL_dlaswp10N +performs a sequence of local column interchanges on a +matrix A. One column interchange is initiated for columns 0 through +N-1 of A. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the array A. M
+        must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the number of columns of the array A. N
+        must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A  points to an  array of  dimension (LDA,N).  This
+        array contains the columns onto which the interchanges should
+        be applied. On exit, A contains the permuted matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+IPIV    (local input)                 const int *
+        On entry, IPIV is an array containing the pivoting information,
+        i.e. the column indexes used for the interchanges applied to
+        columns 0 through N-1 of A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlatcpy.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlatcpy.html new file mode 100755 index 000000000..fa1cca5d9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlatcpy.html @@ -0,0 +1,83 @@ + + +HPL_dlatcpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlatcpy B := A^T + +

Synopsis

+#include "hpl.h"

+void +HPL_dlatcpy( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dlatcpy +copies the transpose of an array A into an array B. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M specifies the number of  rows of the array B and
+        the number of columns of A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the number of  rows of the array A and
+        the number of columns of B. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,M).
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,N).
+
+
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with the transpose of A.
+
+
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dlacpy. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocmax.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocmax.html new file mode 100755 index 000000000..c3361f32d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocmax.html @@ -0,0 +1,87 @@ + + +HPL_dlocmax HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocmax finds the maximum entry in matrix column. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocmax( +HPL_T_panel * +PANEL, +const int +N, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocmax +finds the maximum entry in the current column and packs +the useful information in WORK[0:3]. On exit, WORK[0] contains the +local maximum absolute value scalar, WORK[1] is the corresponding +local row index, WORK[2] is the corresponding global row index, and +WORK[3] is the coordinate of the process owning this max. When N is +less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set +to the total number of process rows. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of rows of the column
+        of A on which we operate.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is  a workarray of size at least 4.  On exit,
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.
+
+ +

See Also

+HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpN.html new file mode 100755 index 000000000..b5c4b74a9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpN.html @@ -0,0 +1,79 @@ + + +HPL_dlocswpN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocswpN locally swaps rows within panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocswpN( +HPL_T_panel * +PANEL, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocswpN +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpT.html new file mode 100755 index 000000000..d31361543 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dlocswpT.html @@ -0,0 +1,79 @@ + + +HPL_dlocswpT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocswpT locally swaps rows within panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocswpT( +HPL_T_panel * +PANEL, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocswpT +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dmatgen.html new file mode 100755 index 000000000..7886da146 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dmatgen.html @@ -0,0 +1,73 @@ + + +HPL_dmatgen HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dmatgen random matrix generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_dmatgen( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int +ISEED +); + +

Description

+HPL_dmatgen +generates (or regenerates) a random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. + +

Arguments

+
+M       (input)                       const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (input)                       const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+A       (output)                      double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        this  array  contains   the   coefficients  of  the  randomly
+        generated matrix.
+
+
+LDA     (input)                       const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+
+ISEED   (input)                       const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dscal.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dscal.html new file mode 100755 index 000000000..c13427f44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dscal.html @@ -0,0 +1,74 @@ + + +HPL_dscal HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dscal x = alpha * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dscal( +const int +N, +const double +ALPHA, +double * +X, +const int +INCX +); + +

Description

+HPL_dscal +scales the vector x by alpha. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are  scaled
+        by the scalar alpha.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   HPL_dscal( 3, 2.0, x, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dswap.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dswap.html new file mode 100755 index 000000000..cae6980a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dswap.html @@ -0,0 +1,84 @@ + + +HPL_dswap HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dswap y <-> x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dswap( +const int +N, +double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_dswap +swaps the vectors x and y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are updated
+        with the entries of the incremented array Y.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dswap( 3, x, 1, y, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dscal. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsm.html new file mode 100755 index 000000000..3d60e597f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsm.html @@ -0,0 +1,168 @@ + + +HPL_dtrsm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dtrsm B := A^{-1} * B or B := B * A^{-1}. + +

Synopsis

+#include "hpl.h"

+void +HPL_dtrsm( +const enum HPL_ORDER +ORDER, +const enum HPL_SIDE +SIDE, +const enum HPL_UPLO +UPLO, +const enum HPL_TRANS +TRANS, +const enum HPL_DIAG +DIAG, +const int +M, +const int +N, +const double +ALPHA, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dtrsm +solves one of the matrix equations + + op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + +where alpha is a scalar, X and B are m by n matrices, A is a unit, or +non-unit, upper or lower triangular matrix and op(A) is one of + + op( A ) = A or op( A ) = A^T. + +The matrix X is overwritten on B. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+SIDE    (local input)                 const enum HPL_SIDE
+        On entry, SIDE  specifies  whether  op(A) appears on the left
+        or right of X as follows:
+           SIDE==HplLeft    op( A ) * X = alpha * B,
+           SIDE==HplRight   X * op( A ) = alpha * B.
+
+
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry, TRANS  specifies the form of  op(A)  to be used  in
+        the matrix-matrix operation as follows:
+           TRANS==HplNoTrans    : op( A ) = A,
+           TRANS==HplTrans      : op( A ) = A^T,
+           TRANS==HplConjTrans  : op( A ) = A^T.
+
+
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the  matrix B.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix B.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero then the elements of the matrix B need not
+        be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+        otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+        k by k upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower on  entry,
+        the  leading k by k lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+        are not referenced  either,  but are assumed to be unity.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+
+
+B       (local input/output)          double *
+        On entry,  B  points  to an array of size equal to or greater
+        than LDB * n.  Before entry, the leading  m by n  part of the
+        array B must contain the matrix  B, except when alpha is zero,
+        in which case B need not be set on entry.  On exit, the array
+        B is overwritten by the m by n solution matrix.
+
+
+LDB     (local input)                 const int
+        On entry,  LDB  specifies  the  leading  dimension  of  B  as
+        declared  in  the  calling  (sub) program.  LDB  must  be  at
+        least MAX(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+              a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dgemm. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsv.html new file mode 100755 index 000000000..3e4703529 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_dtrsv.html @@ -0,0 +1,136 @@ + + +HPL_dtrsv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dtrsv x := A^{-1} x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dtrsv( +const enum HPL_ORDER +ORDER, +const enum HPL_UPLO +UPLO, +const enum HPL_TRANS +TRANS, +const enum HPL_DIAG +DIAG, +const int +N, +const double * +A, +const int +LDA, +double * +X, +const int +INCX +); + +

Description

+HPL_dtrsv +solves one of the systems of equations + + A * x = b, or A^T * x = b, + +where b and x are n-element vectors and A is an n by n non-unit, or +unit, upper or lower triangular matrix. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies  the equations  to  be  solved as
+        follows:
+           TRANS==HplNoTrans     A   * x = b,
+           TRANS==HplTrans       A^T * x = b.
+
+
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+
+
+N       (local input)                 const int
+        On entry, N specifies the order of the matrix A. N must be at
+        least zero.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+        n by n upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower  on entry,
+        the  leading n by n lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+        are not referenced  either,  but are assumed to be unity.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,n).
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        Before entry,  the  incremented array  X  must contain  the n
+        element right-hand side vector b. On exit,  X  is overwritten
+        with the solution vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   x[0] = 2.0; x[1] = 1.0;
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+              HplNonUnit, 2, a, 2, x, 1 );
+   printf("x=[%f,%f]\n", x[0], x[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dger, +HPL_dgemv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_equil.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_equil.html new file mode 100755 index 000000000..d64ecab99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_equil.html @@ -0,0 +1,115 @@ + + +HPL_equil HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_equil Equilibrate U and forward the column panel L. + +

Synopsis

+#include "hpl.h"

+void +HPL_equil( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_TRANS +TRANS, +const int +N, +double * +U, +const int +LDU, +int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1, +int * +IWORK +); + +

Description

+HPL_equil +equilibrates the local pieces of U, so that on exit to +this function, pieces of U contained in every process row are of the +same size. This phase makes the rolling phase optimal. In addition, +this function probes for the column panel L and forwards it when +possible. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be equilibrated) information.
+
+
+TRANS   (global input)                const enum HPL_TRANS
+        On entry, TRANS specifies whether  U  is stored in transposed
+        or non-transposed form.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of rows or columns of  U. N
+        must be at least 0.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+        non-transposed form, and MAX(1,N) otherwise.
+
+
+IPLEN   (global input)                int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension NPROW+1.
+
+ +

See Also

+HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_fprintf.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_fprintf.html new file mode 100755 index 000000000..d62b2c871 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_fprintf.html @@ -0,0 +1,58 @@ + + +HPL_fprintf HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_fprintf fprintf + fflush wrapper. + +

Synopsis

+#include "hpl.h"

+void +HPL_fprintf( +FILE * +STREAM, +const char * +FORM, +... +); + +

Description

+HPL_fprintf +is a wrapper around fprintf flushing the output stream. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_fprintf( stdout, "Hello World.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_abort, +HPL_warn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_exit.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_exit.html new file mode 100755 index 000000000..b42f315c9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_exit.html @@ -0,0 +1,39 @@ + + +HPL_grid_exit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_exit Exit process grid. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_exit( +HPL_T_grid * +GRID +); + +

Description

+HPL_grid_exit +marks the process grid object for deallocation. The +returned error code MPI_SUCCESS indicates successful completion. +Other error codes are (MPI) implementation dependent. + +

Arguments

+
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid to be released.
+
+ +

See Also

+HPL_pnum, +HPL_grid_init, +HPL_grid_info. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_info.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_info.html new file mode 100755 index 000000000..47f63672d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_info.html @@ -0,0 +1,70 @@ + + +HPL_grid_info HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_info Retrieve grid information. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_info( +const HPL_T_grid * +GRID, +int * +NPROW, +int * +NPCOL, +int * +MYROW, +int * +MYCOL +); + +

Description

+HPL_grid_info +returns the grid shape and the coordinates in the grid +of the calling process. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+NPROW   (global output)               int *
+        On exit,   NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+
+
+NPCOL   (global output)               int *
+        On exit,   NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+
+
+MYROW   (global output)               int *
+        On exit,  MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+
+
+MYCOL   (global output)               int *
+        On exit,  MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+
+ +

See Also

+HPL_pnum, +HPL_grid_init, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_init.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_init.html new file mode 100755 index 000000000..0bec56e6e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_grid_init.html @@ -0,0 +1,73 @@ + + +HPL_grid_init HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_init Create a process grid. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_init( +MPI_Comm +COMM, +const HPL_T_ORDER +ORDER, +const int +NPROW, +const int +NPCOL, +HPL_T_grid * +GRID +); + +

Description

+HPL_grid_init +creates a NPROW x NPCOL process grid using column- or +row-major ordering from an initial collection of processes identified +by an MPI communicator. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. The coordinates of processes that are not part of the +grid are set to values outside of [0..NPROW) x [0..NPCOL). + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        On entry,  COMM  is  the  MPI  communicator  identifying  the
+        initial  collection  of  processes out of which  the  grid is
+        formed.
+
+
+ORDER   (global input)                const HPL_T_ORDER
+        On entry, ORDER specifies how the processes should be ordered
+        in the grid as follows:
+           ORDER = HPL_ROW_MAJOR    row-major    ordering;
+           ORDER = HPL_COLUMN_MAJOR column-major ordering;
+
+
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid to be created. NPROW must be at least one.
+
+
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid to be created. NPCOL must be at least one.
+
+
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information to be initialized.
+
+ +

See Also

+HPL_pnum, +HPL_grid_info, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_idamax.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_idamax.html new file mode 100755 index 000000000..f16b296f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_idamax.html @@ -0,0 +1,68 @@ + + +HPL_idamax HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_idamax 1st k s.t. |x_k| = max_i(|x_i|). + +

Synopsis

+#include "hpl.h"

+int +HPL_idamax( +const int +N, +const double * +X, +const int +INCX +); + +

Description

+HPL_idamax +returns the index in an n-vector x of the first element +having maximum absolute value. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3];
+   int    imax;
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+   imax = HPL_idamax( 3, x, 1 );
+   printf("imax=%d\n", imax);
+   exit(0);
+   return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2l.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2l.html new file mode 100755 index 000000000..a3eb758da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2l.html @@ -0,0 +1,71 @@ + + +HPL_indxg2l HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2l Map a global index into a local one. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxg2l( +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2l +computes the local index of a matrix entry pointed to by +the global index IG. This local returned index is the same in all +processes. + +

Arguments

+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2lp.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2lp.html new file mode 100755 index 000000000..d9fa00436 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2lp.html @@ -0,0 +1,86 @@ + + +HPL_indxg2lp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2lp Map a global index into a local one and a process coordinate. + +

Synopsis

+#include "hpl.h"

+void +HPL_indxg2lp( +int * +IL, +int * +PROC, +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2lp +computes the local index of a matrix entry pointed to by +the global index IG as well as the process coordinate which possesses +this entry. The local returned index is the same in all processes. + +

Arguments

+
+IL      (output)                      int *
+        On exit, IL specifies the local index corresponding to IG. IL
+        is at least zero.
+
+
+PROC    (output)                      int *
+        On exit,  PROC  is the  coordinate of the process  owning the
+        entry specified by the global index IG. PROC is at least zero
+        and less than NPROCS.
+
+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2p.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2p.html new file mode 100755 index 000000000..0068dede3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxg2p.html @@ -0,0 +1,70 @@ + + +HPL_indxg2p HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2p Map a global index into a process coordinate. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxg2p( +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2p +computes the process coordinate which possesses the entry +of a matrix specified by a global index IG. + +

Arguments

+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxl2g.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxl2g.html new file mode 100755 index 000000000..216e98057 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_indxl2g.html @@ -0,0 +1,78 @@ + + +HPL_indxl2g HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxl2g Map an index-process pair into a global index. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxl2g( +const int +IL, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxl2g +computes the global index of a matrix entry pointed to +by the local index IL of the process indicated by PROC. + +

Arguments

+
+IL      (input)                       const int
+        On entry, IL specifies the local  index of the matrix  entry.
+        IL must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC  specifies the coordinate of the process whose
+        local array row or column is to be determined. PROC  must  be
+        at least zero and strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_infog2l.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_infog2l.html new file mode 100755 index 000000000..34feff72c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_infog2l.html @@ -0,0 +1,155 @@ + + +HPL_infog2l HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_infog2l global to local index translation. + +

Synopsis

+#include "hpl.h"

+void +HPL_infog2l( +int +I, +int +J, +const int +IMB, +const int +MB, +const int +INB, +const int +NB, +const int +RSRC, +const int +CSRC, +const int +MYROW, +const int +MYCOL, +const int +NPROW, +const int +NPCOL, +int * +II, +int * +JJ, +int * +PROW, +int * +PCOL +); + +

Description

+HPL_infog2l +computes the starting local index II, JJ corresponding to +the submatrix starting globally at the entry pointed by I, J. This +routine returns the coordinates in the grid of the process owning the +matrix entry of global indexes I, J, namely PROW and PCOL. + +

Arguments

+
+I       (global input)                int
+        On entry,  I  specifies  the  global  row index of the matrix
+        entry. I must be at least zero.
+
+
+J       (global input)                int
+        On entry,  J  specifies the global column index of the matrix
+        entry. J must be at least zero.
+
+
+IMB     (global input)                const int
+        On entry,  IMB  specifies  the size of the first row block of
+        the global matrix. IMB must be at least one.
+
+
+MB      (global input)                const int
+        On entry,  MB specifies the blocking factor used to partition
+        and  distribute the rows of the matrix A.  MB  must be larger
+        than one.
+
+
+INB     (global input)                const int
+        On entry, INB specifies the size of the first column block of
+        the global matrix. INB must be at least one.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the columns of the matrix A. NB must be larger
+        than one.
+
+
+RSRC    (global input)                const int
+        On entry,  RSRC  specifies  the row coordinate of the process
+        that possesses the row  I.  RSRC  must  be at least zero  and
+        strictly less than NPROW.
+
+
+CSRC    (global input)                const int
+        On entry, CSRC specifies the column coordinate of the process
+        that possesses the column J. CSRC  must be at least zero  and
+        strictly less than NPCOL.
+
+
+MYROW   (local input)                 const int
+        On entry, MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+
+
+MYCOL   (local input)                 const int
+        On entry, MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+
+
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+
+
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+
+
+II      (local output)                int *
+        On exit, II  specifies the  local  starting  row index of the
+        submatrix. On exit, II is at least 0.
+
+
+JJ      (local output)                int *
+        On exit, JJ  specifies the local starting column index of the
+        submatrix. On exit, JJ is at least 0.
+
+
+PROW    (global output)               int *
+        On exit, PROW is the row coordinate of the process owning the
+        entry specified by the global index I.  PROW is at least zero
+        and less than NPROW.
+
+
+PCOL    (global output)               int *
+        On exit, PCOL  is the column coordinate of the process owning
+        the entry specified by the global index J.  PCOL  is at least
+        zero and less than NPCOL.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_jumpit.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_jumpit.html new file mode 100755 index 000000000..be87a1f53 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_jumpit.html @@ -0,0 +1,65 @@ + + +HPL_jumpit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_jumpit jump into the random sequence. + +

Synopsis

+#include "hpl.h"

+void +HPL_jumpit( +int * +MULT, +int * +IADD, +int * +IRANN, +int * +IRANM +); + +

Description

+HPL_jumpit +jumps in the random sequence from the number X(n) encoded +in IRANN to the number X(m) encoded in IRANM using the constants A +and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A +and C obviously depend on m and n, see the function HPL_xjumpm in +order to initialize them. + +

Arguments

+
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant A.
+
+
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant C.
+
+
+IRANN   (local input)                 int *
+        On entry,  IRANN  is an array of dimension 2,  that contains 
+        the 16-lower and 15-higher bits of the encoding of X(n).
+
+
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.  On exit, this
+        array contains respectively the 16-lower and  15-higher bits
+        of the encoding of X(m).
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ladd.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ladd.html new file mode 100755 index 000000000..0c42d80d8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ladd.html @@ -0,0 +1,57 @@ + + +HPL_ladd HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ladd Adds two long positive integers. + +

Synopsis

+#include "hpl.h"

+void +HPL_ladd( +int * +J, +int * +K, +int * +I +); + +

Description

+HPL_ladd +adds without carry two long positive integers K and J and +puts the result into I. The long integers I, J, K are encoded on 64 +bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second +entry. + +

Arguments

+
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+
+
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+
+
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+
+ +

See Also

+HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_lmul.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_lmul.html new file mode 100755 index 000000000..8ef70cba5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_lmul.html @@ -0,0 +1,58 @@ + + +HPL_lmul HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_lmul multiplies 2 long positive integers. + +

Synopsis

+#include "hpl.h"

+void +HPL_lmul( +int * +K, +int * +J, +int * +I +); + +

Description

+HPL_lmul +multiplies without carry two long positive integers K and J +and puts the result into I. The long integers I, J, K are encoded on +64 bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second entry +of each array. For efficiency purposes, the intrinsic modulo function +is inlined. + +

Arguments

+
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+
+
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+
+
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+
+ +

See Also

+HPL_ladd, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_logsort.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_logsort.html new file mode 100755 index 000000000..da271fc19 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_logsort.html @@ -0,0 +1,83 @@ + + +HPL_logsort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_logsort Sort the processes in logarithmic order. + +

Synopsis

+#include "hpl.h"

+void +HPL_logsort( +const int +NPROCS, +const int +ICURROC, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1 +); + +

Description

+HPL_logsort +computes an array IPMAP and its inverse IPMAPM1 that +contain the logarithmic sorted processes id with respect to the local +number of rows of U that they own. This is necessary to ensure that +the logarithmic spreading of U is optimal in terms of number of steps +and communication volume as well. In other words, the largest pieces +of U will be sent a minimal number of times. + +

Arguments

+
+NPROCS  (global input)                const int
+        On entry, NPROCS  specifies the number of process rows in the
+        process grid. NPROCS is at least one.
+
+
+ICURROC (global input)                const int
+        On entry, ICURROC is the source process row.
+
+
+IPLEN   (global input/output)         int *
+        On entry, IPLEN is an array of dimension NPROCS+1,  such that
+        IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+        that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+        rows of U  in the processes before process IPMAP[i] after the
+        sort,  with  the convention that  IPLEN[NPROCS] is  the total
+        number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+        IPLEN[i] is  the  number of rows of A that should be moved to
+        the process IPMAP[i].  IPLEN  is such that the number of rows
+        of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myroc] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROCS)
+
+ +

See Also

+HPL_plindx1, +HPL_plindx10, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_max.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_max.html new file mode 100755 index 000000000..7cf0b0670 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_max.html @@ -0,0 +1,60 @@ + + +HPL_max HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_max Combine (max) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_max( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_max +combines (max) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_min.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_min.html new file mode 100755 index 000000000..9c109c338 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_min.html @@ -0,0 +1,60 @@ + + +HPL_min HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_min Combine (min) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_min( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_min +combines (min) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numroc.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numroc.html new file mode 100755 index 000000000..fa617cac3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numroc.html @@ -0,0 +1,79 @@ + + +HPL_numroc HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_numroc Compute the local number of rows/columns. + +

Synopsis

+#include "hpl.h"

+int +HPL_numroc( +const int +N, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_numroc +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index 0. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numrocI.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numrocI.html new file mode 100755 index 000000000..c1037a193 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_numrocI.html @@ -0,0 +1,86 @@ + + +HPL_numrocI HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_numrocI Compute the local number of rows/columns. + +

Synopsis

+#include "hpl.h"

+int +HPL_numrocI( +const int +N, +const int +I, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_numrocI +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index I. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+
+
+I       (input)                       const int
+        On entry, I  specifies the global index of the matrix  entry.
+        I must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pabort.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pabort.html new file mode 100755 index 000000000..89aacbd9f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pabort.html @@ -0,0 +1,57 @@ + + +HPL_pabort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pabort halts execution. + +

Synopsis

+#include "hpl.h"

+void +HPL_pabort( +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_pabort +displays an error message on stderr and halts execution. + +

Arguments

+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

See Also

+HPL_fprintf, +HPL_pwarn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_packL.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_packL.html new file mode 100755 index 000000000..1e8f8106c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_packL.html @@ -0,0 +1,59 @@ + + +HPL_packL HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_packL Form the MPI structure for the row ring broadcasts. + +

Synopsis

+#include "hpl.h"

+int +HPL_packL( +HPL_T_panel * +PANEL, +const int +INDEX, +const int +LEN, +const int +IBUF +); + +

Description

+HPL_packL +forms the MPI data type for the panel to be broadcast. +Successful completion is indicated by the returned error code +MPI_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+
+INDEX   (input)                       const int
+        On entry,  INDEX  points  to  the  first entry of the  packed
+        buffer being broadcast.
+
+
+LEN     (input)                       const int
+        On entry, LEN is the length of the packed buffer.
+
+
+IBUF    (input)                       const int
+        On entry, IBUF  specifies the panel buffer/count/type entries
+        that should be initialized.
+
+ +

See Also

+HPL_binit, +HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pddriver.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pddriver.html new file mode 100755 index 000000000..adcc02e00 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pddriver.html @@ -0,0 +1,27 @@ + + +main HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+main HPL main timing program. + +

Synopsis

+#include "hpl.h"

+int +main(); + +

Description

+main +is the main driver program for testing the HPL routines. +This program is driven by a short data file named "HPL.dat". + +

See Also

+HPL_pdinfo, +HPL_pdtest. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdfact.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdfact.html new file mode 100755 index 000000000..f51cee5d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdfact.html @@ -0,0 +1,78 @@ + + +HPL_pdfact HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdfact recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdfact( +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdfact +recursively factorizes a 1-dimensional panel of columns. +The RPFACT function pointer specifies the recursive algorithm to be +used, either Crout, Left- or Right looking. NBMIN allows to vary the +recursive stopping criterion in terms of the number of columns in the +panel, and NDIV allows to specify the number of subpanels each panel +should be divided into. Usually a value of 2 will be chosen. Finally +PFACT is a function pointer specifying the non-recursive algorithm +to be used on at most NBMIN columns. One can also choose here between +Crout, Left- or Right looking. Empirical tests seem to indicate that +values of 4 or 8 for NBMIN give the best results. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv.html new file mode 100755 index 000000000..ebb9c18e4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv.html @@ -0,0 +1,56 @@ + + +HPL_pdgesv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesv Solve A x = b. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesv( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesv +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with or without look-ahead. The lower triangular factor is left +unpivoted and the pivots are not returned. The right hand side is the +N+1 column of the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv0.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv0.html new file mode 100755 index 000000000..c137975d4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesv0.html @@ -0,0 +1,63 @@ + + +HPL_pdgesv0 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesv0 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesv0( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesv0 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +without look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK1.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK1.html new file mode 100755 index 000000000..1a19edc05 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK1.html @@ -0,0 +1,62 @@ + + +HPL_pdgesvK1 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesvK1 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesvK1( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesvK1 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the (N+1)-th column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK2.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK2.html new file mode 100755 index 000000000..f2a9a25f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdgesvK2.html @@ -0,0 +1,63 @@ + + +HPL_pdgesvK2 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesvK2 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesvK2( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesvK2 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the (N+1)-th column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdinfo.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdinfo.html new file mode 100755 index 000000000..94a7f78c0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdinfo.html @@ -0,0 +1,252 @@ + + +HPL_pdinfo HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdinfo Read input parameter file. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdinfo( +HPL_T_test * +TEST, +int * +NS, +int * +N, +int * +NBS, +int * +NB, +HPL_T_ORDER * +PMAPPIN, +int * +NPQS, +int * +P, +int * +Q, +int * +NPFS, +HPL_T_FACT * +PF, +int * +NBMS, +int * +NBM, +int * +NDVS, +int * +NDV, +int * +NRFS, +HPL_T_FACT * +RF, +int * +NTPS, +HPL_T_TOP * +TP, +int * +NDHS, +int * +DH, +HPL_T_SWAP * +FSWAP, +int * +TSWAP, +int * +L1NOTRAN, +int * +UNOTRAN, +int * +EQUIL, +int * +ALIGN +); + +

Description

+HPL_pdinfo +reads the startup information for the various tests and +transmits it to all processes. + +

Arguments

+
+TEST    (global output)               HPL_T_test *
+        On entry, TEST  points to a testing data structure.  On exit,
+        the fields of this data structure are initialized as follows:
+        TEST->outfp  specifies the output file where the results will
+        be printed.  It is only defined and used by  the process 0 of
+        the grid.  TEST->thrsh specifies  the threshold value for the
+        test ratio.  TEST->epsil is the relative machine precision of
+        the distributed computer.  Finally  the test counters, kfail,
+        kpass, kskip, ktest are initialized to zero.
+
+
+NS      (global output)               int *
+        On exit,  NS  specifies the number of different problem sizes
+        to be tested. NS is less than or equal to HPL_MAX_PARAM.
+
+
+N       (global output)               int *
+        On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+        the first NS entries of this array contain the  problem sizes
+        to run the code with.
+
+
+NBS     (global output)               int *
+        On exit,  NBS  specifies the number of different distribution
+        blocking factors to be tested. NBS must be less than or equal
+        to HPL_MAX_PARAM.
+
+
+NB      (global output)               int *
+        On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NBS entries of this array contain the values of the
+        various distribution blocking factors, to run the code with.
+
+
+PMAPPIN (global output)               HPL_T_ORDER *
+        On exit,  PMAPPIN  specifies the process mapping onto the no-
+        des of the  MPI machine configuration.  PMAPPIN  defaults  to
+        row-major ordering.
+
+
+NPQS    (global output)               int *
+        On exit, NPQS  specifies the  number of different values that
+        can be used for P and Q, i.e., the number of process grids to
+        run  the  code with.  NPQS must be  less  than  or  equal  to
+        HPL_MAX_PARAM.
+
+
+P       (global output)               int *
+        On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of P,
+        the number of process rows of the  NPQS grids to run the code
+        with.
+
+
+Q       (global output)               int *
+        On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of Q,
+        the number of process columns of the  NPQS  grids to  run the
+        code with.
+
+
+NPFS    (global output)               int *
+        On exit, NPFS  specifies the  number of different values that
+        can be used for PF : the panel factorization algorithm to run
+        the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+
+
+PF      (global output)               HPL_T_FACT *
+        On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NPFS  entries  of this array  contain  the various
+        panel factorization algorithms to run the code with.
+
+
+NBMS    (global output)               int *
+        On exit,  NBMS  specifies  the  number  of  various recursive
+        stopping criteria  to be tested.  NBMS  must be  less than or
+        equal to HPL_MAX_PARAM.
+
+
+NBM     (global output)               int *
+        On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NBMS entries of this array contain the values
+        of the various recursive stopping criteria to be tested.
+
+
+NDVS    (global output)               int *
+        On exit,  NDVS  specifies  the number  of various numbers  of
+        panels in recursion to be tested.  NDVS is less than or equal
+        to HPL_MAX_PARAM.
+
+
+NDV     (global output)               int *
+        On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDVS entries of this array contain the values
+        of the various numbers of panels in recursion to be tested.
+
+
+NRFS    (global output)               int *
+        On exit, NRFS  specifies the  number of different values that
+        can be used for RF : the recursive factorization algorithm to
+        be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+
+
+RF      (global output)               HPL_T_FACT *
+        On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NRFS  entries  of  this array contain  the various
+        recursive factorization algorithms to run the code with.
+
+
+NTPS    (global output)               int *
+        On exit, NTPS  specifies the  number of different values that
+        can be used for the  broadcast topologies  to be tested. NTPS
+        is less than or equal to HPL_MAX_PARAM.
+
+
+TP      (global output)               HPL_T_TOP *
+        On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+        the  first NTPS  entries of this  array  contain  the various
+        broadcast (along rows) topologies to run the code with.
+
+
+NDHS    (global output)               int *
+        On exit, NDHS  specifies the  number of different values that
+        can be used for the  lookahead depths to be  tested.  NDHS is
+        less than or equal to HPL_MAX_PARAM.
+
+
+DH      (global output)               int *
+        On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDHS entries of this array contain the values
+        of lookahead depths to run the code with.  Such a value is
+        either 0 (no lookahead) or greater than zero.
+
+
+FSWAP   (global output)               HPL_T_SWAP *
+        On exit, FSWAP specifies the swapping algorithm to be used in
+        all tests.
+
+
+TSWAP   (global output)               int *
+        On exit,  TSWAP  specifies the swapping threshold as a number
+        of columns when the mixed swapping algorithm was chosen.
+
+
+L1NOTRAN (global output)              int *
+        On exit, L1NOTRAN specifies whether the upper triangle of the
+        panels of columns  should  be stored  in  no-transposed  form
+        (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+
+
+UNOTRAN (global output)               int *
+        On exit, UNOTRAN  specifies whether the panels of rows should
+        be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+        form (UNOTRAN=0) during their broadcast.
+
+
+EQUIL   (global output)               int *
+        On exit,  EQUIL  specifies  whether  equilibration during the
+        swap-broadcast  of  the  panel of rows  should  be  performed
+        (EQUIL=1) or not (EQUIL=0).
+
+
+ALIGN   (global output)               int *
+        On exit,  ALIGN  specifies the alignment  of  the dynamically
+        allocated buffers in double precision words. ALIGN is greater
+        than zero.
+
+ +

See Also

+HPL_pddriver, +HPL_pdtest. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlamch.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlamch.html new file mode 100755 index 000000000..c1b51370a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlamch.html @@ -0,0 +1,67 @@ + + +HPL_pdlamch HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlamch determines machine-specific arithmetic constants. + +

Synopsis

+#include "hpl.h"

+double +HPL_pdlamch( +MPI_Comm +COMM, +const HPL_T_MACH +CMACH +); + +

Description

+HPL_pdlamch +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such that +1/sfmin does not overflow, the base of the machine (base), the precision +(prec), the number of (base) digits in the mantissa (t), whether +rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum +exponent before (gradual) underflow (emin), the underflow threshold +(rmin) - base**(emin-1), the largest exponent before overflow (emax), the +overflow threshold (rmax) - (base**emax)*(1-eps). + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+
+CMACH   (global input)                const HPL_T_MACH
+        Specifies the value to be returned by HPL_pdlamch            
+           = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+           = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+           = HPL_MACH_BASE,  HPL_pdlamch := base                     
+           = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+           = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+           = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+           = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+           = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+           = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+           = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+
+ + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlange.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlange.html new file mode 100755 index 000000000..0d1affc3d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlange.html @@ -0,0 +1,88 @@ + + +HPL_pdlange HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlange Compute ||A||. + +

Synopsis

+#include "hpl.h"

+double +HPL_pdlange( +const HPL_T_grid * +GRID, +const HPL_T_NORM +NORM, +const int +M, +const int +N, +const int +NB, +const double * +A, +const int +LDA +); + +

Description

+HPL_pdlange +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a distributed matrix A: + + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+NORM    (global input)                const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+
+
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+A       (local input)                 const double *
+        On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+        that contains the local pieces of the distributed matrix A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+ +

See Also

+HPL_pdlaprnt, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaprnt.html new file mode 100755 index 000000000..0ce810db0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaprnt.html @@ -0,0 +1,94 @@ + + +HPL_pdlaprnt HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaprnt Print a distributed matrix A. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaprnt( +const HPL_T_grid * +GRID, +const int +M, +const int +N, +const int +NB, +double * +A, +const int +LDA, +const int +IAROW, +const int +IACOL, +const char * +CMATNM +); + +

Description

+HPL_pdlaprnt +prints to standard error a distributed matrix A. The +local pieces of A are sent to the process of coordinates (0,0) in +the grid and then printed. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+M       (global input)                const int
+        On entry,  M  specifies the number of rows of the coefficient
+        matrix A. M must be at least zero.
+
+
+N       (global input)                const int
+        On  entry,   N   specifies  the  number  of  columns  of  the
+        coefficient matrix A. N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+A       (local input)                 double *
+        On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+        This array contains the coefficient matrix to be printed.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+
+IAROW   (global input)                const int
+        On entry,  IAROW  specifies the row process coordinate owning
+        the  first row of A.  IAROW  must be  larger than or equal to
+        zero and less than NPROW.
+
+
+IACOL   (global input)                const int
+        On entry,  IACOL  specifies  the  column  process  coordinate
+        owning the  first column  of A. IACOL  must be larger than or
+        equal to zero and less than NPCOL.
+
+
+CMATNM  (global input)                const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+
+ +

See Also

+HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00N.html new file mode 100755 index 000000000..07279fdb0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00N.html @@ -0,0 +1,82 @@ + + +HPL_pdlaswp00N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp00N Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp00N( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp00N +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNN, +HPL_pdupdateTN, +HPL_pipid, +HPL_plindx0, +HPL_dlaswp01N, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp04N, +HPL_dlaswp05N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00T.html new file mode 100755 index 000000000..08b8ea770 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp00T.html @@ -0,0 +1,82 @@ + + +HPL_pdlaswp00T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp00T Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp00T( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp00T +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNT, +HPL_pdupdateTT, +HPL_pipid, +HPL_plindx0, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03T, +HPL_dlaswp04T, +HPL_dlaswp05T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01N.html new file mode 100755 index 000000000..2d4772fda --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01N.html @@ -0,0 +1,86 @@ + + +HPL_pdlaswp01N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp01N Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp01N( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp01N +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNN, +HPL_pdupdateTN, +HPL_pipid, +HPL_plindx1, +HPL_plindx10, +HPL_spreadN, +HPL_equil, +HPL_rollN, +HPL_dlaswp00N, +HPL_dlaswp01N, +HPL_dlaswp06N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01T.html new file mode 100755 index 000000000..f6a5d8c4b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdlaswp01T.html @@ -0,0 +1,86 @@ + + +HPL_pdlaswp01T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp01T Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp01T( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp01T +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNT, +HPL_pdupdateTT, +HPL_pipid, +HPL_plindx1, +HPL_plindx10, +HPL_spreadT, +HPL_equil, +HPL_rollT, +HPL_dlaswp10N, +HPL_dlaswp01T, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmatgen.html new file mode 100755 index 000000000..28fb95509 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmatgen.html @@ -0,0 +1,87 @@ + + +HPL_pdmatgen HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdmatgen Parallel random matrix generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdmatgen( +const HPL_T_grid * +GRID, +const int +M, +const int +N, +const int +NB, +double * +A, +const int +LDA, +const int +ISEED +); + +

Description

+HPL_pdmatgen +generates (or regenerates) a parallel random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+A       (local output)                double *
+        On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+        On exit, this array contains the coefficients of the randomly
+        generated matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+
+ISEED   (global input)                const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmxswp.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmxswp.html new file mode 100755 index 000000000..c11d2b2da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdmxswp.html @@ -0,0 +1,96 @@ + + +HPL_pdmxswp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdmxswp swaps and broadcasts the pivot row. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdmxswp( +HPL_T_panel * +PANEL, +const int +M, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_pdmxswp +swaps and broadcasts the absolute value max row using +bi-directional exchange. The buffer is partially set by HPL_dlocmax. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by + + log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + +where lat and bdwth are the latency and bandwidth of the network for +double precision real elements. Communication only occurs in one +process column. Mono-directional links will cause the communication +cost to double. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of the matrix
+        column on which this function operates.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        It  is assumed that  HPL_dlocmax  was called  prior  to  this
+        routine to  initialize  the first four entries of this array.
+        On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+        Note that this is also the  JJth  row  (or column) of L1. The
+        remaining part is used as a temporary array.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrN.html new file mode 100755 index 000000000..663d2e266 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrN.html @@ -0,0 +1,100 @@ + + +HPL_pdpancrN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpancrN Crout panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpancrN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpancrN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in no-transpose form (i.e. just like the input +matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrT.html new file mode 100755 index 000000000..0e1490430 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpancrT.html @@ -0,0 +1,99 @@ + + +HPL_pdpancrT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpancrT Crout panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpancrT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpancrT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_disp.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_disp.html new file mode 100755 index 000000000..cb78fa4be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_disp.html @@ -0,0 +1,38 @@ + + +HPL_pdpanel_disp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_disp Deallocate a panel data structure. + +

Synopsis

+#include "hpl.h"

+int +HPL_pdpanel_disp( +HPL_T_panel * * +PANEL +); + +

Description

+HPL_pdpanel_disp +deallocates the panel structure and resources and +stores the error code returned by the panel factorization. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to be deallocated.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_free. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_free.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_free.html new file mode 100755 index 000000000..d33e5e400 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_free.html @@ -0,0 +1,38 @@ + + +HPL_pdpanel_free HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_free Deallocate the panel resources. + +

Synopsis

+#include "hpl.h"

+int +HPL_pdpanel_free( +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdpanel_free +deallocates the panel resources and stores the error +code returned by the panel factorization. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points  to  the  panel data  structure from
+        which the resources should be deallocated.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_disp. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_init.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_init.html new file mode 100755 index 000000000..2d105354f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_init.html @@ -0,0 +1,99 @@ + + +HPL_pdpanel_init HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_init Initialize the panel resources. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanel_init( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +M, +const int +N, +const int +JB, +HPL_T_pmat * +A, +const int +IA, +const int +JA, +const int +TAG, +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdpanel_init +initializes a panel data structure. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+
+
+JB      (global input)                const int
+        On entry, JB specifies the number of columns of the panel.
+        JB must be at least zero.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+
+
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+
+
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_disp, +HPL_pdpanel_free. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_new.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_new.html new file mode 100755 index 000000000..1b3029ecb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanel_new.html @@ -0,0 +1,99 @@ + + +HPL_pdpanel_new HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_new Create a panel data structure. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanel_new( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +M, +const int +N, +const int +JB, +HPL_T_pmat * +A, +const int +IA, +const int +JA, +const int +TAG, +HPL_T_panel * * +PANEL +); + +

Description

+HPL_pdpanel_new +creates and initializes a panel data structure. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+
+
+JB      (global input)                const int
+        On entry, JB specifies the number of columns of the panel.
+        JB must be at least zero.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+
+
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+
+
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+
+
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to create and initialize.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_disp. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllN.html new file mode 100755 index 000000000..386815fd2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllN.html @@ -0,0 +1,100 @@ + + +HPL_pdpanllN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanllN Left-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanllN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanllN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllT.html new file mode 100755 index 000000000..04307e823 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanllT.html @@ -0,0 +1,99 @@ + + +HPL_pdpanllT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanllT Left-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanllT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanllT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlN.html new file mode 100755 index 000000000..8d705c63c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlN.html @@ -0,0 +1,100 @@ + + +HPL_pdpanrlN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanrlN Right-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanrlN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanrlN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlT.html new file mode 100755 index 000000000..af458e7a1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdpanrlT.html @@ -0,0 +1,99 @@ + + +HPL_pdpanrlT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanrlT Right-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanrlT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanrlT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrN.html new file mode 100755 index 000000000..9169c48cc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpancrN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpancrN Crout recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpancrN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpancrN +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrT.html new file mode 100755 index 000000000..cc9047c3c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpancrT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpancrT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpancrT Crout recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpancrT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpancrT +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllN.html new file mode 100755 index 000000000..bf16e6009 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanllN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanllN Left-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanllN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanllN +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllT.html new file mode 100755 index 000000000..9904fb326 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanllT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanllT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanllT Left-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanllT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanllT +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlN.html new file mode 100755 index 000000000..9758c0722 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanrlN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanrlN Right-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanrlN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanrlN +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlT.html new file mode 100755 index 000000000..ed48a815d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdrpanrlT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanrlT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanrlT Right-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanrlT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanrlT +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtest.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtest.html new file mode 100755 index 000000000..1c11c34d7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtest.html @@ -0,0 +1,81 @@ + + +HPL_pdtest HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdtest Perform one test. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdtest( +HPL_T_test * +TEST, +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +N, +const int +NB +); + +

Description

+HPL_pdtest +performs one test given a set of parameters such as the +process grid, the problem size, the distribution blocking factor ... +This function generates the data, calls and times the linear system +solver, checks the accuracy of the obtained vector solution and +writes this information to the file pointed to by TEST->outfp. + +

Arguments

+
+TEST    (global input)                HPL_T_test *
+        On entry,  TEST  points  to a testing data structure:  outfp
+        specifies the output file where the results will be printed.
+        It is only defined and used by the process  0  of the  grid.
+        thrsh  specifies  the  threshold  value  for the test ratio.
+        Concretely, a test is declared "PASSED"  if and only if  the
+        following inequality is satisfied:
+        ||Ax-b||_oo / ( epsil *
+                        ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                         N )  < thrsh.
+        epsil  is the  relative machine precision of the distributed
+        computer. Finally the test counters, kfail, kpass, kskip and
+        ktest are updated as follows:  if the test passes,  kpass is
+        incremented by one;  if the test fails, kfail is incremented
+        by one; if the test is skipped, kskip is incremented by one.
+        ktest is left unchanged.
+
+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters to be used for this test.
+
+
+N       (global input)                const int
+        On entry,  N specifies the order of the coefficient matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+ +

See Also

+HPL_pddriver, +HPL_pdinfo. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtrsv.html new file mode 100755 index 000000000..0bb182dc9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdtrsv.html @@ -0,0 +1,64 @@ + + +HPL_pdtrsv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdtrsv Solve triu( A ) x = b. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdtrsv( +HPL_T_grid * +GRID, +HPL_T_pmat * +AMAT +); + +

Description

+HPL_pdtrsv +solves an upper triangular system of linear equations. + +The rhs is the last column of the N by N+1 matrix A. The solve starts +in the process column owning the Nth column of A, so the rhs b may +need to be moved one process column to the left at the beginning. The +routine therefore needs a column vector in every process column but +the one owning b. The result is replicated in all process rows, and +returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + +The algorithm uses decreasing one-ring broadcast in process rows and +columns implemented in terms of synchronous communication point to +point primitives. The lookahead of depth 1 is used to minimize the +critical path. This entire operation is essentially ``latency'' bound +and an estimate of its running time is given by: + + (move rhs) lat + N / ( P bdwth ) + + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + gam2 N^2 / ( P Q ), + +where gam2 is an estimate of the Level 2 BLAS rate of execution. +There are N / NB diagonal blocks. One must exchange 2 messages of +length NB to compute the next NB entries of the vector solution, as +well as performing a total of N^2 floating point operations. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+AMAT    (local input/output)          HPL_T_pmat *
+        On entry,  AMAT  points  to the data structure containing the
+        local array information.
+
+ +

See Also

+HPL_pdgesv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNN.html new file mode 100755 index 000000000..b77cddbce --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNN.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateNN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateNN Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateNN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateNN +broadcasts (forwards) the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00N, +HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNT.html new file mode 100755 index 000000000..4ecb1f687 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateNT.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateNT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateNT Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateNT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateNT +broadcasts (forwards) the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00T, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTN.html new file mode 100755 index 000000000..ae735bf84 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTN.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateTN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateTN Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateTN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateTN +broadcasts (forwards) the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00N, +HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTT.html new file mode 100755 index 000000000..7c69f8828 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pdupdateTT.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateTT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateTT Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateTT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateTT +broadcasts (forwards) the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry. In that case,
+        IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00T, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_perm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_perm.html new file mode 100755 index 000000000..9312eb4eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_perm.html @@ -0,0 +1,67 @@ + + +HPL_perm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_perm Combine 2 index arrays - Generate the permutation. + +

Synopsis

+#include "hpl.h"

+void +HPL_perm( +const int +N, +int * +LINDXA, +int * +LINDXAU, +int * +IWORK +); + +

Description

+HPL_perm +combines two index arrays and generates the corresponding +permutation. First, this function computes the inverse of LINDXA, and +then combines it with LINDXAU. Second, in order to be able to perform +the permutation in place, LINDXAU is overwritten by the sequence of +permutations producing the same result. What we ultimately want to +achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the +call to this function, this in-place permutation can be performed by +for i in [0..N) swap U[i] with U[LINDXAU[i]]. + +

Arguments

+
+N       (global input)                const int
+        On entry,  N  specifies the length of the arrays  LINDXA  and
+        LINDXAU. N should be at least zero.
+
+
+LINDXA  (global input/output)         int *
+        On entry,  LINDXA  is an array of dimension N  containing the
+        source indexes. On exit,  LINDXA  contains the combined index
+        array.
+
+
+LINDXAU (global input/output)         int *
+        On entry,  LINDXAU is an array of dimension N  containing the
+        target indexes.  On exit,  LINDXAU  contains  the sequence of
+        permutation,  that  should be applied  in increasing order to
+        permute the underlying array U in place.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension N.
+
+ +

See Also

+HPL_plindx1, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pipid.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pipid.html new file mode 100755 index 000000000..e6deb3d93 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pipid.html @@ -0,0 +1,95 @@ + + +HPL_pipid HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pipid Simplify the pivot vector. + +

Synopsis

+#include "hpl.h"

+void +HPL_pipid( +HPL_T_panel * +PANEL, +int * +K, +int * +IPID +); + +

Description

+HPL_pipid +computes an array IPID that contains the source and final +destination of matrix rows resulting from the application of N +interchanges as computed by the LU factorization with row partial +pivoting. The array IPID is such that the row of global index IPID(i) +should be mapped onto the row of global index IPID(i+1). Note that we +cannot really know the length of IPID a priori. However, we know that +this array is at least 2*N long, since there are N rows to swap and +broadcast. The length of this array must be smaller than or equal to +4*N, since every row is swapped with at most a single distinct remote +row. The algorithm constructing IPID goes as follows: Let IA be the +global index of the first row to be swapped. + +For every row src IA + i with i in [0..N) to be swapped with row dst +such that dst is given by DPIV[i]: + +Is row src the destination of a previous row of the current block, +that is, is there k odd such that IPID(k) is equal to src ? + Yes: update this destination with dst. For example, if the +pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, +we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it +was thought so far ... + No : add the pair (src,dst) at the end of IPID; row src has not +been moved yet. + +Is row dst different from src the destination of a previous row of +the current block, i.e., is there k odd such that IPID(k) is equal to +dst ? + Yes: update IPID(k) with src. For example, if the pivot array +is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in +fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought +so far ... + No : add the pair (dst,src) at the end of IPID; row dst has not +been moved yet. + +Note that when src is equal to dst, the pair (dst,src) should not be +added to IPID in order to avoid duplicated entries in this array. 
+During the construction of the array IPID, we make sure that the +first N entries are such that IPID(k) with k odd is equal to IA+k/2. +For k in [0..K/2), the row of global index IPID(2*k) should be +mapped onto the row of global index IPID(2*k+1). + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global output)               int *
+        On exit, K specifies the number of entries in  IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global output)               int *
+        On entry, IPID is an array of length 4*N.  On exit, the first
+        K entries of that array contain the src and final destination
+        resulting  from  the  application of the  N  interchanges  as
+        specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+        stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+        in [0..N).
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx0.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx0.html new file mode 100755 index 000000000..f3dbbcdea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx0.html @@ -0,0 +1,187 @@ + + +HPL_plindx0 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx0 Compute local swapping index arrays. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx0( +HPL_T_panel * +PANEL, +const int +K, +int * +IPID, +int * +LINDXA, +int * +LINDXAU, +int * +LLEN +); + +

Description

+HPL_plindx0 +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. + +On entry, the array IPID of length K is such that the row of global +index IPID(i) should be mapped onto row of global index IPID(i+1). +Let IA be the global index of the first row to be swapped. For k in +[0..K/2), the row of global index IPID(2*k) should be mapped onto the +row of global index IPID(2*k+1). The question then, is to determine +which rows should ultimately be part of U. + +First, some rows of the process ICURROW may be swapped locally. One +of these rows belongs to U, the other one belongs to my local piece of +A. The other rows of the current block are swapped with remote rows +and are thus not part of U. These rows however should be sent along, +and grabbed by the other processes as we progress in the exchange +phase. + +So, assume that I am ICURROW and consider a row of index IPID(2*i) +that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less +than N, this row is locally swapped and should be copied into U at +the position IPID(2*i+1) - IA. No row will be exchanged for this one. +If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be +locally copied into my local piece of A at the position corresponding +to the row of global index IPID(2*i+1). + +If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) +is to be swapped away and strictly speaking does not belong to U, but +to A remotely. Since this process will however send this array U, +this row is copied into U, exactly where the row IPID(2*i+1) should +go. For this, we search IPID for k1, such that IPID(2*k1) is equal to +IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position +IPID(2*k1+1)-IA. + +It is thus important to put the rows that go into U, i.e., such that +IPID(2*i+1) - IA is less than N at the beginning of the array IPID. 
By +doing so, U is formed, and the local copy is performed in just one +sweep. + +Two lists LINDXA and LINDXAU are built. LINDXA contains the local +index of the rows I have that should be copied. LINDXAU contains the +local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A +is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). In the process +ICURROW, the initial packing algorithm proceeds as follows. + + for all entries in IPID, + if IPID(2*i) is in ICURROW, + if IPID(2*i+1) is in ICURROW, + if( IPID(2*i+1) - IA < N ) + save corresponding local position + of this row (LINDXA); + save local position (LINDXAU) in U + where this row goes; + [copy row IPID(2*i) in U at position + IPID(2*i+1)-IA; ]; + else + save corresponding local position of + this row (LINDXA); + save local position (-LINDXAU) in A + where this row goes; + [copy row IPID(2*i) in my piece of A + at IPID(2*i+1);] + end if + else + find k1 such that IPID(2*k1) = IPID(2*i+1); + copy row IPID(2*i) in U at position + IPID(2*k1+1)-IA; + save corresponding local position of this + row (LINDXA); + save local position (LINDXAU) in U where + this row goes; + end if + end if + end for + +Second, if I am not the current row process ICURROW, all source rows +in IPID that I own are part of U. Indeed, they are swapped with one +row of the current block of rows, and the main factorization +algorithm proceeds one row after each other. The processes different +from ICURROW, should exchange and accumulate those rows until they +receive some data previously owned by the process ICURROW. + +In processes different from ICURROW, the initial packing algorithm +proceeds as follows. Consider a row of global index IPID(2*i) that I +own. When I will be receiving data previously owned by ICURROW, i.e., +U, row IPID(2*i) should replace the row in U at pos. 
IPID(2*i+1)-IA, +and this particular row of U should be first copied into my piece of +A, at A(il,:), where il is the local row index corresponding to +IPID(2*i). Now, initially, this row will be packed into workspace, say +as the kth row of that work array. The following algorithm sets +LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row +should be copied. LINDXA(k) stores the local index in A where this +row of U should be copied, i.e., il. + + for all entries in IPID, + if IPID(2*i) is not in ICURROW, + copy row IPID(2*i) in work array; + save corresponding local position + of this row (LINDXA); + save position (LINDXAU) in U where + this row should be copied; + end if + end for + +Since we are at it, we also globally figure out how many rows every +process has. That is necessary, because it would be rather cumbersome +to figure it on the fly during the bi-directional exchange phase. +This information is kept in the array LLEN of size NPROW. Also note +that the arrays LINDXA and LINDXAU are of max length equal to 2*N. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+LINDXA  (local output)                int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+
+
+LINDXAU (local output)                int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+
+
+LLEN    (global output)               int *
+        On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+        contains how many rows every process has.
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx1.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx1.html new file mode 100755 index 000000000..0a49ede0b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx1.html @@ -0,0 +1,130 @@ + + +HPL_plindx1 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx1 Compute local swapping index arrays. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx1( +HPL_T_panel * +PANEL, +const int +K, +const int * +IPID, +int * +IPA, +int * +LINDXA, +int * +LINDXAU, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1, +int * +PERMU, +int * +IWORK +); + +

Description

+HPL_plindx1 +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. In addition, this function computes +three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic +mapping information for the spreading phase. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+IPA     (global output)               int *
+        On exit,  IPA  specifies  the number of rows that the current
+        process row has that either belong to U  or should be swapped
+        with remote rows of A.
+
+
+LINDXA  (global output)               int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+
+
+LINDXAU (global output)               int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+
+
+IPLEN   (global output)               int *
+        On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in  the  processes  before  process  IPMAP[i]  after the sort
+        with the convention that IPLEN[nprow]  is the total number of
+        rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+        local number of rows of A that should be moved to the process
+        IPMAP[i]. IPLEN is such that the number of rows of the source
+        process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+
+
+PERMU   (global output)               int *
+        On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+        contains  a sequence of permutations,  that should be applied
+        in increasing order to permute in place the row panel U.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension 2*JB.
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx10.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx10.html new file mode 100755 index 000000000..fbfd6be2f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_plindx10.html @@ -0,0 +1,87 @@ + + +HPL_plindx10 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx10 Compute the logarithmic maps for the spreading. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx10( +HPL_T_panel * +PANEL, +const int +K, +const int * +IPID, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1 +); + +

Description

+HPL_plindx10 +computes three arrays IPLEN, IPMAP and IPMAPM1 that +contain the logarithmic mapping information for the spreading phase. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+IPLEN   (global output)               int *
+        On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in the processes before process IPMAP[i] after the sort, with
+        the convention that IPLEN[nprow] is the total number of rows.
+        In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+        rows of  A  that should be moved for each process.  IPLEN  is
+        such that the number of rows of the source process row can be
+        computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+        this  array are sorted  so  that  the quantities IPLEN[i+1] -
+        IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pnum.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pnum.html new file mode 100755 index 000000000..8bedc3016 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pnum.html @@ -0,0 +1,54 @@ + + +HPL_pnum HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pnum Rank determination. + +

Synopsis

+#include "hpl.h"

+int +HPL_pnum( +const HPL_T_grid * +GRID, +const int +MYROW, +const int +MYCOL +); + +

Description

+HPL_pnum +determines the rank of a process as a function of its +coordinates in the grid. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+MYROW   (local input)                 const int
+        On entry,  MYROW  specifies the row coordinate of the process
+        whose rank is to be determined. MYROW must be greater than or
+        equal to zero and less than NPROW.
+
+
+MYCOL   (local input)                 const int
+        On entry,  MYCOL  specifies  the  column  coordinate  of  the
+        process whose rank is to be determined. MYCOL must be greater
+        than or equal to zero and less than NPCOL.
+
+ +

See Also

+HPL_grid_init, +HPL_grid_info, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer.html new file mode 100755 index 000000000..abef45946 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer.html @@ -0,0 +1,49 @@ + + +HPL_ptimer HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer Timer facility. + +

Synopsis

+#include "hpl.h"

+void +HPL_ptimer( +const int +I +); + +

Description

+HPL_ptimer +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_ptimer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_ptimer_boot() prior to any of +the functions mentioned above. + +

Arguments

+
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+
+ +

See Also

+HPL_ptimer_cputime, +HPL_ptimer_walltime. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_cputime.html new file mode 100755 index 000000000..cffd863b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_cputime.html @@ -0,0 +1,35 @@ + + +HPL_ptimer_cputime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer_cputime Return the CPU time. + +

Synopsis

+#include "hpl.h"

+double +HPL_ptimer_cputime(); + +

Description

+HPL_ptimer_cputime +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. + +

See Also

+HPL_ptimer_walltime, +HPL_ptimer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_walltime.html new file mode 100755 index 000000000..a509897f1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_ptimer_walltime.html @@ -0,0 +1,26 @@ + + +HPL_ptimer_walltime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer_walltime Return the elapsed (wall-clock) time. + +

Synopsis

+#include "hpl.h"

+double +HPL_ptimer_walltime(); + +

Description

+HPL_ptimer_walltime +returns the elapsed (wall-clock) time. + +

See Also

+HPL_ptimer_cputime, +HPL_ptimer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pwarn.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pwarn.html new file mode 100755 index 000000000..221d23982 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_pwarn.html @@ -0,0 +1,63 @@ + + +HPL_pwarn HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pwarn displays an error message. + +

Synopsis

+#include "hpl.h"

+void +HPL_pwarn( +FILE * +STREAM, +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_pwarn +displays an error message. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

See Also

+HPL_pabort, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rand.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rand.html new file mode 100755 index 000000000..5aef6669c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rand.html @@ -0,0 +1,40 @@ + + +HPL_rand HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rand random number generator. + +

Synopsis

+#include "hpl.h"

+double +HPL_rand(); + +

Description

+HPL_rand +generates the next number in the random sequence. This +function ensures that this number lies in the interval (-0.5, 0.5]. + +The static array irand contains the information (2 integers) required +to generate the next number in the sequence X(n). This number is +computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the +constant d is the largest 64 bit positive integer. The array irand is +then updated for the generation of the next number X(n+1) in the +random sequence as follows X(n+1) = a * X(n) + c. The constants a and +c should have been preliminarily stored in the arrays ias and ics as +2 pairs of integers. The initialization of ias, ics and irand is +performed by the function HPL_setran. + +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_recv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_recv.html new file mode 100755 index 000000000..afcb570c5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_recv.html @@ -0,0 +1,67 @@ + + +HPL_recv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_recv Receive a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_recv( +double * +RBUF, +int +RCOUNT, +int +SRC, +int +RTAG, +MPI_Comm +COMM +); + +

Description

+HPL_recv +is a simple wrapper around MPI_Recv. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. + +

Arguments

+
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+
+
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+
+
+SRC     (local input)                 int
+        On entry, SRC  specifies the rank of the  sending  process in
+        the communication space defined by COMM.
+
+
+RTAG    (local input)                 int
+        On entry,  RTAG specifies the message tag to be used for this
+        communication operation.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_send, +HPL_sdrv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_reduce.html new file mode 100755 index 000000000..026435ed6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_reduce.html @@ -0,0 +1,75 @@ + + +HPL_reduce HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_reduce Reduce operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_reduce( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const HPL_T_OP +OP, +const int +ROOT, +MPI_Comm +COMM +); + +

Description

+HPL_reduce +performs a global reduce operation across all processes of +a group. Note that the input buffer is used as a work array, so the +original data is corrupted in all processes but the accumulating process. + +

Arguments

+
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+        exit,  and  in process of rank  ROOT  this array contains the
+        reduced data.  This  buffer  is also used as workspace during
+        the operation in the other processes of the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+
+
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the accumulating process.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollN.html new file mode 100755 index 000000000..1e1a49068 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollN.html @@ -0,0 +1,99 @@ + + +HPL_rollN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rollN Roll U and forward the column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_rollN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +N, +double * +U, +const int +LDU, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_rollN +rolls the local arrays containing the local pieces of U, so +that on exit from this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+
+
+N       (local input)                 const int
+        On entry, N specifies the number of columns of  U.  N must be
+        at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,IPLEN[NPROW]).
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: for i in [0.. NPROW), IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollT.html new file mode 100755 index 000000000..a6ac29336 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_rollT.html @@ -0,0 +1,99 @@ + + +HPL_rollT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rollT Roll U and forward the column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_rollT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +N, +double * +U, +const int +LDU, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_rollT +rolls the local arrays containing the local pieces of U, so +that on exit from this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+
+
+N       (local input)                 const int
+        On entry, N specifies the local number of rows of  U.  N must
+        be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,N).
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: for i in [0.. NPROW), IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sdrv.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sdrv.html new file mode 100755 index 000000000..6f5b5880c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sdrv.html @@ -0,0 +1,88 @@ + + +HPL_sdrv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_sdrv Send and receive a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_sdrv( +double * +SBUF, +int +SCOUNT, +int +STAG, +double * +RBUF, +int +RCOUNT, +int +RTAG, +int +PARTNER, +MPI_Comm +COMM +); + +

Description

+HPL_sdrv +is a simple wrapper around MPI_Sendrecv. Its main purpose is +to allow for some experimentation and tuning of this simple function. +Messages of length less than or equal to zero are not sent nor +received. Successful completion is indicated by the returned error +code HPL_SUCCESS. + +

Arguments

+
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+
+
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number  of double precision
+        entries in SBUF. SCOUNT must be at least zero.
+
+
+STAG    (local input)                 int
+        On entry,  STAG  specifies the message tag to be used for the
+        sending communication operation.
+
+
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+
+
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+
+
+RTAG    (local input)                 int
+        On entry,  RTAG  specifies the message tag to be used for the
+        receiving communication operation.
+
+
+PARTNER (local input)                 int
+        On entry,  PARTNER  specifies  the rank of the  collaborative
+        process in the communication space defined by COMM.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_send, +HPL_recv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_send.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_send.html new file mode 100755 index 000000000..05dcb7e6d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_send.html @@ -0,0 +1,67 @@ + + +HPL_send HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_send Send a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_send( +double * +SBUF, +int +SCOUNT, +int +DEST, +int +STAG, +MPI_Comm +COMM +); + +

Description

+HPL_send +is a simple wrapper around MPI_Send. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. + +

Arguments

+
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+
+
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number of  double precision
+        entries in SBUF. SCOUNT must be at least zero.
+
+
+DEST    (local input)                 int
+        On entry, DEST specifies the rank of the receiving process in
+        the communication space defined by COMM.
+
+
+STAG    (local input)                 int
+        On entry,  STAG specifies the message tag to be used for this
+        communication operation.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_recv, +HPL_sdrv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_setran.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_setran.html new file mode 100755 index 000000000..44f37e35e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_setran.html @@ -0,0 +1,52 @@ + + +HPL_setran HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_setran Manage the random number generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_setran( +const int +OPTION, +int * +IRAN +); + +

Description

+HPL_setran +initializes the random generator with the encoding of the +first number X(0) in the sequence, and the constants a and c used to +compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), +a and c are stored in the static variables irand, ias and ics. When +OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the +values of the input array IRAN. When OPTION is 3, IRAN is set to the +current value of irand, and irand is then incremented. + +

Arguments

+
+OPTION  (local input)                 const int
+        On entry, OPTION  is an integer that specifies the operations
+        to be performed on the random generator as specified above.
+
+
+IRAN    (local input/output)          int *
+        On entry,  IRAN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of a random number.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadN.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadN.html new file mode 100755 index 000000000..f0d8f8938 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadN.html @@ -0,0 +1,120 @@ + + +HPL_spreadN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_spreadN Spread row panel U and forward current column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_spreadN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_SIDE +SIDE, +const int +N, +double * +U, +const int +LDU, +const int +SRCDIST, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_spreadN +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of rows of U, that +should be spread on any given process row. This function also probes +for the presence of the column panel PBCST. In case of success, this +panel will be forwarded. If PBCST is NULL on input, this probing +mechanism will be disabled. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+
+
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+
+
+N       (global input)                const int
+        On entry,  N  specifies  the  local number of columns of U. N
+        must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]).
+
+
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that  IPLEN[i]  is the number of rows of  U  in  the
+        processes before process  IPMAP[i],  with  the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadT.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadT.html new file mode 100755 index 000000000..cec561646 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_spreadT.html @@ -0,0 +1,120 @@ + + +HPL_spreadT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_spreadT Spread row panel U and forward current column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_spreadT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_SIDE +SIDE, +const int +N, +double * +U, +const int +LDU, +const int +SRCDIST, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_spreadT +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of columns of U, +that should be spread on any given process row. This function also +probes for the presence of the column panel PBCST. If available, +this panel will be forwarded. If PBCST is NULL on input, this +probing mechanism will be disabled. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+
+
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+
+
+N       (global input)                const int
+        On entry,  N  specifies the local number of rows of U. N must
+        be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,N).
+
+
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that  IPLEN[i]  is the number of rows of  U  in  the
+        processes before process  IPMAP[i],  with  the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sum.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sum.html new file mode 100755 index 000000000..be785b99e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_sum.html @@ -0,0 +1,61 @@ + + +HPL_sum HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_sum Combine (sum) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_sum( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_sum +combines (sum) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer  operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer.html new file mode 100755 index 000000000..8e6a79803 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer.html @@ -0,0 +1,49 @@ + + +HPL_timer HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer Timer facility. + +

Synopsis

+#include "hpl.h"

+void +HPL_timer( +const int +I +); + +

Description

+HPL_timer +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_timer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_timer calls in them. HPL_timer_enable() will re-enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_timer_boot() prior to any of +the functions mentioned above. + +

Arguments

+
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+
+ +

See Also

+HPL_timer_cputime, +HPL_timer_walltime. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_cputime.html new file mode 100755 index 000000000..0fa9b6575 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_cputime.html @@ -0,0 +1,35 @@ + + +HPL_timer_cputime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer_cputime Return the CPU time. + +

Synopsis

+#include "hpl.h"

+double +HPL_timer_cputime(); + +

Description

+HPL_timer_cputime +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. + +

See Also

+HPL_timer_walltime, +HPL_timer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_walltime.html new file mode 100755 index 000000000..92588e49f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_timer_walltime.html @@ -0,0 +1,26 @@ + + +HPL_timer_walltime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer_walltime Return the elapsed (wall-clock) time. + +

Synopsis

+#include "hpl.h"

+double +HPL_timer_walltime(); + +

Description

+HPL_timer_walltime +returns the elapsed (wall-clock) time. + +

See Also

+HPL_timer_cputime, +HPL_timer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_warn.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_warn.html new file mode 100755 index 000000000..773df9ae0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_warn.html @@ -0,0 +1,74 @@ + + +HPL_warn HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_warn displays an error message. + +

Synopsis

+#include "hpl.h"

+void +HPL_warn( +FILE * +STREAM, +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_warn +displays an error message. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has  occurred.  When  LINE is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_warn( stderr, __LINE__, __FILE__,
+             "Demo.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_abort, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_xjumpm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_xjumpm.html new file mode 100755 index 000000000..794ae3a8b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/HPL_xjumpm.html @@ -0,0 +1,97 @@ + + +HPL_xjumpm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_xjumpm Compute constants to jump in the random sequence. + +

Synopsis

+#include "hpl.h"

+void +HPL_xjumpm( +const int +JUMPM, +int * +MULT, +int * +IADD, +int * +IRANN, +int * +IRANM, +int * +IAM, +int * +ICM +); + +

Description

+HPL_xjumpm +computes the constants A and C to jump JUMPM numbers in +the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in +MULT and IADD specify how to jump from one entry in the sequence to +the next. + +

Arguments

+
+JUMPM   (local input)                 const int
+        On entry,  JUMPM  specifies  the  number  of entries  in  the
+        sequence to jump over. When JUMPM is less than or equal to zero,
+        A and C are not computed, IRANM is set to IRANN corresponding
+        to a jump of size zero.
+
+
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  a  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+
+
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  c  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+
+
+IRANN   (local input)                 int *
+        On entry, IRANN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the encoding of X(n).
+
+
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.   On exit, this
+        array  contains respectively  the 16-lower and 15-higher bits
+        of the encoding of X(n+JUMPM).
+
+
+IAM     (local output)                int *
+        On entry, IAM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+        sequence. IAM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  A. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+
+
+ICM     (local output)                int *
+        On entry, ICM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+        sequence. ICM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  C. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/algorithm.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/algorithm.html new file mode 100755 index 000000000..9b1d7222e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/algorithm.html @@ -0,0 +1,299 @@ + + +HPL Algorithm + + + + +

HPL Algorithm

+ + +This page provides a high-level description of the algorithm used in +this package. As indicated below, HPL contains in fact many possible +variants for various operations. Defaults could have been chosen, or +even variants could be selected during the execution. Due to the +performance requirements, it was decided to leave the user with the +opportunity of choosing, so that an "optimal" set of parameters could +easily be experimentally determined for a given machine configuration. +From a numerical accuracy point of view, all possible +combinations are rigorously equivalent to each other even though the +result may slightly differ (bit-wise). +

+ + +
+ +

Main Algorithm

+ +This software package solves a linear system of order n: A x = b by +first computing the LU factorization with row partial pivoting of the +n-by-n+1 coefficient matrix [A b] = [[L,U] y]. Since the lower triangular +factor L is applied to b as the factorization progresses, the solution x +is obtained by solving the upper triangular system U x = y. The lower +triangular matrix L is left unpivoted and the array of pivots is not +returned.

+ + + + + + +
+The data is distributed onto a two-dimensional P-by-Q grid of processes +according to the block-cyclic scheme to ensure "good" load balance +as well as the scalability of the algorithm. The n-by-n+1 coefficient +matrix is first logically partitioned into nb-by-nb blocks, that are +cyclically "dealt" onto the P-by-Q process grid. This is done in both +dimensions of the matrix.
+ + + + + +
+The right-looking variant has been chosen for the main loop of the LU +factorization. This means that at each iteration of the loop a panel of +nb columns is factorized, and the trailing submatrix is updated. Note +that this computation is thus logically partitioned with the same block +size nb that was used for the data distribution.
+
+ +

Panel Factorization

+ + + + + + +
+At a given iteration of the main loop, and because of the cartesian +property of the distribution scheme, each panel factorization occurs in +one column of processes. This particular part of the computation lies +on the critical path of the overall algorithm. The user is offered the +choice of three (Crout, left- and right-looking) matrix-multiply based +recursive variants. The software also allows the user to choose how +many sub-panels the current panel should be divided into during the +recursion. Furthermore, one can also select at run-time the recursion +stopping criterion in terms of the number of columns left to factorize. +When this threshold is reached, the sub-panel will then be factorized +using one of the three Crout, left- or right-looking matrix-vector based +variants. Finally, for each panel column the pivot search, the associated +swap and broadcast operation of the pivot row are combined into one +single communication step. A binary-exchange (leave-on-all) reduction +performs these three operations at once.
+
+ +

Panel Broadcast

+ +Once the panel factorization has been computed, this panel of columns +is broadcast to the other process columns. There are many possible +broadcast algorithms and the software currently offers 6 variants to +choose from. These variants are described below assuming that process 0 +is the source of the broadcast for convenience. "->" means "sends to". +
    +
  • Increasing-ring: 0 -> 1; 1 -> 2; 2 -> 3 and so on. +This algorithm is the classic one; it has the caveat that process 1 has +to send a message. +
    + +
    + +
  • Increasing-ring (modified): 0 -> 1; 0 -> 2; 2 -> 3 +and so on. Process 0 sends two messages and process 1 only receives one +message. This algorithm is almost always better, if not the best. +
    + +
    + +
  • Increasing-2-ring: The Q processes are divided into +two parts: 0 -> 1 and 0 -> Q/2; Then processes 1 and Q/2 act as sources +of two rings: 1 -> 2, Q/2 -> Q/2+1; 2 -> 3, Q/2+1 -> Q/2+2 and so on. +This algorithm has the advantage of reducing the time by which the last +process will receive the panel at the cost of process 0 sending 2 +messages. +
    + +
    + +
  • Increasing-2-ring (modified): As one may expect, +first 0 -> 1, then the Q-1 processes left are divided into two equal +parts: 0 -> 2 and 0 -> Q/2; Processes 2 and Q/2 act then as sources of +two rings: 2 -> 3, Q/2 -> Q/2+1; 3 -> 4, Q/2+1 -> Q/2+2 and so on. +This algorithm is probably the most serious competitor to the increasing +ring modified variant. +
    + +
    + +
  • Long (bandwidth reducing): as opposed to the +previous variants, this algorithm and its follower synchronize all +processes involved in the operation. The message is chopped into Q equal +pieces that are scattered across the Q processes. +
    + +
    +The pieces are then rolled in Q-1 steps. The scatter phase uses a binary +tree and the rolling phase exclusively uses mutual message exchanges. In +odd steps 0 <-> 1, 2 <-> 3, 4 <-> 5 and so on; in even steps Q-1 <-> 0, +1 <-> 2, 3 <-> 4, 5 <-> 6 and so on. +
    + +
    +More messages are exchanged, however the total volume of communication is +independent of Q, making this algorithm particularly suitable for large +messages. This algorithm becomes competitive when the nodes are "very +fast" and the network (comparatively) "very slow".

    + +
  • Long (bandwidth reducing modified): same as above, +except that 0 -> 1 first, and then the Long variant is used on processes +0,2,3,4 .. Q-1.

    +
    + + +
    + +
+ +The ring variants are distinguished by a probe mechanism that activates +them. In other words, a process involved in the broadcast and different +from the source asynchronously probes for the message to receive. When +the message is available the broadcast proceeds, and otherwise the +function returns. This allows the broadcast operation to be interleaved with +the update phase. This helps reduce the idle time spent by those +processes waiting for the factorized panel. This mechanism is necessary +to accommodate various computation/communication performance ratios.

+
+ +

Look-ahead

+ +Once the panel has been broadcast or say during this broadcast operation, +the trailing submatrix is updated using the last panel in the look-ahead +pipe: as mentioned before, the panel factorization lies on the critical +path, which means that when the kth panel has been factorized and then +broadcast, the next most urgent task to complete is the factorization and +broadcast of the (k+1)th panel. This technique is often referred to as +"look-ahead" or "send-ahead" in the literature. This package allows the user to +select various "depths" of look-ahead. By convention, a depth of zero +corresponds to no look-ahead, in which case the trailing submatrix is +updated by the panel currently broadcast. Look-ahead consumes some extra +memory to essentially keep all the panels of columns currently in the +look-ahead pipe. A look-ahead of depth 1 (maybe 2) is likely to achieve +the best performance gain.

+
+ +

Update

+ +The update of the trailing submatrix by the last panel in the look-ahead +pipe is made of two phases. First, the pivots must be applied to form the +current row panel U. U should then be solved by the upper triangle of the +column panel. U finally needs to be broadcast to each process row so that +the local rank-nb update can take place. We choose to combine the +swapping and broadcast of U at the cost of replicating the solve. Two +algorithms are available for this communication operation. +
    +
  • Binary-exchange: this is a modified variant of the +binary-exchange (leave on all) reduction operation. Every process column +performs the same operation. The algorithm essentially works as follows. +It pretends to reduce the row panel U, but at the beginning the only valid +copy is owned by the current process row. The other process rows will +contribute rows of A they own that should be copied in U and replace them +with rows that were originally in the current process row. The complete +operation is performed in log(P) steps. For the sake of simplicity, let us +assume that P is a power of two. At step k, process row p exchanges a +message with process row p+2^k. There are essentially two cases. First, +one of those two process rows has received U in a previous step. The +exchange occurs. One process swaps its local rows of A into U. Both +processes copy in U remote rows of A. Second, none of those process rows +has received U, the exchange occurs, and both processes simply add those +remote rows to the list they have accumulated so far. At each step, a +message of the size of U is exchanged by at least one pair of process +rows.

    + +
  • Long: this is a bandwidth reducing variant +accomplishing the same task. The row panel is first spread (using a tree) +among the process rows with respect to the pivot array. This is a scatter +(V variant for MPI users). Locally, every process row then swaps these +rows with the rows of A it owns and that belong to U. These buffers +are then rolled (P-1 steps) to finish the broadcast of U. Every process +row permutes U and proceeds with the computational part of the update. A +couple of notes: process rows are logarithmically sorted before +spreading, so that processes receiving the largest number of rows are +first in the tree. This makes the communication volume optimal for this +phase. Finally, before rolling and after the local swap, an equilibration +phase occurs during which the local pieces of U are uniformly spread +across the process rows. A tree-based algorithm is used. This operation +is necessary to keep the rolling phase optimal even when the pivot rows +are not equally distributed in process rows. This algorithm has a +complexity in terms of communication volume that solely depends on the +size of U. In particular, the number of process rows only impacts the +number of messages exchanged. It will thus outperform the previous +variant for large problems on large machine configurations.

    + +
+ +The user can select any of the two variants above. In addition, a mix is +possible as well. The "binary-exchange" algorithm will be used when U +contains at most a certain number of columns. Choosing at least the block +size nb as the threshold value is clearly recommended when look-ahead is +on.

+
+ +

Backward Substitution

+ +The factorization has just now ended; the back-substitution remains to be +done. For this, we choose a look-ahead of depth one variant. The +right-hand-side is forwarded in process rows in a decreasing-ring +fashion, so that we solve Q * nb entries at a time. At each step, this +shrinking piece of the right-hand-side is updated. The process just above +the one owning the current diagonal block of the matrix A updates first +its last nb piece of x, forwards it to the previous process column, then +broadcasts it in the process column in a decreasing-ring fashion as well. +The solution is then updated and sent to the previous process column. The +solution of the linear system is left replicated in every process row.

+
+ +

Checking the Solution

+ +To verify the result obtained, the input matrix and right-hand side are +regenerated. The normwise backward error (see formula below) is then +computed. A solution is considered as "numerically correct" when this +quantity is less than a threshold value of the order of 1.0. In the +expression below, eps is the relative (distributed-memory) machine +precision. + +
    +
  • || Ax - b ||_oo / ( eps * ( || A ||_oo * || x ||_oo + || b ||_oo ) * n ) +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/aprunner.gif b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/aprunner.gif new file mode 100755 index 000000000..6508c806f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/aprunner.gif differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/copyright.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/copyright.html new file mode 100755 index 000000000..934282c81 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/copyright.html @@ -0,0 +1,66 @@ + + +HPL Copyright and Licensing Terms + + + + +

HPL Copyright Notice and Licensing Terms

+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +
    +
  1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +
  2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions, and the following disclaimer in the +documentation and/or other materials provided with the distribution. +
  3. All advertising materials mentioning features or use of this +software must display the following acknowledgement: This product +includes software developed at the University of Tennessee, +Knoxville, Innovative Computing Laboratory. +
  4. The name of the University, the name of the Laboratory, or the +names of its contributors may not be used to endorse or promote +products derived from this software without specific written +permission. +
+ +

Disclaimer

+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +`AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/documentation.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/documentation.html new file mode 100755 index 000000000..152188041 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/documentation.html @@ -0,0 +1,304 @@ + + +HPL Documentation + + + + +

HPL Documentation

+ +The HPL software distribution comes with a set of text files explaining +how to install, run and tune the software. These files reside in the top +level directory and their names are in upper case. To a large extent, +this page reproduces them. In addition, man- and HTML-pages are provided +for every routine in the package. To access the man pages, one must add +hpl/man to its MANPATH environment variable. The HTML pages can be +accessed on this site, or by pointing your browser to your local hpl/www +directory. Finally, the source code has been heavily documented. Despite +all the other documentation efforts, the source code remains the most +trustworthy and truthful piece of information about what goes on in HPL. +

+ +

HPL Functions HTML Pages

+ +Computational Kernels Wrappers When calling the Fortran +77 BLAS interface, these C functions allow to confine the C to Fortran +77 interface issues to a small subset of routines. + + + +
+
+ +Local Auxiliaries Basic functionality, local swap functions. + + + +
+
+ +Parallel Auxiliaries Index computations, parallel basic +functionality. + + + +
+
+ +Grid Management Most of these routines have a direct +MPI equivalent. On new systems, when the entire MPI functionality is +not yet readily available, these functions are particularly convenient +since they rely on a minimal subset of the MPI standard. + + +
+
+ +Panel Management + + +
+
+ +Panel Factorization Recursive (matrix-multiply based) and +(matrix-vector based) panel factorization. + + +
+
+ +Panel Broadcast + + +
+
+ +Update + + +
+
+ +Main Factorization / Look-ahead + + +
+
+ +Backward Substitution + + +
+
+ +Matrix generation A C version of the ScaLAPACK random +matrix generator with less functionality though. + +
+
+ +Timers Sequential and parallel timing utilities. + +
+
+ +Main Testing / Timing Driver + + +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/errata.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/errata.html new file mode 100755 index 000000000..24275d2dd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/errata.html @@ -0,0 +1,116 @@ + + +HPL Errata-Bugs + + + + +

HPL Errata - Bugs

+ +

Issues fixed in Version 2.1, October 26th, 2012

+ +The output now reports exact time stamps before and after the +execution of the solver function pdgesv() was run. This could +allow for accurate accounting of running time for data center +management purposes, for example when reporting power +consumption. This is important for the Green500 project.

+ +Fixed an out-of-bounds access to arrays in the HPL_spreadN() +and HPL_spreadT() functions. This may cause segmentation +fault signals. It was reported by Stephen Whalen from Cray.

+ +

Issues fixed in Version 2.0, September 10th, 2008

+ +Gregory Bauer found a problem size corresponding to the +periodicity of the pseudo-random matrix generator used in the +HPL timing program. This causes the LU factorization to +detect the singularity of the input matrix as it should have.

+ +A problem size of 2^17 = 131072 causes columns 14 modulo 2^14 +(i.e. 16384) (starting from 0) to be bitwise identical on a +homogeneous platform. Every problem size being a power of 2 +and larger than 2^15 will feature a similar problem if one +searches far enough in the columns of the square input matrix.

+ +The pseudo-random generator uses the linear congruential +algorithm: X(n+1) = (a * X(n) + c) mod m as described in the +Art of Computer Programming, Knuth 1973, Vol. 2. In the HPL +case, m is set to 2^31.

+ +It is very important to realize that this issue is a problem +of the testing part of the HPL software. The numerical +properties of the algorithms used in the factorization and +the solve should not be questioned because of this. In fact, +this is just the opposite: the factorization demonstrated the +weakness of the testing part of the software by detecting the +singularity of the input matrix.

+ +This issue of the testing program is not easy to fix. This +pseudo-random generator has very useful properties despite +this. It is thus currently recommended to HPL users willing +to test matrices of size larger than 2^15 not to use powers +of two.

+ +This issue has been fixed by changing the pseudo-random +matrix generator. Now the periodicity of the generator is +2^64.

+ +

Issues fixed in Version 1.0b, December 15th, 2004

+ +When the matrix size is such that one needs more than 16 GB +per MPI rank, the intermediate calculation (mat.ld+1) * +mat.nq in HPL_pdtest.c ends up overflowing because it is +done using 32-bit arithmetic. This issue has been fixed by +typecasting to size_t; Thanks to John Baron.

+ +

Issues fixed in Version 1.0a, January 20th, 2004

+ +The MPI process grid numbering scheme defaults now to row- +major ordering. This option can now be selected at run time.

+ +The inlined assembly timer routine that was causing the +compilation to fail when using gcc version 3.3 and above has +been removed from the package.

+ +Various building problems on the T3E have been fixed; Thanks +to Edward Anderson.

+ +

Issues fixed in Version 1.0, September 27th, 2000

+ +Due to a couple errors spotted in the VSIPL port of the +software, the distribution contained in the tar file of +September 9th, 2000 had been updated on September 27th, 2000 +with a corrected distribution. These problems were +not affecting in any way possible the BLAS version of the +software. If you are using the VSIPL port of HPL, +and want to make sure you are indeed using the latest +corrected version, please check the date contained in the +file HPL.build.log contained in the main directory.

+ + + + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/faqs.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/faqs.html new file mode 100755 index 000000000..ad853e760 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/faqs.html @@ -0,0 +1,126 @@ + + +HPL Frequently Asked Questions + + + + +

HPL Frequently Asked Questions

+ + +
+ +

What problem size N should I run ?

+ +In order to find out the best performance of your system, the +largest problem size fitting in memory is what you should aim for. +The amount of memory used by HPL is essentially the size of the +coefficient matrix. So for example, if you have 4 nodes with 256 Mb +of memory on each, this corresponds to 1 Gb total, i.e., 125 M double +precision (8 bytes) elements. The square root of that number is +11585. One definitely needs to leave some memory for the OS as well +as for other things, so a problem size of 10000 is likely to fit. As +a rule of thumb, 80 % of the total amount of memory is a good guess. +If the problem size you pick is too large, swapping will occur, and +the performance will drop. If multiple processes are spawned on each +node (say you have 2 processors per node), what counts is the +available amount of memory to each process.

+
+ +

What block size NB should I use ?

+ +HPL uses the block size NB for the data distribution as well as for +the computational granularity. From a data distribution point of +view, the smaller NB, the better the load balance. You definitely +want to stay away from very large values of NB. From a computation +point of view, a too small value of NB may limit the computational +performance by a large factor because almost no data reuse will occur +in the highest level of the memory hierarchy. The number of messages +will also increase. Efficient matrix-multiply routines are often +internally blocked. Small multiples of this blocking factor are +likely to be good block sizes for HPL. The bottom line is that "good" +block sizes are almost always in the [32 .. 256] interval. The best +values depend on the computation / communication performance ratio of +your system. To a much lesser extent, the problem size matters as well. +Say for example, you empirically found that 44 was a good block size +with respect to performance. 88 or 132 are likely to give slightly +better results for large problem sizes because of a slightly higher +flop rate.

+
+ +

What process grid ratio P x Q should I use ?

+ +This depends on the physical interconnection network you have. +Assuming a mesh or a switch HPL "likes" a 1:k ratio with k in [1..3]. +In other words, P and Q should be approximately equal, with Q +slightly larger than P. Examples: 2 x 2, 2 x 4, 2 x 5, 3 x 4, 4 x 4, +4 x 6, 5 x 6, 4 x 8 ... If you are running on a simple Ethernet +network, there is only one wire through which all the messages are +exchanged. On such a network, the performance and scalability of HPL +is strongly limited and very flat process grids are likely to be the +best choices: 1 x 4, 1 x 8, 2 x 4 ...

+
+ +

What about the one processor case ?

+ +HPL has been designed to perform well for large problem sizes on +hundreds of nodes and more. The software works on one node and for +large problem sizes, one can usually achieve pretty good performance +on a single processor as well. For small problem sizes however, the +overhead due to message-passing, local indexing and so on can be +significant.

+
+ +

Why so many options in HPL.dat ?

+ +There are quite a few reasons. First off, these options are useful to +determine what matters and what does not on your system. Second, HPL +is often used in the context of early evaluation of new systems. In +such a case, everything is usually not quite working right, and it is +convenient to be able to vary these parameters without recompiling. +Finally, every system has its own peculiarities and one is likely to +be willing to empirically determine the best set of parameters. In +any case, one can always follow the advice provided in the +tuning section of this document and not +worry about the complexity of the input file.

+
+ +

Can HPL be Outperformed ?

+ +Certainly. There is always room for performance improvements. +Specific knowledge about a particular system is always a source of +performance gains. Even from a generic point of view, better +algorithms or more efficient formulation of the classic ones are +potential winners.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/index.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/index.html new file mode 100755 index 000000000..a3a53abfe --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/index.html @@ -0,0 +1,178 @@ + + + +HPL - A Portable Implementation of the High-Performance +Linpack Benchmark for Distributed-Memory Computers + + + + + +
+ + + + + +
+

HPL - A Portable Implementation of the High-Performance Linpack +Benchmark for Distributed-Memory Computers

+
+ + +
+ + + + + + + +
Version 2.3 +A. Petitet, +R. C. Whaley, +J. Dongarra, +A. Cleary +December 2, 2018 +# Accesses +
+

+ +HPL is a software package that solves a (random) +dense linear system in double precision (64 bits) arithmetic +on distributed-memory computers. It can thus be regarded as +a portable as well as freely available implementation of the High +Performance Computing Linpack Benchmark.

+ +The algorithm used by HPL can be summarized by the +following keywords: Two-dimensional block-cyclic data distribution +- Right-looking variant of the LU factorization with row partial +pivoting featuring multiple look-ahead depths - Recursive panel +factorization with pivot search and column broadcast combined - +Various virtual panel broadcast topologies - bandwidth reducing +swap-broadcast algorithm - backward substitution with look-ahead +of depth 1.

+ +The HPL package provides a testing and timing program to quantify +the accuracy of the obtained solution as well as +the time it took to compute it. The best performance +achievable by this software on your system depends on a large variety +of factors. Nonetheless, with some restrictive assumptions on the +interconnection network, the algorithm described here and its +attached implementation are scalable in the sense +that their parallel efficiency is maintained constant with respect +to the per processor memory usage.

+ +The HPL software package requires the availability +on your system of an implementation of the Message Passing Interface +MPI (1.1 compliant). +An implementation of either the Basic Linear Algebra +Subprograms BLAS or the Vector Signal Image +Processing Library VSIPL is also needed. +Machine-specific as well as generic implementations of +MPI, the +BLAS and +VSIPL are available for a large +variety of systems.

+ +Acknowledgements: This work was supported in part +by a grant from the Department of Energy's Lawrence +Livermore National Laboratory and Los Alamos National Laboratory +as part of the ASCI Projects contract numbers B503962 and +12187-001-00 4R. + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ +
+Innovative Computing Laboratory
+last revised December 2, 2018
+
+ +
+#########################################################################
+
+file    hpl-2.3.tar.gz
+for     HPL 2.3 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: December 2, 2018
+
+#########################################################################
+
+file    hpl-2.2.tar.gz
+for     HPL 2.2 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: February 24, 2016
+
+#########################################################################
+
+file    hpl-2.1.tar.gz
+for     HPL 2.1 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: October 26, 2012
+
+#########################################################################
+
+file    hpl-2.0.tar.gz
+for     HPL 2.0 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: September 10, 2008
+
+#########################################################################
+
+file    hpl.tgz
+for     HPL 1.0a - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: January 20, 2004
+ +######################################################################### + +file hpl_qs22-2008-11-30.patch +for Implementation of the High-Performance Linpack benchmark for IBM +, QS22 systems with PowerXCell 8i processors. The file is a patch +, for HPL 1.0a. +by IBM + +file IBM_LICENSE.TXT +for IBM Copyright notice for QS22 HPL +by IBM + +file IBM_README.txt +for README for IBM QS22 HPL +by IBM +Updated: November 30, 2008 + + +######################################################################### +
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/links.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/links.html new file mode 100755 index 000000000..da2639e99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/links.html @@ -0,0 +1,89 @@ + + +HPL Related Links + + + + +

HPL Related Links

+ +The list of links below contains some relevant material to this +work. This list is provided for illustrative purposes, and should be +regarded as an initial starting point for the interested reader. This +list is by no means meant to be exhaustive.

+ +

Message Passing Interface (MPI)

+ +MPI is a library specification for message-passing, proposed as a +standard by a broadly based committee of vendors, implementors, and +users. Machine-specific (optimized) as well as freely available MPI +libraries are available for a large variety of systems. Browse the +Message Passing Interface (MPI) +standard web page for more information.

+ +

Basic Linear Algebra Subroutines (BLAS)

+ +The BLAS are high quality +"building block" routines for performing basic vector and matrix +operations. A lot of "BLAS-related" information can be found at this +site. In particular, a reference implementation is available. This +reference implementation is not optimized for any +system, and it is therefore not recommended to use it +for benchmarking purposes. +However, machine-specific +optimized BLAS libraries are available for a variety of computer +systems. For further details, please contact your local vendor +representative. Alternatively, one may also consider using automatic +code generators such as ATLAS. +This tool automatically generates a complete and optimized BLAS +library for a large variety of modern systems.

+ +

Vector Signal Image Processing Library (VSIPL)

+ +VSIPL is an API defined by an open +standard comprised of embedded signal and image processing hardware and +software vendors, academia, users, and government labs. A lot of +"VSIPL-related" information can be found at this site. In particular, a +reference implementation is available. Machine-specific optimized VSIPL +libraries are available for a variety of computer systems. For further +details, please contact your local vendor representative.

+ +

TOP 500 List

+ +The TOP 500 +is an ordered list of the 500 most powerful computer systems worldwide. +Computers are ranked in this list by their performance on the + +LINPACK Benchmark.

+ +

Parallel Dense Linear Algebra Software Libraries

+ +Browse the Netlib software repository +or the National HPCC Software Exchange +to find a large collection of freely available linear algebra libraries. +

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/main.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/main.jpg new file mode 100755 index 000000000..df62edd33 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/main.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/mat2.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/mat2.jpg new file mode 100755 index 000000000..25afdc44c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/mat2.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/pfact.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/pfact.jpg new file mode 100755 index 000000000..33a7e55cb Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/pfact.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/references.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/references.html new file mode 100755 index 000000000..95c6db176 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/references.html @@ -0,0 +1,276 @@ + + +HPL References + + + + +

HPL References

+ + +The list of references below contains some relevant published material +to this work. This list is provided for illustrative purposes, and +should be regarded as an initial starting point for the interested +reader. This list is by no means meant to be exhaustive. +

+ +The references have been sorted in four categories and chronologically +listed within each category. The four categories are + +
+ +

Linpack Benchmark

+ +
    + + +
  • LINPACK Users Guide, J. Dongarra, J. Bunch, C. Moler and +G. W. Stewart, SIAM, Philadelphia, PA, 1979. + + +
  • Performance of Various Computers Using Standard Linear Equations +Software, J. Dongarra, Technical Report CS-89-85, University of +Tennessee, 1989. (An updated version of this report can be found at + +http://www.netlib.org/benchmark/performance.ps). + + +
  • Towards Peak Parallel LINPACK Performance on 400 Transputers, +R. Bisseling and L. Loyens, Supercomputer, Vol. 45, pp. 20-27, 1991. + +
  • Massively Parallel LINPACK Benchmark on the Intel Touchstone +DELTA and iPSC/860 Systems, R. van de Geijn, 1991 Annual Users +Conference Proceedings. Intel Supercomputer Users Group, Dallas, TX, +1991. + +
  • The LINPACK Benchmark on the AP 1000, R. Brent, Frontiers, +1992, pp. 128-135, McLean, VA, 1992. + + +
  • Implementation of BLAS Level 3 and LINPACK Benchmark on the +AP1000, R. Brent and P. Strazdins, Fujitsu Scientific and Technical +Journal, Vol. 5, No. 1, pp. 61-70, 1993. + + +
  • LU Factorization and the LINPACK Benchmark on the Intel +Paragon, D. Womble, D. Greenberg, D. Wheat and S. Riesen, Sandia +Technical Report, 1994. + + +
  • Massively Parallel Distributed Computing: World's First 281 +Gigaflop Supercomputer, J. Bolen, A. Davis, B. Dazey, S. Gupta, +G. Henry, D. Robboy, G. Schiffler, D. Scott, M. Stallcup, A. Taraghi, +S. Wheat from Intel SSD, L. Fisk, G. Istrail, C. Jong, R. Riesen, +L. Shuler, from Sandia National Laboratories, Proceedings of the Intel +Supercomputer Users Group 1995. + + +
  • High Performance Software on Intel Pentium Pro Processors or +Micro-Ops to TeraFLOPS, B. Greer and G. Henry, Proceedings of the +SuperComputing 1997 Conference, ACM SIGARCH - IEEE Computer Society +Press - ISBN: 0-89791-985-8, San Jose, CA, 1997. + +
+ +
+ +

Parallel LU Factorization

+ +
    + + +
  • Communication Complexity of the Gaussian Elimination Algorithm +on Multiprocessors, Y. Saad, Linear Algebra and Its Applications, +Vol. 77, pp. 315-340, 1986. + + +
  • LU Factorization Algorithms on Distributed-Memory Multiprocessor +Architectures, G. Geist and C. Romine, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, pp. 639-649, 1988. + + +
  • Parallel LU Decomposition on a Transputer Network, +R. Bisseling and J. van der Vorst, Lecture Notes in Computer Sciences, +Springer-Verlag, Eds. G. van Zee and J. van der Vorst, Vol. 384, +pp. 61-77, 1989. + + +
  • The Distributed Solution of Linear Systems Using the Torus-Wrap +Data Mapping, C. Ashcraft, ECA-TR-147, Boeing Computer Services, +Seattle, WA, 1990. + +
  • Experiments with Multicomputer LU-Decomposition, E. van de +Velde, Concurrency: Practice and Experience, Vol. 2, pp. 1-26, 1990. + + +
  • A Taxonomy of Distributed Dense LU Factorization Methods, +C. Ashcraft, ECA-TR-161, Boeing Computer Services, Seattle, WA, 1991. + + +
  • The Torus-Wrap Mapping for Dense Matrix Calculations on Massively +Parallel Computers, B. Hendrickson and D. Womble, SIAM Journal on +Scientific and Statistical Computing, Vol. 15, pp. 1201-1226, 1994. + +
  • Scalability Issues in the Design of a Library for Dense Linear +Algebra, J. Dongarra, R. van de Geijn and D. Walker, Journal of +Parallel and Distributed Computing, Vol. 22, No. 3, pp. 523-537, 1994. + + +
  • Matrix Factorization using Distributed Panels on the Fujitsu +AP1000, P. Strazdins, Proceedings of the IEEE First International +Conference on Algorithms And Architectures for Parallel Processing +ICA3PP-95, Brisbane, 1995. + + +
  • The Design and Implementation of the ScaLAPACK LU, QR, and +Cholesky Factorization Routines, J. Choi, J. Dongarra, S. Ostrouchov, +A. Petitet, D. Walker and R. C. Whaley, Scientific Programming, Vol. 5, +pp. 173-184, 1996. + +
+ +
+ +

Recursive LU Factorization

+ +
    + + +
  • Locality of Reference in LU Decomposition with partial +pivoting, S. Toledo, SIAM Journal on Matrix. Anal. Appl., Vol. 18, +No. 4, 1997. + +
  • Recursion Leads to Automatic Variable Blocking for Dense +Linear-Algebra Algorithms, F. Gustavson, IBM Journal of Research +and Development, Vol. 41, No. 6, pp. 737-755, 1997 + +
+ +
+ +

Parallel Matrix Multiply

+ +
    + + +
  • Matrix Algorithms on a Hypercube I: Matrix Multiplication, +G. Fox, S. Otto and A. Hey, Parallel Computing, Vol. 3, pp. 17-31, 1987. + + +
  • Basic Matrix Subprograms for Distributed-Memory Systems, +A. Elster, Proceedings of the Fifth Distributed-Memory Computing +Conference, Eds. D. Walker and Q. Stout, IEEE Press, pp. 311-316, 1990. + + +
  • The Parallelization of Level 2 and 3 BLAS Operations on +Distributed-Memory Machines, M. Aboelaze, N. Chrisochoides +and E. Houstis, CSD-TR-91-007, Purdue University, West Lafayette, +IN, 1991. + + +
  • The Multicomputer Toolbox Approach to Concurrent BLAS and LACS, +R. Falgout, A. Skjellum, S. Smith and C. Still, Proceedings of the +Scalable High Performance Computing Conference SHPCC-92, IEEE Computer +Society Press, 1992. + + +
  • A High Performance Matrix Multiplication Algorithm on a +Distributed-Memory Parallel Computer, Using Overlapped Communication, +R. Agarwal, F. Gustavson and M. Zubair, IBM Journal of Research and +Development, Vol. 38, No. 6, pp. 673-681, 1994. + +
  • PUMMA: Parallel Universal Matrix Multiplication Algorithms on +Distributed-Memory Concurrent Computers, J. Choi, J. Dongarra and +D. Walker, Concurrency: Practice and Experience, Vol. 6, No. 7, +pp. 543-570, 1994. + +
  • Matrix Multiplication on the Intel Touchstone DELTA, +S. Huss-Lederman, E. Jacobson, A. Tsao and G. Zhang, Concurrency: +Practice and Experience, Vol. 6, No. 7, pp. 571-594, 1994. + + +
  • A Three-Dimensional Approach to Parallel Matrix Multiplication, +R. Agarwal, S. Balle, F. Gustavson, M. Joshi and P. Palkar, IBM Journal +of Research and Development, Vol. 39, No. 5, pp. 575-582, 1995. + + +
  • A High Performance Parallel Strassen Implementation, +B. Grayson and R. van de Geijn, Parallel Processing Letters, Vol. 6, +No. 1, pp. 3-12, 1996. + + +
  • Parallel Implementation of BLAS: General Techniques for Level +3 BLAS, A. Chtchelkanova, J. Gunnels, G. Morrow, J. Overfelt and +R. van de Geijn, Concurrency: Practice and Experience, Vol. 9, No. 9, +pp. 837-857, 1997. + +
  • A Poly-Algorithm for Parallel Dense Matrix Multiplication on +Two-Dimensional Process Grid Topologies, J. Li, R. Falgout and +A. Skjellum, Concurrency: Practice and Experience, Vol. 9, No. 5, +pp. 345-389, 1997. + +
  • SUMMA: Scalable Universal Matrix Multiplication Algorithm, +R. van de Geijn and J. Watts, Concurrency: Practice and Experience, +Vol. 9, No. 4, pp. 255-274, 1997. + +
+ +
+ +

Parallel Triangular Solve

+ +
    + + +
  • Parallel Solution of Triangular Systems on Distributed-Memory +Multiprocessors, M. Heath and C. Romine, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, pp. 558-588, 1988. + +
  • A Parallel Triangular Solver for a Distributed-Memory +Multiprocessor, G. Li and T. Coleman, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, No. 3, pp. 485-502, 1988. + + +
  • A New Method for Solving Triangular Systems on Distributed-Memory +Message-Passing Multiprocessor, G. Li and T. Coleman, SIAM Journal +on Scientific and Statistical Computing, Vol. 10, No. 2, pp. 382-396, +1989. + + +
  • Parallel Triangular System Solving on a Mesh Network of +Transputers, R. Bisseling and J. van der Vorst, SIAM Journal +on Scientific and Statistical Computing, Vol. 12, pp. 787-799, 1991. + +
+ + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/results.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/results.html new file mode 100755 index 000000000..9a7d8b8af --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/results.html @@ -0,0 +1,243 @@ + + +HPL Results + + + + + + + +
+ + +

HPL Performance Results

+ + +The performance achieved by this software package on a few machine +configurations is shown below. These results are only provided for +illustrative purposes. By the time you read this, those systems +have changed, they may not even exist anymore and one can surely +not exactly reproduce the state in which these machines were when +those measurements have been obtained. To obtain accurate figures +on your system, it is absolutely necessary to +download the software and run it there. + +
+
+ + + +
+
+ +

4 AMD Athlon K7 500 Mhz (256 Mb) - (2x) 100 Mbs +Switched - 2 NICs per node (channel bonding)

+ +
+ + + + + + + +
OS Linux 6.2 RedHat (Kernel 2.2.14)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPIch 1.2.1
BLAS ATLAS (Version 3.0 beta)
Comments 09 / 00

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Performance (Gflops) w.r.t Problem size on 4 nodes. +
GRID 2000 5000 800010000
1 x 4 1.28 1.73 1.89 1.95
2 x 2 1.17 1.68 1.88 1.93
4 x 1 0.81 1.43 1.70 1.80

+

+ +
+

8 Duals Intel PIII 550 Mhz (512 Mb) - Myrinet

+ +
+ + + + + + + + + +
OS Linux 6.1 RedHat (Kernel 2.2.15)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPI GM (Version 1.2.3)
BLAS ATLAS (Version 3.0 beta)
Comments UTK / ICL - Torc cluster - 09 / 00

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Performance (Gflops) w.r.t Problem size on 8- and 16-processors grids. +
GRID 2000 5000 8000100001500020000
2 x 4 1.76 2.32 2.51 2.58 2.72 2.73
4 x 4 2.27 3.94 4.46 4.68 5.00 5.16

+

+ +
+

Compaq 64 nodes (4 ev67 667 Mhz processors per node) +AlphaServer SC

+ +
+ + + + + + + + +
OS Tru64 Version 5
C compiler cc Version 6.1
C flags -arch host -tune host -std -O5
MPI -lmpi -lelan
BLAS CXML
Comments ORNL / NCCS + - falcon - 09 / 00

+

+ +In the table below, each row corresponds to a given number of cpus (or +processors) and nodes. The first row for example is denoted by 1 / 1, +i.e., 1 cpu / 1 node. Rmax is given in Gflops, and the value of Nmax +in fact corresponds to 351 Mb per cpu for all machine configurations.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CPUS / NODES GRID N 1/2 Nmax Rmax (Gflops) Parallel Efficiency
1 / 1 1 x 1 150 6625 1.136 1.000
4 / 1 2 x 2 800 13250 4.360 0.960
16 / 4 4 x 4 2300 26500 17.00 0.935
64 / 16 8 x 8 5700 53000 67.50 0.928
256 / 64 16 x 16 14000 106000 263.6 0.906

+

+For Rmax shown in the table, the parallel efficiency per cpu has been +computed using the performance achieved by HPL on 1 cpu. That is fair, +since the CXML matrix multiply routine was achieving at best 1.24 Gflops +for large matrix operands on one cpu, it would have been difficult for a +sequential Linpack benchmark implementation to achieve much more than +1.136 Gflops on this same cpu. For constant load (as in the table 351 Mb +per cpu for Nmax), HPL scales almost linearly as it should. + +

+The authors acknowledge the use of the Oak Ridge National Laboratory +Compaq computer, funded by the Department of Energy's Office +of Science and Energy Efficiency programs.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/roll.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/roll.jpg new file mode 100755 index 000000000..88d2c56af Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/roll.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/rollM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/rollM.jpg new file mode 100755 index 000000000..0d7f076fd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/rollM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/scalability.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/scalability.html new file mode 100755 index 000000000..00bb1a27e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/scalability.html @@ -0,0 +1,200 @@ + + +HPL Scalability Analysis + + + + +

HPL Scalability Analysis

+ +The machine model used for the +analysis is first described. This crude model is then used to first +estimate the parallel running time of the various phases of the +algorithm namely + +Finally the parallel efficiency +of the entire algorithm is estimated according to this machine model. +We show that for a given set of parameters HPL is scalable +not only with respect to the amount of computation, but also with +respect to the communication volume.

+
+ +

The Machine Model

+ +Distributed-memory computers consist of processors that are connected +using a message passing interconnection network. Each processor has +its own memory called the local memory, which is accessible only to +that processor. As the time to access a remote memory is longer than +the time to access a local one, such computers are often referred to +as Non-Uniform Memory Access (NUMA) machines.

+ +The interconnection network of our machine model is static, meaning +that it consists of point-to-point communication links among +processors. This type of network is also referred to as a direct +network as opposed to dynamic networks. The latter are constructed +from switches and communication links. These links are dynamically +connected to one another by the switching elements to establish, at +run time, the paths between processors memories.

+ +The interconnection network of the two-dimensional machine model +considered here is a static, fully connected physical topology. It +is also assumed that processors can be treated equally in terms +of local performance and that the communication rate between two +processors depends on the processors considered.

+ +Our model assumes that a processor can send or receive data on only +one of its communication ports at a time (assuming it has more than +one). In the literature, this assumption is also referred to as the +one-port communication model.

+ +The time spent to communicate a message between two given processors +is called the communication time Tc. In our machine model, Tc is +approximated by a linear function of the number L of double +precision (64-bits) items communicated. Tc is the sum of the time to +prepare the message for transmission (alpha) and the time (beta * L) +taken by the message of length L to traverse the network to its +destination, i.e.,

+
+Tc = alpha + beta L.

+
+ +Finally, the model assumes that the communication links are +bi-directional, that is, the time for two processors to send each +other a message of length L is also Tc. A processor can send and/or +receive a message on only one of its communication links at a time. +In particular, a processor can send a message while receiving another +message from the processor it is sending to at the same time.

+ +Since this document is only concerned with regular local dense linear +algebra operations, the time taken to perform one floating point +operation is assumed to be summarized by three constants gam1, +gam2 and gam3. These quantities are flop rate approximations of the +vector-vector, matrix-vector and matrix-matrix operations for each +processor. This very crude approximation summarizes all the steps +performed by a processor to achieve such a computation. Obviously, +such a model neglects all the phenomena occurring in the processor +components, such as cache misses, pipeline startups, memory load or +store, floating point arithmetic and so on, that may influence the +value of these constants as a function of the problem size for +example.

+ +Similarly, the model does not make any assumption on the amount of +physical memory per node. It is assumed that if a process has been +spawned on a processor, one has ensured that enough memory was +available on that processor. In other words, swapping will not occur +during the modeled computation.

+ + +This machine model is a very crude approximation that is designed +specifically to illustrate the cost of the dominant factors of our +particular case.

+
+
+ +

Panel Factorization and Broadcast

+ +Let us consider an M-by-N panel distributed over a P-process column. +Because of the recursive formulation of the panel factorization, it +is reasonable to consider that the floating point operations will +be performed at matrix-matrix multiply "speed". For every column in +the panel a binary-exchange is performed on 2*N data items. When this +panel is broadcast, what matters is the time that the next process +column will spend in this communication operation. Assuming one +chooses the increasing-ring (modified) +variant, only one message needs to be taken into account. The +execution time of the panel factorization and broadcast can thus be +approximated by:

+
+Tpfact( M, N ) = (M/P - N/3) N^2 gam3 + N log(P)( alpha + beta 2 N ) + +alpha + beta M N / P.

+
+
+ +

Trailing Submatrix Update

+ +Let us consider the update phase of an N-by-N trailing submatrix +distributed on a P-by-Q process grid. From a computational point of +view one has to (triangular) solve N right-hand-sides and perform a +local rank-NB update of this trailing submatrix. Assuming one chooses +the long variant, the execution +time of the update operation can be approximated by:

+
+Tupdate( N, NB ) = gam3 ( N NB^2 / Q + 2 N^2 NB / ( P Q ) ) + +alpha ( log( P ) + P - 1 ) + 3 beta N NB / Q.

+
+The constant "3" in front of the "beta" term is obtained by counting +one for the (logarithmic) spread phase and two for the rolling phase; +In the case of bi-directional links this constant 3 should therefore +be only a 2.

+
+ +

Backward Substitution

+ +The number of floating point operations performed during the backward +substitution is given by N^2 / (P*Q). Because of the lookahead, the +communication cost can be approximated at each step by two messages +of length NB, i.e., the time to communicate the NB-piece of the +solution vector from one diagonal block of the matrix to another. It +follows that the execution time of the backward substitution can be +approximated by:

+
+Tbacks( N, NB ) = gam2 N^2 / (P Q) + N ( alpha / NB + 2 beta ).

+
+
+ +

Putting it All Together

+ +The total execution time of the algorithm described above is given by

+
+Sum(k=0,N,NB)[Tpfact( N-k, NB ) + Tupdate( N-k-NB, NB )] + +Tbacks( N, NB ).

+
+That is, by considering only the dominant terms in alpha, beta and +gam3:

+
+Thpl = 2 gam3 N^3 / ( 3 P Q ) + beta N^2 (3 P + Q) / ( 2 P Q ) + +alpha N ((NB + 1) log(P) + P) / NB.

+
+The serial execution time is given by Tser = 2 gam3 N^3 / 3. If we +define the parallel efficiency E as the ratio Tser / ( P Q Thpl ), we +obtain:

+
+E = 1 / ( 1 + 3 beta (3 P + Q) / ( 4 gam3 N ) + +3 alpha P Q ((NB + 1) log(P) + P) / (2 N^2 NB gam3) ).

+
+This last equality shows that when the memory usage per processor +N^2 / (P Q) is maintained constant, the parallel efficiency slowly +decreases only because of the alpha term. The communication volume +(the beta term) however remains constant. Due to these results, HPL +is said to be scalable not only with respect to the +amount of computation, but also with respect to the communication +volume.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/software.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/software.html new file mode 100755 index 000000000..34d82b2b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/software.html @@ -0,0 +1,109 @@ + + +HPL Software + + + + +

HPL Software

+ +

Download and Installation

+ +
    +
  1. Download the tar-gzipped file, +issue then "gunzip hpl-2.3.tar.gz; tar -xvf hpl-2.3.tar" and this +should create an hpl-2.3 directory containing the distribution. +We call this directory the top level directory. + +
  2. Create a file Make.<arch> in the top-level directory. +For this purpose, you may want to re-use one contained in the +setup directory. This Make.<arch> file essentially contains +the compilers, libraries, and their paths to be used on your system. + +
  3. Type "make arch=<arch>". This should create an executable +in the bin/<arch> directory called xhpl. For example, on our +Linux PII cluster, I create a file called Make.Linux_PII in the +top-level directory. Then, I type "make arch=Linux_PII". This +creates the executable file bin/Linux_PII/xhpl. + +
  4. Quick check: run a few tests (assuming you have 4 nodes for +interactive use) by issuing the following commands from the top +level directory: "cd bin/<arch> ; mpirun -np 4 xhpl". This +should produce quite a bit of meaningful output on the screen. + +
  5. Most of the performance parameters can be tuned, by modifying +the input file bin/<arch>/HPL.dat. See the +tuning page or the TUNING file in the +top-level directory. +
+
+ +

Compile Time Options

+ +At the end of the "model" Make.<arch>, the user is given +the opportunity to override some default compile options of this +software. The list of these options and their meaning is:

+ +
+ + + + + + + + + +
-DHPL_COPY_Lforce the copy of the panel L before bcast
-DHPL_CALL_CBLAScall the BLAS C interface
-DHPL_CALL_VSIPLcall the vsip library
-DHPL_DETAILED_TIMINGenable detailed timers

+

+ +The user must choose between either the BLAS Fortran 77 interface, +or the BLAS C interface, or the VSIPL library depending on which +computational kernels are available on his system. Only one of these +options should be selected. If you choose the BLAS Fortran 77 +interface, it is necessary to fill out the machine-specific C to +Fortran 77 interface section of the Make.<arch> file. To do +this, please refer to the Make.<arch> examples contained in +the setup directory.

+ +By default HPL will: +
    +
  • not copy L before broadcast, +
  • call the BLAS Fortran 77 interface, +
  • not display detailed timing information. +
+ +As an example, suppose one wants this software to copy the panel of +columns into a contiguous buffer before broadcasting. It should +be more efficient to let the software create the appropriate MPI +user-defined data type since this may avoid the data copy. So, it +is a strange idea, but one insists. To achieve this one would add +-DHPL_COPY_L to the definition of HPL_OPTS at the end of the file +Make.<arch>. Then issue a "make clean arch=<arch> ; +make build arch=<arch>" and the executable will be re-built +with that feature in.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spread.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spread.jpg new file mode 100755 index 000000000..56c255a3f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spread.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spreadM.jpg b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spreadM.jpg new file mode 100755 index 000000000..433e4c077 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/spreadM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/tuning.html b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/tuning.html new file mode 100755 index 000000000..fbbf17fb7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/cuda/hpl-2.3/www/tuning.html @@ -0,0 +1,476 @@ + + +HPL Tuning + + + + +

HPL Tuning

+ +After having built the executable hpl/bin/<arch>/xhpl, +one may want to modify the input data file HPL.dat. This file +should reside in the same directory as the executable +hpl/bin/<arch>/xhpl. An example HPL.dat file is +provided by default. This file contains information about the +problem sizes, machine configuration, and algorithm features +to be used by the executable. It is 31 lines long. All the +selected parameters will be printed in the output generated +by the executable.

+ +We first describe the meaning of each line of this input file +below. Finally, a few useful +experimental guide lines to set up the file are given at +the end of this page.

+
+ +

Description of the HPL.dat File

+ +Line 1: (unused) Typically one would use +this line for its own good. For example, it could be used +to summarize the content of the input file. By default this +line reads: +
+HPL Linpack benchmark input file
+
+ +
+Line 2: (unused) same as line 1. By default +this line reads: +
+Innovative Computing Laboratory, University of Tennessee
+
+ +
+Line 3: the user can choose where the +output should be redirected to. In the case of a file, a +name is necessary, and this is the line where one wants to +specify it. Only the first name on this line is significant. +By default, the line reads: +
+HPL.out  output file name (if any)
+
+ +This means that if one chooses to redirect the output to a +file, the file will be called "HPL.out". The rest of the line +is unused, and this space can be used to put some informative comment on +the meaning of this line.

+ +
+Line 4: This line specifies where the output +should go. The line is formatted, it must begin with a +positive integer, the rest is insignificant. 3 choices are +possible for the positive integer, 6 means that the output +will go to the standard output, 7 means that the output will +go to the standard error. Any other integer means that the +output should be redirected to a file, whose name has been +specified in the line above. This line by default reads: +
+6        device out (6=stdout,7=stderr,file)
+
+which means that the output generated by the executable +should be redirected to the standard output.

+ +
+Line 5: This line specifies the number of +problem sizes to be executed. This number should be less than +or equal to 20. The first integer is significant, the rest +is ignored. If the line reads: +
+3        # of problems sizes (N)
+
+this means that the user is willing to run 3 problem sizes +that will be specified in the next line.

+ +
+Line 6: This line specifies the problem sizes +one wants to run. Assuming the line above started with 3, +the 3 first positive integers are significant, the rest is +ignored. For example: +
+3000 6000 10000    Ns
+
+means that one wants xhpl to run 3 (specified in line 5) +problem sizes, namely 3000, 6000 and 10000.

+ +
+Line 7: This line specifies the number of +block sizes to be run. This number should be less than or +equal to 20. The first integer is significant, the rest is +ignored. If the line reads: +
+5        # of NBs
+
+this means that the user is willing to use 5 block sizes that +will be specified in the next line.

+ +
+Line 8: This line specifies the block sizes +one wants to run. Assuming the line above started with 5, +the 5 first positive integers are significant, the rest is +ignored. For example: +
+80 100 120 140 160 NBs
+
+means that one wants xhpl to use 5 (specified in line 7) +block sizes, namely 80, 100, 120, 140 and 160.

+ +
+Line 9: This line specifies how the MPI +processes should be mapped onto the nodes of your platform. +There are currently two possible mappings, namely row- and +column-major. This feature is mainly useful when these nodes +are themselves multi-processor computers. A row-major mapping +is recommended.

+ +
+Line 10: This line specifies the number of +process grids to be run. This number should be less than +or equal to 20. The first integer is significant, the rest is +ignored. If the line reads: +
+2        # of process grids (P x Q)
+
+this means that you are willing to try 2 process grid sizes +that will be specified in the next line.

+ +
+Line 11-12: These two lines specify the +number of process rows and columns of each grid you want to +run on. Assuming the line above (10) started with 2, the 2 +first positive integers of those two lines are significant, +the rest is ignored. For example: +
+1 2          Ps
+6 8          Qs
+
+means that one wants to run xhpl on 2 process grids (line +10), namely 1-by-6 and 2-by-8. Note: In this example, it is +required then to start xhpl on at least 16 nodes (max +of Pi-by-Qi). The runs on the two grids will be consecutive. +If one was starting xhpl on more than 16 nodes, say 52, only +6 would be used for the first grid (1x6) and then 16 (2x8) +would be used for the second grid. The fact that you started +the MPI job on 52 nodes, will not make HPL use all of them. +In this example, only 16 would be used. If one wants to run +xhpl with 52 processes one needs to specify a grid of 52 +processes, for example the following lines would do the job: +
+4  2         Ps
+13 8         Qs
+
+ +
+Line 13: This line specifies the threshold +to which the residuals should be compared. The residuals +should be of order 1, but are in practice slightly less than +this, typically 0.001. This line is made of a real number, +the rest is not significant. For example: +
+16.0         threshold
+
+In practice, a value of 16.0 will cover most cases. For +various reasons, it is possible that some of the residuals +become slightly larger, say for example 35.6. xhpl will flag +those runs as failed, however they can be considered as +correct. A run should be considered as failed if the residual +is a few orders of magnitude bigger than 1 for example 10^6 or +more. Note: if one was to specify a threshold of 0.0, all +tests would be flagged as failed, even though the answer is +likely to be correct. It is allowed to specify a negative +value for this threshold, in which case the checks will be +by-passed, no matter what the threshold value is, as soon as +it is negative. This feature allows one to save time when +performing a lot of experiments, say for instance during the +tuning phase. Example: +
+-16.0        threshold
+
+ +
+The remaining lines allow one to specify algorithmic features. +xhpl will run all possible combinations of those for each +problem size, block size, process grid combination. This is +handy when one looks for an "optimal" set of parameters. To +understand a little bit better, let us first say a few words +about the algorithm implemented in HPL. Basically this is a +right-looking version with row-partial pivoting. The panel +factorization is matrix-matrix operation based and recursive, +dividing the panel into NDIV subpanels at each step. This +part of the panel factorization is denoted below by +"recursive panel fact. (RFACT)". The recursion stops when +the current panel is made of less than or equal to NBMIN +columns. At that point, xhpl uses a matrix-vector operation +based factorization denoted below by "PFACTs". Classic +recursion would then use NDIV=2, NBMIN=1. There are +essentially 3 numerically equivalent LU factorization +algorithm variants (left-looking, Crout and right-looking). +In HPL, one can choose every one of those for the RFACT, as +well as the PFACT. The following lines of HPL.dat allow you +to set those parameters.

+Lines 14-21: (Example 1) +
+3       # of panel fact
+0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+4       # of recursive stopping criterium
+1 2 4 8 NBMINs (>= 1)
+3       # of panels in recursion
+2 3 4   NDIVs
+3       # of recursive panel fact.
+0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+
+ +This example would try all variants of PFACT, 4 values for +NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 +and 4, and all variants for RFACT.

+Lines 14-21: (Example 2) +
+2       # of panel fact
+2 0     PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+
+This example would try 2 variants of PFACT namely right +looking and left looking, 2 values for NBMIN, namely 4 and 8, +1 value for NDIV namely 2, and one variant for RFACT.

+ +
+In the main loop of the algorithm, the current panel of +columns is broadcast in process rows using a virtual ring +topology. HPL offers various choices and one most likely wants +to use the increasing ring modified encoded as 1. 3 and 4 are +also good choices.

+Lines 22-23: (Example 1) +
+1       # of broadcast
+1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+This will cause HPL to broadcast the current panel using the +increasing ring modified topology.

+Lines 22-23: (Example 2) +
+2       # of broadcast
+0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+This will cause HPL to broadcast the current panel using the +increasing ring virtual topology and the long message +algorithm.

+ +
+Lines 24-25 allow to specify the look-ahead +depth used by HPL. A depth of 0 means that the next panel +is factorized after the update by the current panel is +completely finished. A depth of 1 means that the next +panel is immediately factorized after being updated. The +update by the current panel is then finished. A depth of k +means that the k next panels are factorized immediately after +being updated. The update by the current panel is then +finished. It turns out that a depth of 1 seems to give the +best results, but may need a large problem size before one +can see the performance gain. So use 1, if you do not know +better, otherwise you may want to try 0. Look-ahead of +depths 3 and larger will probably not give you better +results.

+Lines 24-25: (Example 1): +
+1       # of lookahead depth
+1       DEPTHs (>=0)
+
+This will cause HPL to use a look-ahead of depth 1.

+Lines 24-25: (Example 2): +
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+
+This will cause HPL to use a look-ahead of depths 0 and 1.

+ +
+Lines 26-27 allow to specify the swapping +algorithm used by HPL for all tests. There are currently +two swapping algorithms available, one based on "binary +exchange" and the other one based on a "spread-roll" +procedure (also called "long" below). For large problem +sizes, this last one is likely to be more efficient. The user +can also choose to mix both variants, that is "binary-exchange" +for a number of columns less than a threshold value, and then +the "spread-roll" algorithm. This threshold value is then +specified on Line 27.

+Lines 26-27: (Example 1): +
+1       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+
+This will cause HPL to use the "long" or "spread-roll" +swapping algorithm. Note that a threshold is specified in +that example but not used by HPL.

+Lines 26-27: (Example 2): +
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+
+This will cause HPL to use the "long" or "spread-roll" +swapping algorithm as soon as there are more than 60 columns +in the row panel. Otherwise, the "binary-exchange" algorithm +will be used instead.

+ +
+Line 28 allows to specify whether the upper +triangle of the panel of columns should be stored in +no-transposed or transposed form. Example: +
+0            L1 in (0=transposed,1=no-transposed) form
+
+ +
+Line 29 allows to specify whether the panel +of rows U should be stored in no-transposed or transposed +form. Example: +
+0            U  in (0=transposed,1=no-transposed) form
+
+ +
+Line 30 enables / disables the equilibration +phase. This option will not be used unless you selected 1 or +2 in Line 26. Example: +
+1            Equilibration (0=no,1=yes)
+
+ +
+Line 31 allows to specify the alignment in +memory for the memory space allocated by HPL. On modern +machines, one probably wants to use 4, 8 or 16. This may +result in a tiny amount of memory wasted. Example: +
+8       memory alignment in double (> 0)
+
+ +
+

Guide Lines

+ +
    +
  1. Figure out a good block size for the matrix multiply +routine. The best method is to try a few out. If you happen +to know the block size used by the matrix-matrix multiply +routine, a small multiple of that block size will do fine. +This particular topic is discussed in the +FAQs section.

    + +
  2. The process mapping should not matter if the nodes of +your platform are single processor computers. If these nodes +are multi-processors, a row-major mapping is recommended.

    + +
  3. HPL likes "square" or slightly flat process grids. Unless +you are using a very small process grid, stay away from the +1-by-Q and P-by-1 process grids. This particular topic is also +discussed in the FAQs section.

    + +
  4. Panel factorization parameters: a good start are the +following for the lines 14-21: +
    +1       # of panel fact
    +1       PFACTs (0=left, 1=Crout, 2=Right)
    +2       # of recursive stopping criterium
    +4 8     NBMINs (>= 1)
    +1       # of panels in recursion
    +2       NDIVs
    +1       # of recursive panel fact.
    +2       RFACTs (0=left, 1=Crout, 2=Right)
    +
    + +
  5. Broadcast parameters: at this time it is far from obvious +to me what the best setting is, so I would probably try them +all. If I had to guess I would probably start with the +following for the lines 22-23: +
    +2       # of broadcast
    +1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
    +
    +The best broadcast depends on your problem size and hardware +performance. My take is that 4 or 5 may be competitive for +machines featuring very fast nodes comparatively to the +network.

    + +
  6. Look-ahead depth: as mentioned above 0 or 1 are likely to +be the best choices. This also depends on the problem size +and machine configuration, so I would try "no look-ahead (0)" +and "look-ahead of depth 1 (1)". That is for lines 24-25: +
    +2       # of lookahead depth
    +0 1     DEPTHs (>=0)
    +
    + +
  7. Swapping: one can select only one of the three algorithms +in the input file. Theoretically, mix (2) should win, however +long (1) might just be good enough. The difference should be +small between those two assuming a swapping threshold of the +order of the block size (NB) selected. If this threshold is +very large, HPL will use bin_exch (0) most of the time and if +it is very small (< NB) long (1) will always be used. In +short and assuming the block size (NB) used is say 60, I +would choose for the lines 26-27: +
    +2       SWAP (0=bin-exch,1=long,2=mix)
    +60      swapping threshold 
    +
    +I would also try the long variant. For a very small number +of processes in every column of the process grid (say < 4), +very little performance difference should be observable.

    + +
  8. Local storage: I do not think Line 28 matters. Pick 0 if in +doubt. Line 29 is more important. It controls how the panel +of rows should be stored. No doubt 0 is better. The caveat is +that in that case the matrix-multiply function is called with +( Notrans, Trans, ... ), that is C := C - A B^T. Unless the +computational kernel you are using has a very poor (with +respect to performance) implementation of that case, and is +much more efficient with ( Notrans, Notrans, ... ) just pick +0 as well. So, my choice: +
    +0       L1 in (0=transposed,1=no-transposed) form
    +0       U  in (0=transposed,1=no-transposed) form
    +
    + +
  9. Equilibration: It is hard to tell whether equilibration +should always be performed or not. Not knowing much about the +random matrix generated and because the overhead is so small +compared to the possible gain, I turn it on all the time. +
    +1       Equilibration (0=no,1=yes)
    +
    + +
  10. For alignment, 4 should be plenty, but just to be safe, +one may want to pick 8 instead. +
    +8       memory alignment in double (> 0)
    +
    +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_cpu.dat b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_cpu.dat new file mode 100644 index 000000000..a015f8ba5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_cpu.dat @@ -0,0 +1,32 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out (6=stdout,7=stderr,file) +1 # of problems sizes (N) +24576 12288 Ns +1 # of NBs +3072 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256 960 512 768 1152 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +1 # of process grids (P x Q) +1 Ps +1 Qs +16.0 threshold +1 # of panel fact +0 1 2 PFACTs (0=left, 1=Crout, 2=Right) +1 # of recursive stopping criterium +2 8 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +1 # of recursive panel fact. +0 1 2 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +0 2 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +1 0 DEPTHs (>=0) +1 SWAP (0=bin-exch,1=long,2=mix) +192 swapping threshold +1 L1 in (0=transposed,1=no-transposed) form +1 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) + diff --git a/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu.dat b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu.dat new file mode 100644 index 000000000..19a956783 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu.dat @@ -0,0 +1,32 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out (6=stdout,7=stderr,file) +2 # of problems sizes (N) +24576 24576 12288 Ns +1 # of NBs +2048 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256 960 512 768 1152 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +1 # of 
process grids (P x Q) +1 Ps +1 Qs +16.0 threshold +1 # of panel fact +0 1 2 PFACTs (0=left, 1=Crout, 2=Right) +1 # of recursive stopping criterium +2 8 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +1 # of recursive panel fact. +0 1 2 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +0 2 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +1 0 DEPTHs (>=0) +1 SWAP (0=bin-exch,1=long,2=mix) +192 swapping threshold +1 L1 in (0=transposed,1=no-transposed) form +1 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) + diff --git a/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu_2_tile.dat b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu_2_tile.dat new file mode 100644 index 000000000..f84b54155 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/datafiles/HPL_small_gpu_2_tile.dat @@ -0,0 +1,32 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out (6=stdout,7=stderr,file) +1 # of problems sizes (N) +24576 12288 Ns +2 # of NBs +2048 2048 1024 2048 384 640 768 896 960 1024 1152 1280 384 640 960 768 640 256 960 512 768 1152 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +1 # of process grids (P x Q) +1 Ps +2 Qs +16.0 threshold +1 # of panel fact +0 1 2 PFACTs (0=left, 1=Crout, 2=Right) +1 # of recursive stopping criterium +2 8 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +1 # of recursive panel fact. 
+0 1 2 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +0 2 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +1 0 DEPTHs (>=0) +1 SWAP (0=bin-exch,1=long,2=mix) +192 swapping threshold +1 L1 in (0=transposed,1=no-transposed) form +1 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/AUTHORS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/AUTHORS new file mode 100644 index 000000000..b08e25180 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/AUTHORS @@ -0,0 +1,6 @@ +Antoine Petitet +Clint Whaley rcwhaley@lsu.edu +Jack Dongarra dongarra@icl.utk.edu +Andy Cleary +Piotr Luszczek luszczek@icl.utk.edu +Julien Langou Julien.Langou@ucdenver.edu diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/BUGS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/BUGS new file mode 100644 index 000000000..08d694014 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/BUGS @@ -0,0 +1,9 @@ +============================================================== + List of the known problems with the HPL software + + Current as of release HPL - 2.3 - December 2, 2018 +============================================================== + +============================================================== + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYING b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYING new file mode 100644 index 000000000..08465d618 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYING @@ -0,0 +1,45 @@ +====================================================================== + -- High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 + Antoine P. 
Petitet + University of Tennessee, Knoxville + Innovative Computing Laboratory + (C) Copyright 2000-2008 All Rights Reserved + + -- Copyright notice and Licensing terms: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed at the University of + Tennessee, Knoxville, Innovative Computing Laboratory. + + 4. The name of the University, the name of the Laboratory, or the + names of its contributors may not be used to endorse or promote + products derived from this software without specific written + permission. + + -- Disclaimer: + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+====================================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYRIGHT b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYRIGHT new file mode 100644 index 000000000..08465d618 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/COPYRIGHT @@ -0,0 +1,45 @@ +====================================================================== + -- High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 + Antoine P. Petitet + University of Tennessee, Knoxville + Innovative Computing Laboratory + (C) Copyright 2000-2008 All Rights Reserved + + -- Copyright notice and Licensing terms: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed at the University of + Tennessee, Knoxville, Innovative Computing Laboratory. + + 4. The name of the University, the name of the Laboratory, or the + names of its contributors may not be used to endorse or promote + products derived from this software without specific written + permission. + + -- Disclaimer: + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +====================================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/ChangeLog b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/ChangeLog new file mode 100644 index 000000000..1c2b36778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/ChangeLog @@ -0,0 +1,16 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + Done list in version 1.0b, December 15th, 2004 + - Fixed problem with 32-bit integer overflow. + Thanks to John Baron. + + Done list in version 1.0a, January 1st, 2004 + - Added Row- or Column-major process mapping in data file + - Fixed compilation error for gcc 3.3 in walltime. + - Fixed building problems on the T3E; + Thanks to Edward Anderson. 
+ +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/HISTORY b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/HISTORY new file mode 100644 index 000000000..d6d59ee45 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/HISTORY @@ -0,0 +1,103 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + History + + - 09/09/00 Public release of Version 1.0 + + - 09/27/00 A couple of mistakes in the VSIPL port have been + corrected. The tar file as well as the web site were updated + on September 27th, 2000. Note that these problems were not + affecting the BLAS version of the software in any way. + + - 01/01/04 Version 1.0a + The MPI process grid numbering scheme is now a run-time + option. + The inlined assembly timer routine that caused the compila- + tion to fail when using gcc version 3.3 and above has been + removed from the package. + Various building problems on the T3E have been fixed; Thanks + to Edward Anderson. + + - 15/12/04 Version 1.0b + Weakness of the pseudo-random matrix generator found for pro- + blem sizes being powers of two and larger than 2^15; Thanks + to Gregory Bauer. This problem has not been fixed. It is thus + currently recommended to HPL users willing to test matrices + of size larger than 2^15 to not use powers of two. + + When the matrix size is such that one needs > 16 GB per MPI + rank, the intermediate calculation (mat.ld+1) * mat.nq in + HPL_pdtest.c ends up overflowing because it is done using + 32-bit arithmetic. This issue has been fixed by typecasting + to size_t; Thanks to John Baron.
+ + - 09/10/08 Version 2.0 + + Piotr Luszczek changed to 64-bit RNG, modified files: + -- [M] include/hpl_matgen.h + -- [M] testing/matgen/HPL_ladd.c + -- [M] testing/matgen/HPL_lmul.c + -- [M] testing/matgen/HPL_rand.c + -- [M] testing/ptest/HPL_pdinfo.c + + For a motivation for the change, see: + Dongarra and Langou, ``The Problem with the Linpack + Benchmark Matrix Generator'', LAWN 206, June 2008. + + -- [M] testing/ptest/HPL_pdtest.c -- + + Julien Langou changed the test for correctness from + ||Ax-b||_oo / ( eps * ||A||_1 * N ) + ||Ax-b||_oo / ( eps * ||A||_1 * ||x||_1 ) + ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N ) + to the normwise backward error + || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) + See: + Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'', + Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, + Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002. + + Note that in our case || b ||_oo is almost for sure + 1/2, we compute it anyway. + + - 10/26/2012 Version 2.1 + + Piotr Luszczek introduced exact time stamping for HPL_pdgesv(): + -- [M] dist/include/hpl_misc.h + -- [M] dist/testing/ptest/HPL_pdtest.c + + Piotr Luszczek fixed out-of-bounds access in data spreading functions + and exact time stamping for HPL_pdgesv(): + -- [M] dist/src/pgesv/HPL_spreadN.c + -- [M] dist/src/pgesv/HPL_spreadT.c + Thanks to Stephen Whalen from Cray. + + - 02/24/2016 Version 2.2 + + Piotr Luszczek added continuous reporting of factorization progress + submitted by Intel and make scripts that uses Intel software tools and + libraries and their Apple's Mac OS X equivalents. 
+ + - 12/02/2018 Version 2.3 + + Piotr Luszczek removed deprecated MPI functions that are no longer + supported in some MPI implementations (for example Open MPI 4.0) and + replaced them with + modern equivalents in HPL_packL(): + -- [M] src/comm/HPL_packL.c + + Piotr Luszczek added one digit to the display of performance result + and changed display of scaled residual to scientific notation with + extra digits in HPL_pdtest(): + -- [M] testing/ptest/HPL_pdtest.c + + Piotr Luszczek added support for Autotools configuration packages + autoconf and automake: + -- [A] Makefile.am + -- [A] configure.ac + -- [A] acinclude.m4 + -- [A] src/Makefile.am + -- [A] testing/Makefile.am diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/INSTALL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/INSTALL new file mode 100644 index 000000000..fec266c49 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/INSTALL @@ -0,0 +1,81 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + 1) Retrieve the tar file, then + + gunzip hpl.tgz; tar -xvf hpl.tar + + this will create an hpl directory, that we call below the + top-level directory. + + 2) Create a file Make.<arch> in the top-level directory. For + this purpose, you may want to re-use one contained in the + setup directory. This file essentially contains the compilers + and libraries with their paths to be used. + + 3) Type "make arch=<arch>". This should create an executable + in the bin/<arch> directory called xhpl. + + For example, on our Linux PII cluster, I create a file called + Make.Linux_PII in the top-level directory. Then, I type + "make arch=Linux_PII" + This creates the executable file bin/Linux_PII/xhpl.
+ + 4) Quick check: run a few tests: + + cd bin/<arch> + mpirun -np 4 xhpl + + 5) Tuning: Most of the performance parameters can be tuned, + by modifying the input file bin/HPL.dat. See the file TUNING + in the top-level directory. + +============================================================== + + Compile time options: At the end of the "model" Make.<arch>, + --------------------- the user is given the opportunity to + compile the software with some specific compile options. The + list of these options and their meanings are: + + -DHPL_COPY_L + force the copy of the panel L before bcast; + + -DHPL_CALL_CBLAS + call the cblas interface; + + -DHPL_CALL_VSIPL + call the vsip library; + + -DHPL_DETAILED_TIMING + enables detailed timers; + + The user must choose between either the BLAS Fortran 77 + interface, or the BLAS C interface, or the VSIPL library + depending on which computational kernels are available on his + system. Only one of these options should be selected. If you + choose the BLAS Fortran 77 interface, it is necessary to fill + out the machine-specific C to Fortran 77 interface section of + the Make.<arch> file. To do this, please refer to the + Make.<arch> examples contained in the setup directory. + + By default HPL will: + *) not copy L before broadcast, + *) call the BLAS Fortran 77 interface, + *) not display detailed timing information. + + As an example, suppose one wants HPL to copy the panel of + columns into a contiguous buffer before broadcasting. In + theory, it would be more efficient to let HPL create the + appropriate MPI user-defined data type since this may avoid + the data copy. So, it is a strange idea, but one insists. To + achieve this one would add -DHPL_COPY_L to the definition of + HPL_OPTS at the end of the file Make.<arch>. Issue then a + "make clean arch=<arch>; make build arch=<arch>" and the xhpl + executable will be rebuilt with that feature in.
+============================================================== + + Check out the website www.netlib.org/benchmark/hpl for the + latest information. +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.intel64 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.intel64 new file mode 100644 index 000000000..2b55e694f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.intel64 @@ -0,0 +1,244 @@ + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright (C) 2023 Intel Corporation + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. + # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = intel64 +export ARCH = intel64 +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +# Set TOPdir to the location of where this is being built +TOPdir = $(CURDIR) +INCdir = $(TOPdir)/include +BINdir =$(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a + +# +# ---------------------------------------------------------------------- +# - 
Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +OneAPIdir = $(ONEAPI_ROOT) +MPdir = $(OneAPIdir)/mpi/latest/ +MPinc = -I$(MPdir)/include/ +MPlib = -lmpi #$(MPdir)/lib/release/libmpi.so +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(OneAPIdir)/mkl/latest/lib/intel64/ +LAinc = -I$(OneAPIdir)/mkl/latest/include/intel64/ +LAlib = -L$(TOPdir)/src/dpcpp/ -ldgemm -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -I$(TOPdir)/src/dpcpp/ +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. 
+# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) #$(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# -DASYOUGO enable timing information as you go (nonintrusive) +# -DASYOUGO2 slightly intrusive timing information +# -DASYOUGO2_DISPLAY display detailed DGEMM information +# -DENDEARLY end the problem early +# -DFASTSWAP insert to use DLASWP instead of HPL code +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing 
information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +ifeq ($(USE_NVIDIA_BACKEND),ON) +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +else ifeq ($(USE_AMD_BACKEND),ON) +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +else +LINKER = mpiicpc -cxx=icpx -fsycl +LINKFLAGS = $(CCFLAGS) -lmkl_sycl -lmkl_core -lmkl_cdft_core -lmkl_gf_ilp64 -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_intel_ilp64 -lmkl_intel_lp64 -lmkl_rt -lmkl_sequential -lmkl_tbb_thread +endif +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- +MAKE = make VERBOSE=1 arch=$(ARCH) TOPdir=$(TOPdir) diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.top b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.top new file mode 100644 index 000000000..c9980518c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Make.top @@ -0,0 +1,238 @@ + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright (C) 2023 Intel Corporation​ + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. 
Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. + # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +arch = UNKNOWN +# +include Make.$(arch) +# +## build ############################################################### +# +build_src : + ( $(CD) src/auxil/$(arch); $(MAKE) ) + ( $(CD) src/blas/$(arch); $(MAKE) ) + ( $(CD) src/comm/$(arch); $(MAKE) ) + ( $(CD) src/grid/$(arch); $(MAKE) ) + ( $(CD) src/panel/$(arch); $(MAKE) ) + ( $(CD) src/pauxil/$(arch); $(MAKE) ) + ( $(CD) src/pfact/$(arch); $(MAKE) ) + ( $(CD) src/pgesv/$(arch); $(MAKE) ) + ( $(CD) src/dpcpp/; $(MAKE) ) +# +build_tst : + ( $(CD) testing/matgen/$(arch); $(MAKE) ) + ( $(CD) testing/timer/$(arch); $(MAKE) ) + ( $(CD) testing/pmatgen/$(arch); $(MAKE) ) + ( $(CD) testing/ptimer/$(arch); $(MAKE) ) + ( $(CD) testing/ptest/$(arch); $(MAKE) ) +#( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' ) +# +## startup ############################################################# +# +startup_dir : + - $(MKDIR) include/$(arch) + - $(MKDIR) lib + - $(MKDIR) lib/$(arch) + - $(MKDIR) bin + - $(MKDIR) bin/$(arch) +# +startup_src : + - $(MAKE) -f Make.top leaf le=src/auxil arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/blas arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/comm arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/grid arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/panel arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pauxil arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pfact arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pgesv arch=$(arch) +# +startup_tst : + - $(MAKE) -f Make.top leaf le=testing/matgen arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/timer arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/pmatgen arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/ptimer arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/ptest arch=$(arch) +#- SPMS_make_make`' -f Make.top leaf le=testing/test arch=$(arch) +# +## refresh ############################################################# +# +refresh_src : + - 
$(CP) makes/Make.auxil src/auxil/$(arch)/Makefile + - $(CP) makes/Make.blas src/blas/$(arch)/Makefile + - $(CP) makes/Make.comm src/comm/$(arch)/Makefile + - $(CP) makes/Make.grid src/grid/$(arch)/Makefile + - $(CP) makes/Make.panel src/panel/$(arch)/Makefile + - $(CP) makes/Make.pauxil src/pauxil/$(arch)/Makefile + - $(CP) makes/Make.pfact src/pfact/$(arch)/Makefile + - $(CP) makes/Make.pgesv src/pgesv/$(arch)/Makefile +# +refresh_tst : + - $(CP) makes/Make.matgen testing/matgen/$(arch)/Makefile + - $(CP) makes/Make.timer testing/timer/$(arch)/Makefile + - $(CP) makes/Make.pmatgen testing/pmatgen/$(arch)/Makefile + - $(CP) makes/Make.ptimer testing/ptimer/$(arch)/Makefile + - $(CP) makes/Make.ptest testing/ptest/$(arch)/Makefile +#- SPMS_make_cp`' makes/Make.test testing/test/$(arch)/Makefile +# +## clean ############################################################### +# +clean_src : + - ( $(CD) src/auxil/$(arch); $(MAKE) clean ) + - ( $(CD) src/blas/$(arch); $(MAKE) clean ) + - ( $(CD) src/comm/$(arch); $(MAKE) clean ) + - ( $(CD) src/grid/$(arch); $(MAKE) clean ) + - ( $(CD) src/panel/$(arch); $(MAKE) clean ) + - ( $(CD) src/pauxil/$(arch); $(MAKE) clean ) + - ( $(CD) src/pfact/$(arch); $(MAKE) clean ) + - ( $(CD) src/pgesv/$(arch); $(MAKE) clean ) + - ( $(CD) src/dpcpp/; $(MAKE) clean) +# +clean_tst : + - ( $(CD) testing/matgen/$(arch); $(MAKE) clean ) + - ( $(CD) testing/timer/$(arch); $(MAKE) clean ) + - ( $(CD) testing/pmatgen/$(arch); $(MAKE) clean ) + - ( $(CD) testing/ptimer/$(arch); $(MAKE) clean ) + - ( $(CD) testing/ptest/$(arch); $(MAKE) clean ) +#- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' clean ) +# +## clean_arch ########################################################## +# +clean_arch_src : + - $(RM) -r src/auxil/$(arch) + - $(RM) -r src/blas/$(arch) + - $(RM) -r src/comm/$(arch) + - $(RM) -r src/grid/$(arch) + - $(RM) -r src/panel/$(arch) + - $(RM) -r src/pauxil/$(arch) + - $(RM) -r src/pfact/$(arch) + - $(RM) -r src/pgesv/$(arch) + 
- ( $(CD) src/dpcpp; $(MAKE) clean) +# +clean_arch_tst : + - $(RM) -r testing/matgen/$(arch) + - $(RM) -r testing/timer/$(arch) + - $(RM) -r testing/pmatgen/$(arch) + - $(RM) -r testing/ptimer/$(arch) + - $(RM) -r testing/ptest/$(arch) +#- SPMS_make_rm`' -r testing/test/$(arch) +# +## clean_arch_all ###################################################### +# +clean_arch_all : + - $(MAKE) -f Make.top clean_arch_src arch=$(arch) + - $(MAKE) -f Make.top clean_arch_tst arch=$(arch) + - $(RM) -r bin/$(arch) include/$(arch) lib/$(arch) +# +## clean_guard ######################################################### +# +clean_guard_src : + - ( $(CD) src/auxil/$(arch); $(RM) *.grd ) + - ( $(CD) src/blas/$(arch); $(RM) *.grd ) + - ( $(CD) src/comm/$(arch); $(RM) *.grd ) + - ( $(CD) src/grid/$(arch); $(RM) *.grd ) + - ( $(CD) src/panel/$(arch); $(RM) *.grd ) + - ( $(CD) src/pauxil/$(arch); $(RM) *.grd ) + - ( $(CD) src/pfact/$(arch); $(RM) *.grd ) + - ( $(CD) src/pgesv/$(arch); $(RM) *.grd ) +# +clean_guard_tst : + - ( $(CD) testing/matgen/$(arch); $(RM) *.grd ) + - ( $(CD) testing/timer/$(arch); $(RM) *.grd ) + - ( $(CD) testing/pmatgen/$(arch); $(RM) *.grd ) + - ( $(CD) testing/ptimer/$(arch); $(RM) *.grd ) + - ( $(CD) testing/ptest/$(arch); $(RM) *.grd ) +#- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_rm`' *.grd ) +# +## misc ################################################################ +# +leaf : + - ( $(CD) $(le) ; $(MKDIR) $(arch) ) + - ( $(CD) $(le)/$(arch) ; \ + $(LN_S) $(TOPdir)/Make.$(arch) Make.inc ) +# +######################################################################## diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile new file mode 100644 index 000000000..7ab3d9c54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile @@ -0,0 +1,134 @@ + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright 
(C) 2023 Intel Corporation​ + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. + # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# +SHELL = /bin/sh +# +arch = intel64 +make = 'make VERBOSE=1' +# +## Targets ############################################################# +# +all : install +# +# ###################################################################### +# +install : startup refresh build +# +startup : + $(MAKE) -f Make.top startup_dir arch=$(arch) + $(MAKE) -f Make.top startup_src arch=$(arch) + $(MAKE) -f Make.top startup_tst arch=$(arch) + $(MAKE) -f Make.top refresh_src arch=$(arch) + $(MAKE) -f Make.top refresh_tst arch=$(arch) +# +refresh : + $(MAKE) -f Make.top refresh_src arch=$(arch) + $(MAKE) -f Make.top refresh_tst arch=$(arch) +# +build : + $(MAKE) -f Make.top build_src arch=$(arch) + $(MAKE) -f Make.top build_tst arch=$(arch) +# +clean : + $(MAKE) -f Make.top clean_src arch=$(arch) + $(MAKE) -f Make.top clean_tst arch=$(arch) +# +clean_arch : + $(MAKE) -f Make.top clean_arch_src arch=$(arch) + $(MAKE) -f Make.top clean_arch_tst arch=$(arch) +# +clean_arch_all : + $(MAKE) -f Make.top clean_arch_all arch=$(arch) +# +clean_guard : + $(MAKE) -f Make.top clean_guard_src arch=$(arch) + $(MAKE) -f Make.top clean_guard_tst arch=$(arch) +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.am new file mode 
100644 index 000000000..1ad8c1b17 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src testing + +AM_CPPFLAGS = -I$(top_srcdir)/include diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.in new file mode 100644 index 000000000..76f0e2dd6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/Makefile.in @@ -0,0 +1,772 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = . 
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ + $(am__configure_deps) $(am__DIST_COMMON) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + cscope distdir distdir-am dist dist-all distcheck +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print 
each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +CSCOPE = cscope +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/include/hplconfig.h.in AUTHORS COPYING ChangeLog \ + INSTALL NEWS README THANKS TODO compile config.guess \ + config.sub depcomp install-sh missing +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! 
-perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ 
+program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src testing +AM_CPPFLAGS = -I$(top_srcdir)/include +all: all-recursive + +.SUFFIXES: +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +include/hplconfig.h: include/stamp-h1 + @test -f $@ || rm -f include/stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h1 + +include/stamp-h1: $(top_srcdir)/include/hplconfig.h.in $(top_builddir)/config.status + @rm -f include/stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status include/hplconfig.h +$(top_srcdir)/include/hplconfig.h.in: $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f include/stamp-h1 + touch $@ + +distclean-hdr: + 
-rm -f include/hplconfig.h include/stamp-h1 + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case 
$$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. 
+distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build/sub \ + && ../../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=../.. --prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-hdr distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--refresh check check-am clean clean-cscope clean-generic \ + cscope cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \ + dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \ + distcheck distclean distclean-generic distclean-hdr \ + distclean-tags distcleancheck distdir distuninstallcheck dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + 
+.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/NEWS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/NEWS new file mode 100644 index 000000000..d6d59ee45 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/NEWS @@ -0,0 +1,103 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + History + + - 09/09/00 Public release of Version 1.0 + + - 09/27/00 A couple of mistakes in the VSIPL port have been + corrected. The tar file as well as the web site were updated + on September 27th, 2000. Note that these problems were not + affecting the BLAS version of the software in any way. + + - 01/01/04 Version 1.0a + The MPI process grid numbering scheme is now an run-time + option. + The inlined assembly timer routine that caused the compila- + tion to fail when using gcc version 3.3 and above has been + removed from the package. + Various building problems on the T3E have been fixed; Thanks + to Edward Anderson. + + - 15/12/04 Version 1.0b + Weakness of the pseudo-random matrix generator found for pro- + blem sizes being power of twos and larger than 2^15; Thanks + to Gregory Bauer. This problem has not been fixed. It is thus + currently recommended to HPL users willing to test matrices + of size larger than 2^15 to not use power twos. + + When the matrix size is such that one needs > 16 GB per MPI + rank, the intermediate calculation (mat.ld+1) * mat.nq in + HPL_pdtest.c ends up overflowing because it is done using + 32-bit arithmetic. This issue has been fixed by typecasting + to size_t; Thanks to John Baron. 
+ + - 09/10/08 Version 2.0 + + Piotr Luszczek changed to 64-bit RNG, modified files: + -- [M] include/hpl_matgen.h + -- [M] testing/matgen/HPL_ladd.c + -- [M] testing/matgen/HPL_lmul.c + -- [M] testing/matgen/HPL_rand.c + -- [M] testing/ptest/HPL_pdinfo.c + + For a motivation for the change, see: + Dongarra and Langou, ``The Problem with the Linpack + Benchmark Matrix Generator'', LAWN 206, June 2008. + + -- [M] testing/ptest/HPL_pdtest.c -- + + Julien Langou changed the test for correctness from + ||Ax-b||_oo / ( eps * ||A||_1 * N ) + ||Ax-b||_oo / ( eps * ||A||_1 * ||x||_1 ) + ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N ) + to the normwise backward error + || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) + See: + Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'', + Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, + Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002. + + Note that in our case || b ||_oo is almost for sure + 1/2, we compute it anyway. + + - 10/26/2012 Version 2.1 + + Piotr Luszczek introduced exact time stamping for HPL_pdgesv(): + -- [M] dist/include/hpl_misc.h + -- [M] dist/testing/ptest/HPL_pdtest.c + + Piotr Luszczek fixed out-of-bounds access in data spreading functions + and exact time stamping for HPL_pdgesv(): + -- [M] dist/src/pgesv/HPL_spreadN.c + -- [M] dist/src/pgesv/HPL_spreadT.c + Thanks to Stephen Whalen from Cray. + + - 02/24/2016 Version 2.2 + + Piotr Luszczek added continuous reporting of factorization progress + submitted by Intel and make scripts that uses Intel software tools and + libraries and their Apple's Mac OS X equivalents. 
+ + - 12/02/2018 Version 2.3 + + Piotr Luszczek removed deprecated MPI functions that are no longer + supported in some MPI implementations (for example Open MPI 4.0) and + replaced them with + modern equivalents in HPL_packL(): + -- [M] src/comm/HPL_packL.c + + Piotr Luszczek added one digit to the display of performance result + and changed display of scaled residual to scientific notation with + extra digits in HPL_pdtest(): + -- [M] testing/ptest/HPL_pdtest.c + + Piotr Luszczek added support for Autotools configuration packages + autoconf and automake: + -- [A] Makefile.am + -- [A] configure.ac + -- [A] acinclude.m4 + -- [A] src/Makefile.am + -- [A] testing/Makefile.am diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/README b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/README new file mode 100644 index 000000000..c3f79a877 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/README @@ -0,0 +1,32 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + HPL is a software package that solves a (random) dense linear + system in double precision (64 bits) arithmetic on + distributed-memory computers. It can thus be regarded as a + portable as well as freely available implementation of the + High Performance Computing Linpack Benchmark. + + The HPL software package requires the availability on your + system of an implementation of the Message Passing Interface + MPI (1.1 compliant). An implementation of either the Basic + Linear Algebra Subprograms BLAS or the Vector Signal Image + Processing Library VSIPL is also needed. Machine-specific as + well as generic implementations of MPI, the BLAS and VSIPL + are available for a large variety of systems. + + Install See the file INSTALL in this directory. 
+ ------- + + Tuning See the file TUNING in this directory. + ------ + + Bugs Known problems and bugs with this release are documen- + ---- ted in the file hpl/BUGS. + + Check out the website www.netlib.org/benchmark/hpl for the + latest information. + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/THANKS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/THANKS new file mode 100644 index 000000000..1c5641ce4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/THANKS @@ -0,0 +1 @@ +This software was improved with contribution of external developers. diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TODO b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TODO new file mode 100644 index 000000000..1c2b36778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TODO @@ -0,0 +1,16 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + Done list in version 1.0b, December 15th, 2004 + - Fixed problem with 32-bit integer overflow. + Thanks to John Baron. + + Done list in version 1.0a, January 1st, 2004 + - Added Row- or Column-major process mapping in data file + - Fixed compilation error for gcc 3.3 in walltime. + - Fixed building problems on the T3E; + Thanks to Edward Anderson. 
+ +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TUNING b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TUNING new file mode 100644 index 000000000..24707f1fc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/TUNING @@ -0,0 +1,419 @@ +============================================================== + Performance Tuning and setting up the input data file HPL.dat + + Current as of release HPL - 2.3 - December 2, 2018 +============================================================== + Check out the website www.netlib.org/benchmark/hpl for the + latest information. + + After having built the executable hpl/bin/<arch>/xhpl, one + may want to modify the input data file HPL.dat. This file + should reside in the same directory as the executable + hpl/bin/<arch>/xhpl. An example HPL.dat file is provided by + default. This file contains information about the problem + sizes, machine configuration, and algorithm features to be + used by the executable. It is 31 lines long. All the selected + parameters will be printed in the output generated by the + executable. + + At the end of this file, there are a couple of experimental + guide lines that you may find useful. + +============================================================== + File HPL.dat (description): + + Line 1: (unused) Typically one would use this line for its + own good. For example, it could be used to summarize the con- + tent of the input file. By default this line reads: + + HPL Linpack benchmark input file + + Line 2: (unused) same as line 1. By default this line reads: + + Innovative Computing Laboratory, University of Tennessee + + Line 3: the user can choose where the output should be re- + directed to. In the case of a file, a name is necessary, and + this is the line where one wants to specify it. Only the + first name on this line is significant. 
By default, the li- + ne reads: + + HPL.out output file name (if any) + + This means that if one chooses to redirect the output to a + file, the file will be called "HPL.out". The rest of the line + is unused, and this space can be used to put some informative comment on + the meaning of this line. + + Line 4: This line specifies where the output should go. The + line is formatted, it must be a positive integer, the rest is + insignificant. 3 choices are possible for the positive inte- + ger, 6 means that the output will go to the standard output, 7 + means that the output will go to the standard error. Any o- + ther integer means that the output should be redirected + to a file, whose name has been specified in the line above. + This line by default reads: + + 6 device out (6=stdout,7=stderr,file) + + which means that the output generated by the executable + should be redirected to the standard output. + + Line 5: This line specifies the number of problem sizes to be + executed. This number should be less than or equal to 20. The + first integer is significant, the rest is ignored. If the + line reads: + + 3 # of problems sizes (N) + + this means that the user is willing to run 3 problem sizes + that will be specified in the next line. + + Line 6: This line specifies the problem sizes one wants to + run. Assuming the line above started with 3, the 3 first + positive integers are significant, the rest is ignored. For + example: + + 3000 6000 10000 Ns + + means that one wants xhpl to run 3 (specified in line 5) pro- + blem sizes, namely 3000, 6000 and 10000. + + Line 7: This line specifies the number of block sizes to be + run. This number should be less than or equal to 20. + The first integer is significant, the rest is ignored. If the + line reads: + + 5 # of NBs + + this means that the user is willing to use 5 block sizes that + will be specified in the next line. + + Line 8: This line specifies the block sizes one wants to run. 
+ Assuming the line above started with 5, the 5 first positive + integers are significant, the rest is ignored. For example: + + 80 100 120 140 160 NBs + + means that one wants xhpl to use 5 (specified in line 7) + block sizes, namely 80, 100, 120, 140 and 160. + + Line 9 specifies how the MPI processes should be mapped onto + the nodes of your platform. There are currently two possible + mappings, namely row- and column-major. This feature is main- + ly useful when these nodes are themselves multi-processor + computers. A row-major mapping is recommended. + + Line 10: This line specifies the number of process grid to + be runned. This number should be less than or equal to 20. + The first integer is significant, the rest is ignored. If the + line reads: + + 2 # of process grids (P x Q) + + this means that you are willing to try 2 process grid sizes + that will be specified in the next line. + + Line 11-12: These two lines specify the number of process + rows and columns of each grid you want to run on. Assuming + the line above (10) started with 2, the 2 first positive in- + tegers of those two lines are significant, the rest is igno- + red. For example: + + 1 2 Ps + 6 8 Qs + + means that one wants to run xhpl on 2 process grids (line + 10), namely 1 by 6 and 2 by 8. Note: In this example, it is + required then to start xhpl on at least 16 nodes (max of P_i + xQ_i). The runs on the two grids will be consecutive. If one + was starting xhpl on more than 16 nodes, say 52, only 6 would + be used for the first grid (1x6) and then 16 (2x8) would be + used for the second grid. The fact that you started the MPI + job on 52 nodes, will not make HPL use all of them. In this + example, only 16 would be used. If one wants to run xhpl with + 52 processes one needs to specify a grid of 52 processes, for + example the following lines would do the job: + + 4 2 Ps + 13 8 Qs + + Line 13: This line specifies the threshold the residuals + should be compared to. 
The residuals should be of order 1, + but are in practice slightly less than this, typically 0.001. + This line is made of a real number, the rest is insignifi- + cant. For example: + + 16.0 threshold + + In practice, a value of 16.0 will cover most cases. For va- + rious reasons, it is possible that some of the residuals be- + come slightly larger, say for example 35.6. xhpl will flag + those runs as failed, however they can be considered as cor- + rect. A run can be considered as failed if the residual is a + few orders of magnitude bigger than 1 for example 10^6 or mo- + re. Note: if one was to specify a threshold of 0.0, all tests + would be flagged as failed, even though the answer is likely + to be correct. It is allowed to specify a negative value for + this threshold, in which case the checks will be by-passed, + no matter what the value is, as soon as it is negative. This + feature allows one to save time when performing a lot of experi- + ments, say for instance during the tuning phase. Example: + + -16.0 threshold + + The remaining lines allow one to specify algorithmic features. + xhpl will run all possible combinations of those for each + problem size, block size, process grid combination. This is + handy when one looks for an "optimal" set of parameters. To + understand a little bit better, let us first say a few words + about the algorithm implemented in HPL. Basically this is a + right-looking version with row-partial pivoting. The panel + factorization is matrix-matrix operation based and recursive, + dividing the panel into NDIV subpanels at each step. This + part of the panel factorization is denoted below by + "recursive panel fact. (RFACT)". The recursion stops when the + current panel is made of less than or equal to NBMIN columns. + At that point, xhpl uses a matrix-vector operation based + factorization denoted below by "PFACTs". Classic recursion + would then use NDIV=2, NBMIN=1. 
There are essentially 3 + numerically equivalent LU factorization algorithm variants + (left-looking, Crout and right-looking). In HPL, one can + choose every one of those for the RFACT, as well as the + PFACT. The following lines of HPL.dat allow you to set those + parameters. + + Lines 14-21: (Example 1) + 3 # of panel fact + 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + 4 # of recursive stopping criterium + 1 2 4 8 NBMINs (>= 1) + 3 # of panels in recursion + 2 3 4 NDIVs + 3 # of recursive panel fact. + 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + + This example would try all variants of PFACT, 4 values for + NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 + and 4, and all variants for RFACT. Lines 14-21: (Example 2) + + 2 # of panel fact + 2 0 PFACTs (0=left, 1=Crout, 2=Right) + 2 # of recursive stopping criterium + 4 8 NBMINs (>= 1) + 1 # of panels in recursion + 2 NDIVs + 1 # of recursive panel fact. + 2 RFACTs (0=left, 1=Crout, 2=Right) + + This example would try 2 variants of PFACT namely right loo- + king and left looking, 2 values for NBMIN, namely 4 and 8, 1 + value for NDIV namely 2, and one variant for RFACT. + + In the main loop of the algorithm, the current panel of co- + lumn is broadcast in process rows using a virtual ring to- + pology. HPL offers various choices, and one most likely wants + to use the increasing ring modified encoded as 1. 4 is also + a good choice. Lines 22-23: (Example 1): + + 1 # of broadcast + 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + This will cause HPL to broadcast the current panel using the + increasing ring modified topology. Lines 22-23: (Example 2): + + 2 # of broadcast + 0 4 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + This will cause HPL to broadcast the current panel using the + increasing ring virtual topology and the long message algori- + thm. + + Lines 24-25 allow one to specify the look-ahead depth used by + HPL. 
A depth of 0 means that the next panel is factorized af- + ter the update by the current panel is completely finished. A + depth of 1 means that the next panel is factorized immediate- + ly after being updated. The update by the current panel is + then finished. A depth of k means that the k next panels are + factorized immediately after being updated. The update by the + current panel is then finished. It turns out that a depth of + 1 seems to give the best results, but may need a large pro- + blem size before one can see the performance gain. So use 1, + if you do not know better, otherwise you may want to try 0. + Look-ahead of depths 2 and larger will probably not give you + better results. Lines 24-25: (Example 1): + + 1 # of lookahead depth + 1 DEPTHs (>=0) + + This will cause HPL to use a look-ahead of depth 1. + Lines 24-25: (Example 2): + + 2 # of lookahead depth + 0 1 DEPTHs (>=0) + + This will cause HPL to use a look-ahead of depths 0 and 1. + + Lines 26-27 allow to specify the swapping algorithm used by + HPL for all tests. There are currently two swapping algo- + rithms available, one based on "binary exchange" and the + other one based on a "spread-roll" procedure (also called + "long" below. For large problem sizes, this last one is like- + ly to be more efficient. The user can also choose to mix both + variants, that is "binary-exchange" for a number of columns + less than a threshold value, and then the "spread-roll" al- + gorithm. This threshold value is then specified on Line 27. + Lines 26-27: (Example 1): + + 1 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + This will cause HPL to use the "long" or "spread-roll" swap- + ping algorithm. Note that a threshold is specified in that + example but not used by HPL. Lines 26-27: (Example 2): + + 2 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + This will cause HPL to use the "long" or "spread-roll" swap- + ping algorithm as soon as there is more than 60 columns in + the row panel. 
Otherwise, the "binary-exchange" algorithm + will be used instead. + + Line 28 allows to specify whether the upper triangle of the + panel of columns should be stored in no-transposed or + transposed form. Example: + + 0 L1 in (0=transposed,1=no-transposed) form + + Line 29 allows to specify whether the panel of rows U should + be stored in no-transposed or transposed form. Example: + + 0 U in (0=transposed,1=no-transposed) form + + Line 30 enables/disables the equilibration phase. This option + will not be used unless you selected 1 or 2 in Line 26. Ex: + + 1 Equilibration (0=no,1=yes) + + + Line 31 allows to specify the alignment in memory for the + memory space allocated by HPL. On modern machines, one proba- + bly wants to use 4, 8 or 16. This may result in a tiny amount + of memory wasted. Example: + + 4 memory alignment in double (> 0) + +============================================================== + Guide lines: + + 1) Figure out a good block size for the matrix-matrix + multiply routine. The best method is to try a few out. If you + happen to know the block size used by the matrix-matrix + multiply routine, a small multiple of that block size will do + fine. + + HPL uses the block size NB for the data distribution as well + as for the computational granularity. From a data + distribution point of view, the smallest NB, the better the + load balance. You definitely want to stay away from very + large values of NB. From a computation point of view, a too + small value of NB may limit the computational performance by + a large factor because almost no data reuse will occur in the + highest level of the memory hierarchy. The number of messages + will also increase. Efficient matrix-multiply routines are + often internally blocked. Small multiples of this blocking + factor are likely to be good block sizes for HPL. The bottom + line is that "good" block sizes are almost always in the + [32..256] interval. 
The best values depend on the computation + / communication performance ratio of your system. To a much + lesser extent, the problem size matters as well. Say for + example, you empirically found that 44 was a good block size + with respect to performance. 88 or 132 are likely to give + slightly better results for large problem sizes because of a + slightly higher flop rate. + + 2) The process mapping should not matter if the nodes of + your platform are single processor computers. If these nodes + are multi-processors, a row-major mapping is recommended. + + 3) HPL likes "square" or slightly flat process grids. Unless + you are using a very small process grid, stay away from the + 1-by-Q and P-by-1 process grids. + + 4) Panel factorization parameters: a good start are the fol- + lowing for the lines 14-21: + + 1 # of panel fact + 1 PFACTs (0=left, 1=Crout, 2=Right) + 2 # of recursive stopping criterium + 4 8 NBMINs (>= 1) + 1 # of panels in recursion + 2 NDIVs + 1 # of recursive panel fact. + 2 RFACTs (0=left, 1=Crout, 2=Right) + + 5) Broadcast parameters: at this time, it is far from obvious + to me what the best setting is, so I would probably try them + all. If I had to guess I would probably start with the follo- + wing for the lines 22-23: + + 2 # of broadcast + 1 3 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + The best broadcast depends on your problem size and hardware + performance. My take is that 4 or 5 may be competitive for + machines featuring very fast nodes compared to the + network. + + 6) Look-ahead depth: as mentioned above 0 or 1 are likely to + be the best choices. This also depends on the problem size + and machine configuration, so I would try "no look-ahead (0)" + and "look-ahead of depth 1 (1)". That is for lines 24-25: + + 2 # of lookahead depth + 0 1 DEPTHs (>=0) + + 7) Swapping: one can select only one of the three algorithms + in the input file. Theoretically, mix (2) should win, however + long (1) might just be good enough. 
The difference should be + small between those two assuming a swapping threshold of the + order of the block size (NB) selected. If this threshold is + very large, HPL will use bin_exch (0) most of the time and if + it is very small (< NB) long (1) will always be used. In + short and assuming the block size (NB) used is say 60, I + would choose for the lines 26-27: + + 2 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + I would also try the long variant. For a very small number + of processes in every column of the process grid (say < 4), + very little performance difference should be observable. + + 8) Local storage: I do not think Line 28 matters. Pick 0 in + doubt. Line 29 is more important. It controls how the panel + of rows should be stored. No doubt 0 is better. The caveat is + that in that case the matrix-multiply function is called with + ( Notrans, Trans, ... ), that is C := C - A B^T. Unless the + computational kernel you are using has a very poor (with + respect to performance) implementation of that case, and is + much more efficient with ( Notrans, Notrans, ... ) just pick + 0 as well. So, my choice: + + 0 L1 in (0=transposed,1=no-transposed) form + 0 U in (0=transposed,1=no-transposed) form + + 9) Equilibration: It is hard to tell whether equilibration + should always be performed or not. Not knowing much about the + random matrix generated and because the overhead is so small + compared to the possible gain, I turn it on all the time. + + 1 Equilibration (0=no,1=yes) + + 10) For alignment, 4 should be plenty, but just to be safe, + one may want to pick 8 instead. 
+ + 8 memory alignment in double (> 0) + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/acinclude.m4 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/acinclude.m4 new file mode 100644 index 000000000..4072a950f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/acinclude.m4 @@ -0,0 +1,90 @@ + +AC_DEFUN([HPL_BLAS], [ + +AC_PREREQ(2.69) + +hpl_blas_ok=no + +dnl FIXME: add --with-blas="" + +current_LIBS="$LIBS" + +cat <<HPLEOF > hplvars.txt +name1=OpenBLAS +rout1=dgemm_ +libs1=-lopenblas -lm + +name2=Atlas Fortran BLAS +rout2=dgemm_ +libs2=-lf77blas -latlas + +name3=Sequential Intel MKL LP64 (group) +rout3=dgemm_ +libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread + +name4=Sequential Intel MKL LP64 +rout4=dgemm_ +libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +name5=AMD's ACML +rout5=dgemm_ +libs5=-lacml -lm + +name6=Accelerate +rout6=dgemm_ +libs6=-framework Accelerate + +name7=Apple VecLib +rout7=dgemm_ +libs7=-framework vecLib + +name8=IBM ESSL +rout8=dgemm_ +libs8=-lessl + +name9=NVIDIA nvblas +rout9=dgemm_ +libs9=-lnvblas + +name10=Generic BLAS +rout10=dgemm_ +libs10=-lblas + +HPLEOF +for hpl_i in 1 2 3 4 5 6 7 8 9 10; +do +if test x$hpl_blas_ok = xno; then + name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`" + rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`" + libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`" + AC_MSG_CHECKING([for [$]rout in [$]name]) + + LIBS="[$]libs" + AC_TRY_LINK_FUNC([$]rout, [hpl_blas_ok=yes;BLAS_LIBS="[$]libs"]) + LIBS="$current_LIBS" + + AC_MSG_RESULT($hpl_blas_ok) +fi +done +rm hplvars.txt + +if test x$hpl_blas_ok = xno; then +dnl +AC_MSG_CHECKING([for dgemm_ in OpenBLAS]) +AC_CHECK_LIB(openblas, dgemm_, [hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"]) +AC_MSG_RESULT($hpl_blas_ok) +dnl +fi + +AC_SUBST(BLAS_LIBS) + 
+# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$hpl_blas_ok" = xyes; then + ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1]) + : +else + hpl_blas_ok=no + $2 +fi + +])dnl HPL_BLAS diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/aclocal.m4 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/aclocal.m4 new file mode 100644 index 000000000..56c6bd753 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/aclocal.m4 @@ -0,0 +1,1308 @@ +# generated automatically by aclocal 1.16.1 -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. 
+To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_prog_cc_mpi.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PROG_CC_MPI([MPI-WANTED-TEST[, ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]]) +# +# DESCRIPTION +# +# This macro tries to find out how to compile C programs that use MPI +# (Message Passing Interface), a standard API for parallel process +# communication (see http://www-unix.mcs.anl.gov/mpi/). The macro has to +# be used instead of the standard macro AC_PROG_CC and will replace the +# standard variable CC with the found compiler. +# +# MPI-WANTED-TEST is used to test whether MPI is actually wanted by the +# user. If MPI-WANTED_TEST is omitted or if it succeeds, the macro will +# try to find out how to use MPI, if it fails, the macro will call +# AC_PROG_CC to find a standard C compiler instead. +# +# When MPI is found, ACTION-IF-FOUND will be executed, if MPI is not found +# (or MPI-WANTED-TEST fails) ACTION-IF-NOT-FOUND is executed. If +# ACTION-IF-FOUND is not set, the macro will define HAVE_MPI. +# +# The following example demonstrates usage of the macro: +# +# # If --with-mpi=auto is used, try to find MPI, but use standard C compiler if it is not found. +# # If --with-mpi=yes is used, try to find MPI and fail if it isn't found. +# # If --with-mpi=no is used, use a standard C compiler instead. +# AC_ARG_WITH(mpi, [AS_HELP_STRING([--with-mpi], +# [compile with MPI (parallelization) support. If none is found, +# MPI is not used. 
Default: auto]) +# ],,[with_mpi=auto]) +# # +# AX_PROG_CC_MPI([test x"$with_mpi" != xno],[use_mpi=yes],[ +# use_mpi=no +# if test x"$with_mpi" = xyes; then +# AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.]) +# else +# AC_MSG_WARN([No MPI compiler found, won't use MPI.]) +# fi +# ]) +# +# LICENSE +# +# Copyright (c) 2010,2011 Olaf Lenz +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 2 + +AC_DEFUN([AX_PROG_CC_MPI], [ +AC_PREREQ(2.50) + +# Check for compiler +# Needs to be split off into an extra macro to ensure right expansion +# order. 
+AC_REQUIRE([_AX_PROG_CC_MPI],[_AX_PROG_CC_MPI([$1])]) + +AS_IF([test x"$_ax_prog_cc_mpi_mpi_wanted" = xno], + [ _ax_prog_cc_mpi_mpi_found=no ], + [ + AC_LANG_PUSH([C]) + # test whether MPI_Init is available + # We do not use AC_SEARCH_LIBS here, as it caches its outcome and + # thus disallows corresponding calls in the other AX_PROG_*_MPI + # macros. + for lib in NONE mpi mpich; do + save_LIBS=$LIBS + if test x"$lib" = xNONE; then + AC_MSG_CHECKING([for function MPI_Init]) + else + AC_MSG_CHECKING([for function MPI_Init in -l$lib]) + LIBS="-l$lib $LIBS" + fi + AC_LINK_IFELSE([AC_LANG_CALL([],[MPI_Init])], + [ _ax_prog_cc_mpi_mpi_found=yes ], + [ _ax_prog_cc_mpi_mpi_found=no ]) + AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_found) + if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then + break; + fi + LIBS=$save_LIBS + done + + # Check for header + AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [ + AC_MSG_CHECKING([for mpi.h]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <mpi.h>])], + [ AC_MSG_RESULT(yes)], + [ AC_MSG_RESULT(no) + _ax_prog_cc_mpi_mpi_found=no + ]) + ]) + AC_LANG_POP([C]) +]) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [ + ifelse([$2],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$2]) + : +],[ + $3 + : +]) + +])dnl AX_PROG_CC_MPI + +dnl _AX_PROG_CC_MPI is an internal macro required by AX_PROG_CC_MPI. +dnl To ensure the right expansion order, the main function AX_PROG_CC_MPI +dnl has to be split into two parts. 
+dnl +dnl Known MPI C compilers: +dnl mpicc +dnl mpixlc_r +dnl mpixlc +dnl hcc +dnl mpxlc_r +dnl mpxlc +dnl sxmpicc NEC SX +dnl mpifcc Fujitsu +dnl mpgcc +dnl mpcc +dnl cmpicc +dnl cc +dnl +AC_DEFUN([_AX_PROG_CC_MPI], [ + AC_ARG_VAR(MPICC,[MPI C compiler command]) + ifelse([$1],,[_ax_prog_cc_mpi_mpi_wanted=yes],[ + AC_MSG_CHECKING([whether to compile using MPI]) + if $1; then + _ax_prog_cc_mpi_mpi_wanted=yes + else + _ax_prog_cc_mpi_mpi_wanted=no + fi + AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_wanted) + ]) + if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then + if test -z "$CC" && test -n "$MPICC"; then + CC="$MPICC" + else + AC_CHECK_TOOLS([CC], [mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc]) + fi + fi + AC_PROG_CC +])dnl _AX_PROG_CC_MPI + +# Copyright (C) 2002-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.1], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. 
+m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.1])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason for the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will break when you +# start a VPATH build or use an absolute $srcdir.
+# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevents a +# configured tree from being moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all its +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove.
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. 
It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. 
+ AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all seds we know of understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package.
+dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms.
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present.
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542> + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: <https://www.gnu.org/software/coreutils/>. + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers.
+_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check whether make has an 'include' directive that can support all +# the idioms we need for our automatic dependency tracking code. 
+AC_DEFUN([AM_MAKE_INCLUDE], +[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) + AS_CASE([$?:`cat confinc.out 2>/dev/null`], + ['0:this is the am__doit target'], + [AS_CASE([$s], + [BSD], [am__include='.include' am__quote='"'], + [am__include='include' am__quote=''])]) + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +AC_MSG_RESULT([${_am_result}]) +AC_SUBST([am__include])]) +AC_SUBST([am__quote])]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. 
+AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. 
+AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. 
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SANITY_CHECK
+# ---------------
+# Abort configure when the working directory or $srcdir contains unsafe
+# characters, when 'ls -t' misbehaves, or when a freshly created file is
+# not newer than $srcdir/configure (typically a wrong system clock).
+AC_DEFUN([AM_SANITY_CHECK],
+[AC_MSG_CHECKING([whether build environment is sane])
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name. Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[[\\\"\#\$\&\'\`$am_lf]]*)
+    AC_MSG_ERROR([unsafe absolute working directory name]);;
+esac
+case $srcdir in
+  *[[\\\"\#\$\&\'\`$am_lf\ \	]]*)
+    AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments. Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$[*]" = "X"; then
+        # -L didn't work.
+        set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$[*]" != "X $srcdir/configure conftest.file" \
+        && test "$[*]" != "X conftest.file $srcdir/configure"; then
+
+        # If neither matched, then we have a broken ls. This can happen
+        # if, for instance, CONFIG_SHELL is bash and it inherits a
+        # broken ls alias from the environment. This has actually
+        # happened. Such a system could not be considered "sane".
+        AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
+ alias in your environment])
+     fi
+     if test "$[2]" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$[2]" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+AC_MSG_RESULT([yes])
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+AC_CONFIG_COMMANDS_PRE(
+  [AC_MSG_CHECKING([that generated files are newer than configure])
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   AC_MSG_RESULT([done])])
+rm -f conftest.file
+])
+
+# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SILENT_RULES([DEFAULT])
+# --------------------------
+# Enable less verbose build rules; with the default set to DEFAULT
+# ("yes" being less verbose, "no" or empty being verbose).
+AC_DEFUN([AM_SILENT_RULES],
+[AC_ARG_ENABLE([silent-rules], [dnl
+AS_HELP_STRING(
+  [--enable-silent-rules],
+  [less verbose build output (undo: "make V=1")])
+AS_HELP_STRING(
+  [--disable-silent-rules],
+  [verbose build output (undo: "make V=0")])dnl
+])
+case $enable_silent_rules in @%:@ (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);;
+esac
+dnl
+dnl A few 'make' implementations (e.g., NonStop OS and NextStep)
+dnl do not support nested variable expansions.
+dnl See automake bug#9928 and bug#10237.
+dnl The recipe line of the probe makefile below must start with a hard TAB.
+am_make=${MAKE-make}
+AC_CACHE_CHECK([whether $am_make supports nested variables],
+   [am_cv_make_support_nested_variables],
+   [if AS_ECHO([['TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi])
+if test $am_cv_make_support_nested_variables = yes; then
+  dnl Using '$V' instead of '$(V)' breaks IRIX make.
+  AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AC_SUBST([AM_V])dnl
+AM_SUBST_NOTMAKE([AM_V])dnl
+AC_SUBST([AM_DEFAULT_V])dnl
+AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl
+AC_SUBST([AM_DEFAULT_VERBOSITY])dnl
+AM_BACKSLASH='\'
+AC_SUBST([AM_BACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
+])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_STRIP
+# ---------------------
+# One issue with vendor 'install' (even GNU) is that you can't
+# specify the program used to strip binaries. This is especially
+# annoying in cross-compiling environments, where the build's strip
+# is unlikely to handle the host's binaries.
+# Fortunately install-sh will honor a STRIPPROG variable, so we
+# always use install-sh in "make install-strip", and initialize
+# STRIPPROG with the value of the STRIP variable (set by the user).
+AC_DEFUN([AM_PROG_INSTALL_STRIP],
+[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+# Installed binaries are usually stripped using 'strip' when the user
+# run "make install-strip". However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'.
+if test "$cross_compiling" != no; then
+  AC_CHECK_TOOL([STRIP], [strip], :)
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+AC_SUBST([INSTALL_STRIP_PROGRAM])])
+
+# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro is traced by Automake.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# --------------------------
+# Public sister of _AM_SUBST_NOTMAKE.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
+
+# Check how to create a tarball. -*- Autoconf -*-
+
+# Copyright (C) 2004-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT.
+# FORMAT should be one of 'v7', 'ustar', or 'pax'.
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extracts such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+#
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility. Yes, it's still used
+# in the wild :-( We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
+
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID. In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility. Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection).
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached. We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now. We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
+  rm -rf conftest.dir
+
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
+m4_include([acinclude.m4])
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/compile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/compile
new file mode 100755
index 000000000..99e50524b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/compile
@@ -0,0 +1,348 @@
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+
+scriptversion=2018-03-07.03; # UTC
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" ""	$nl"
+
+file_conv=
+
+# func_file_conv build_file lazy
+# Convert a $build file to $host form and store it in $file
+# Currently only supports Windows hosts. If the determined conversion
+# type is listed in (the comma separated) LAZY, no conversion will
+# take place.
+func_file_conv ()
+{
+  file=$1
+  case $file in
+    / | /[!/]*) # absolute file, and not a UNC file
+      if test -z "$file_conv"; then
+        # lazily determine how to convert abs files
+        case `uname -s` in
+          MINGW*)
+            file_conv=mingw
+            ;;
+          CYGWIN*)
+            file_conv=cygwin
+            ;;
+          *)
+            file_conv=wine
+            ;;
+        esac
+      fi
+      case $file_conv/,$2, in
+        *,$file_conv,*)
+          ;;
+        mingw/*)
+          file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+          ;;
+        cygwin/*)
+          file=`cygpath -m "$file" || echo "$file"`
+          ;;
+        wine/*)
+          file=`winepath -w "$file" || echo "$file"`
+          ;;
+      esac
+      ;;
+  esac
+}
+
+# func_cl_dashL linkdir
+# Make cl look for libraries in LINKDIR
+func_cl_dashL ()
+{
+  func_file_conv "$1"
+  if test -z "$lib_path"; then
+    lib_path=$file
+  else
+    lib_path="$lib_path;$file"
+  fi
+  linker_opts="$linker_opts -LIBPATH:$file"
+}
+
+# func_cl_dashl library
+# Do a library search-path lookup for cl
+func_cl_dashl ()
+{
+  lib=$1
+  found=no
+  save_IFS=$IFS
+  IFS=';'
+  for dir in $lib_path $LIB
+  do
+    IFS=$save_IFS
+    if $shared && test -f "$dir/$lib.dll.lib"; then
+      found=yes
+      lib=$dir/$lib.dll.lib
+      break
+    fi
+    if test -f "$dir/$lib.lib"; then
+      found=yes
+      lib=$dir/$lib.lib
+      break
+    fi
+    if test -f "$dir/lib$lib.a"; then
+      found=yes
+      lib=$dir/lib$lib.a
+      break
+    fi
+  done
+  IFS=$save_IFS
+
+  if test "$found" != yes; then
+    lib=$lib.lib
+  fi
+}
+
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl
+func_cl_wrapper ()
+{
+  # Assume a capable shell
+  lib_path=
+  shared=:
+  linker_opts=
+  for arg
+  do
+    if test -n "$eat"; then
+      eat=
+    else
+      case $1 in
+        -o)
+          # configure might choose to run compile as 'compile cc -o foo foo.c'.
+          eat=1
+          case $2 in
+            *.o | *.[oO][bB][jJ])
+              func_file_conv "$2"
+              set x "$@" -Fo"$file" # append translated option; x is a placeholder
+              shift # drop the placeholder again
+              ;;
+            *)
+              func_file_conv "$2"
+              set x "$@" -Fe"$file"
+              shift
+              ;;
+          esac
+          ;;
+        -I)
+          eat=1
+          func_file_conv "$2" mingw
+          set x "$@" -I"$file"
+          shift
+          ;;
+        -I*)
+          func_file_conv "${1#-I}" mingw
+          set x "$@" -I"$file"
+          shift
+          ;;
+        -l)
+          eat=1
+          func_cl_dashl "$2"
+          set x "$@" "$lib"
+          shift
+          ;;
+        -l*)
+          func_cl_dashl "${1#-l}"
+          set x "$@" "$lib"
+          shift
+          ;;
+        -L)
+          eat=1
+          func_cl_dashL "$2"
+          ;;
+        -L*)
+          func_cl_dashL "${1#-L}"
+          ;;
+        -static)
+          shared=false
+          ;;
+        -Wl,*)
+          arg=${1#-Wl,}
+          save_ifs="$IFS"; IFS=','
+          for flag in $arg; do
+            IFS="$save_ifs"
+            linker_opts="$linker_opts $flag"
+          done
+          IFS="$save_ifs"
+          ;;
+        -Xlinker)
+          eat=1
+          linker_opts="$linker_opts $2"
+          ;;
+        -*)
+          set x "$@" "$1"
+          shift
+          ;;
+        *.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+          func_file_conv "$1"
+          set x "$@" -Tp"$file"
+          shift
+          ;;
+        *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+          func_file_conv "$1" mingw
+          set x "$@" "$file"
+          shift
+          ;;
+        *)
+          set x "$@" "$1"
+          shift
+          ;;
+      esac
+    fi
+    shift # discard the original argument that was just examined
+  done
+  if test -n "$linker_opts"; then
+    linker_opts="-link$linker_opts"
+  fi
+  exec "$@" $linker_opts
+  exit 1
+}
+
+eat=
+
+case $1 in
+  '')
+     echo "$0: No command. Try '$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "compile $scriptversion"
+    exit $?
+    ;;
+  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
+  icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
+    func_cl_wrapper "$@" # Doesn't return...
+    ;;
+esac
+
+ofile=
+cfile=
+
+for arg
+do
+  if test -n "$eat"; then
+    eat=
+  else
+    case $1 in
+      -o)
+        # configure might choose to run compile as 'compile cc -o foo foo.c'.
+        # So we strip '-o arg' only if arg is an object.
+        eat=1
+        case $2 in
+          *.o | *.obj)
+            ofile=$2
+            ;;
+          *)
+            set x "$@" -o "$2"
+            shift
+            ;;
+        esac
+        ;;
+      *.c)
+        cfile=$1
+        set x "$@" "$1"
+        shift
+        ;;
+      *)
+        set x "$@" "$1"
+        shift
+        ;;
+    esac
+  fi
+  shift # discard the original argument that was just examined
+done
+
+if test -z "$ofile" || test -z "$cfile"; then
+  # If no '-o' option was seen then we might have been invoked from a
+  # pattern rule where we don't need one. That is ok -- this is a
+  # normal compilation that the losing compiler can handle. If no
+  # '.c' file was seen then we are probably linking. That is also
+  # ok.
+  exec "$@"
+fi
+
+# Name of file we expect compiler to create.
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+
+# Create the lock directory.
+# Note: use '[/\\:.-]' here to ensure that we don't use the same name
+# that we are using for the .o file. Also, base the name on the expected
+# object file name, since that is what matters with a parallel build.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+while true; do
+  if mkdir "$lockdir" >/dev/null 2>&1; then
+    break
+  fi
+  sleep 1
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+
+# Run the compile.
+"$@"
+ret=$?
+
+if test -f "$cofile"; then
+  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
+elif test -f "${cofile}bj"; then # same name with the .obj spelling
+  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
+fi
+
+rmdir "$lockdir"
+exit $ret
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC0"
+# time-stamp-end: "; # UTC"
+# End:
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.guess b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.guess
new file mode 100755
index 000000000..256083a70
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.guess
@@ -0,0 +1,1476 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+# Copyright 1992-2018 Free Software Foundation, Inc.
+
+timestamp='2018-03-08'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program. This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+#
+# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
+#
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+#
+# Please send patches to <config-patches@gnu.org>.
+
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Options:
+ -h, --help print this help, then exit
+ -t, --time-stamp print date of last modification, then exit
+ -v, --version print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- ) # Stop option processing
+       shift; break ;;
+    - ) # Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )
+       break ;;
+  esac
+done
+
+if test $# != 0; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+trap 'exit 1' 1 2 15 # leave with status 1 on HUP, INT or TERM
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+ +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > "$dummy.c" ; + for c in cc gcc c89 c99 ; do + if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "$UNAME_SYSTEM" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. 
+ LIBC=gnu + + eval "$set_cc_for_build" + cat <<-EOF > "$dummy.c" + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`" + + # If ldd exists, use it to detect musl libc. + if command -v ldd >/dev/null && \ + ldd --version 2>&1 | grep -q ^musl + then + LIBC=musl + fi + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)` + case "$UNAME_MACHINE_ARCH" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine="${arch}${endian}"-unknown + ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. 
+ case "$UNAME_MACHINE_ARCH" in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval "$set_cc_for_build" + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. + case "$UNAME_MACHINE_ARCH" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "$UNAME_VERSION" in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "$machine-${os}${release}${abi}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" + exit ;; + *:ekkoBSD:*:*) + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" + exit ;; + *:SolidBSD:*:*) + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:MirBSD:*:*) + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:Sortix:*:*) + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`" + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? 
+ trap '' 0 + exit $exitcode ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix"$UNAME_RELEASE" + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux"$UNAME_RELEASE" + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval "$set_cc_for_build" + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`" + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos"$UNAME_RELEASE" + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos"$UNAME_RELEASE" + ;; + sun4) + echo sparc-sun-sunos"$UNAME_RELEASE" + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos"$UNAME_RELEASE" + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint"$UNAME_RELEASE" + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint"$UNAME_RELEASE" + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint"$UNAME_RELEASE" + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten"$UNAME_RELEASE" + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten"$UNAME_RELEASE" + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix"$UNAME_RELEASE" + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix"$UNAME_RELEASE" + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix"$UNAME_RELEASE" + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos"$UNAME_RELEASE" + exit ;; + Motorola:PowerMAX_OS:*:*) + 
echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ] + then + if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \ + [ "$TARGET_BINARY_INTERFACE"x = x ] + then + echo m88k-dg-dgux"$UNAME_RELEASE" + else + echo m88k-dg-dguxbcs"$UNAME_RELEASE" + fi + else + echo i586-dg-dgux"$UNAME_RELEASE" + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`" + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #include <sys/systemcfg.h> + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??)
HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "$sc_cpu_version" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "$sc_kernel_bits" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "$HP_ARCH" = "" ]; then + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ "$HP_ARCH" = hppa2.0w ] + then + eval "$set_cc_for_build" + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code.
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux"$HPUX_REV" + exit ;; + 3050*:HI-UX:*:*) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #include <unistd.h> + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo "$UNAME_MACHINE"-unknown-osf1mk + else + echo "$UNAME_MACHINE"-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; +
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:BSD/OS:*:*) + echo 
"$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case "$UNAME_PROCESSOR" in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + i*:CYGWIN*:*) + echo "$UNAME_MACHINE"-pc-cygwin + exit ;; + *:MINGW64*:*) + echo "$UNAME_MACHINE"-pc-mingw64 + exit ;; + *:MINGW*:*) + echo "$UNAME_MACHINE"-pc-mingw32 + exit ;; + *:MSYS*:*) + echo "$UNAME_MACHINE"-pc-msys + exit ;; + i*:PW*:*) + echo "$UNAME_MACHINE"-pc-pw32 + exit ;; + *:Interix*:*) + case "$UNAME_MACHINE" in + x86) + echo i586-pc-interix"$UNAME_RELEASE" + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix"$UNAME_RELEASE" + exit ;; + IA64) + echo ia64-unknown-interix"$UNAME_RELEASE" + exit ;; + esac ;; + i*:UWIN*:*) + echo "$UNAME_MACHINE"-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + *:GNU:*:*) + # the GNU system + echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`" + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC" + exit ;; + i*86:Minix:*:*) + echo "$UNAME_MACHINE"-pc-minix + exit ;; + aarch64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + 
EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arm*:Linux:*:*) + eval "$set_cc_for_build" + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi + else + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + cris:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + crisv32:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + e2k:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + frv:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + hexagon:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + ia64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + k1om:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m32r*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m68*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E 
"$dummy.c" 2>/dev/null | grep '^CPU'`" + test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; } + ;; + mips64el:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + openrisc*:Linux:*:*) + echo or1k-unknown-linux-"$LIBC" + exit ;; + or32:Linux:*:* | or1k*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-"$LIBC" + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-"$LIBC" + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-"$LIBC" + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-"$LIBC" + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-"$LIBC" + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-"$LIBC" + exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" + exit ;; + sh64*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sh*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + tile*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + vax:Linux:*:*) + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" + exit ;; + x86_64:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + xtensa*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. 
+ echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo "$UNAME_MACHINE"-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo "$UNAME_MACHINE"-unknown-stop + exit ;; + i*86:atheos:*:*) + echo "$UNAME_MACHINE"-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo "$UNAME_MACHINE"-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos"$UNAME_RELEASE" + exit ;; + i*86:*DOS:*:*) + echo "$UNAME_MACHINE"-pc-msdosdjgpp + exit ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. 
+ case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` + echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL" + elif /bin/uname -X 2>/dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. + echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos"$UNAME_RELEASE" + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos"$UNAME_RELEASE" + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv"$UNAME_RELEASE" + exit ;; + 
RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo "$UNAME_MACHINE"-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo "$UNAME_MACHINE"-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux"$UNAME_RELEASE" + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv"$UNAME_RELEASE" + else + echo mips-unknown-sysv"$UNAME_RELEASE" + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux"$UNAME_RELEASE" + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux"$UNAME_RELEASE" + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux"$UNAME_RELEASE" + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux"$UNAME_RELEASE" + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux"$UNAME_RELEASE" + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Rhapsody:*:*) + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + eval "$set_cc_for_build" + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. 
+ UNAME_PROCESSOR=x86_64 + fi + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = 386; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo "$UNAME_MACHINE"-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux"$UNAME_RELEASE" + exit ;; + *:DragonFly:*:*) + echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "$UNAME_MACHINE" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`" + exit ;; + i*86:rdos:*:*) + echo "$UNAME_MACHINE"-pc-rdos + exit ;; + i*86:AROS:*:*) + echo "$UNAME_MACHINE"-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo "$UNAME_MACHINE"-unknown-esx + exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; +esac + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.sub b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.sub new file mode 100755 index 000000000..9ccf09a7a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/config.sub @@ -0,0 +1,1801 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-03-08' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to . +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." 
+ +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2018 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo "$1" + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + android-linux) + os=-linux-android + basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + ;; + *) + basic_machine=`echo "$1" | sed 's/-[^-]*$//'` + if [ "$basic_machine" != "$1" ] + then os=`echo "$1" | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work.
We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze*) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*178) + os=-lynxos178 + ;; + -lynx*5) + os=-lynxos5 + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arceb \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | ba \ + | be32 | be64 \ + | bfin \ + | c4x | c8051 | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i860 | i960 | ia16 | ia64 \ + | ip2k | iq2000 \ + | k1om \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 
| mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 | nios2eb | nios2el \ + | ns16k | ns32k \ + | open8 | or1k | or1knd | or32 \ + | pdp10 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ + | pyramid \ + | riscv32 | riscv64 \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | visium \ + | wasm32 \ + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + leon|leon[3-9]) + basic_machine=sparc-$basic_machine + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65) + ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. 
+ i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | ba-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | c8051-* | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | e2k-* | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ + | i*86-* | i860-* | i960-* | ia16-* | ia64-* \ + | ip2k-* | iq2000-* \ + | k1om-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa32r6-* | mipsisa32r6el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64r6-* | mipsisa64r6el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* | 
mipsr5900el-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | or1k*-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ + | pyramid-* \ + | riscv32-* | riscv64-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ + | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ + | visium-* \ + | wasm32-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-pc + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + asmjs) + basic_machine=asmjs-unknown + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd 
+ ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2*) + basic_machine=m68k-bull + os=-sysv3 + ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=$os"spe" + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + 
hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; + i*86v32) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'` + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze*) + basic_machine=microblaze-xilinx + ;; + 
mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; + mingw32) + basic_machine=i686-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i686-pc + os=-msys + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + nsv-tandem) + 
basic_machine=nsv-tandem + ;; + nsx-tandem) + basic_machine=nsx-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc | ppcbe) basic_machine=powerpc-unknown + ;; + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppc64) 
basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + 
basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + x64) + basic_machine=x86_64-pc + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'` + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases that might get confused + # with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # es1800 is here to avoid being matched by es* (a different OS) + -es1800*) + os=-ose + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. + # -sysv* is not here because it comes later, after sysvr4. 
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* | -plan9* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \ + | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \ + | -midnightbsd*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -xray | -os68k* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo "$os" | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo "$os" | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo "$os" | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4*) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -pikeos*) + # Until real need of OS specific support for + # particular features comes up, bare metal + # configurations are quite functional. + case $basic_machine in + arm*) + os=-eabi + ;; + *) + os=-elf + ;; + esac + ;; + -nacl*) + ;; + -ios) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. 
+ os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + c8051-*) + os=-elf + ;; + hexagon-*) + os=-elf + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + pru-*) + os=-elf + ;; + *-be) + os=-beos + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"` + ;; +esac + +echo "$basic_machine$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure new file mode 100755 index 000000000..ed0b4faa0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure @@ -0,0 +1,6161 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for hpl 2.3. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. 
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and hpl@icl.utk.edu +$0: about your system, including any error possibly output +$0: before this message. Then install a modern shell, or +$0: manually run the script under such a shell if you do +$0: have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 </dev/null +exec 6>&1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='hpl' +PACKAGE_TARNAME='hpl' +PACKAGE_VERSION='2.3' +PACKAGE_STRING='hpl 2.3' +PACKAGE_BUGREPORT='hpl@icl.utk.edu' +PACKAGE_URL='' + +ac_unique_file="include/hpl.h" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include <stdio.h> +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#ifdef HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif +#ifdef STDC_HEADERS +# include <stdlib.h> +# include <stddef.h> +#else +# ifdef HAVE_STDLIB_H +# include <stdlib.h> +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include <memory.h> +# endif +# include <string.h> +#endif +#ifdef HAVE_STRINGS_H +# include <strings.h> +#endif +#ifdef HAVE_INTTYPES_H +# include <inttypes.h> +#endif +#ifdef HAVE_STDINT_H +# include <stdint.h> +#endif +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif" + +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +EGREP +GREP +CPP +BLAS_LIBS +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__include +DEPDIR +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +RANLIB +OBJEXT +EXEEXT +CPPFLAGS +LDFLAGS +CFLAGS +ac_ct_CC +CC +MPICC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL +am__quote' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_dependency_tracking +enable_silent_rules +' + ac_precious_vars='build_alias +host_alias +target_alias +MPICC +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP' + + +# Initialize some variables set by options. 
+ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures hpl 2.3 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/hpl] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of hpl 2.3:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make 
V=0") + +Some influential environment variables: + MPICC MPI C compiler command + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. 
+ if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +hpl configure 2.3 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. 
+ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## ------------------------------ ## +## Report this to hpl@icl.utk.edu ## +## ------------------------------ ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. 
+ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by hpl $as_me 2.3, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. 
+# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers include/hplconfig.h" + + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. 
+ + +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` + + + + _ax_prog_cc_mpi_mpi_wanted=yes + if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then + if test -z "$CC" && test -n "$MPICC"; then + CC="$MPICC" + else + if test -n "$ac_tool_prefix"; then + for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + fi + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + +# Check for compiler +# Needs to be split off into an extra macro to ensure right expansion +# order. 
+ + +if test x"$_ax_prog_cc_mpi_mpi_wanted" = xno; then : + _ax_prog_cc_mpi_mpi_found=no +else + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + # test whether MPI_Init is available + # We do not use AC_SEARCH_LIBS here, as it caches its outcome and + # thus disallows corresponding calls in the other AX_PROG_*_MPI + # macros. + for lib in NONE mpi mpich; do + save_LIBS=$LIBS + if test x"$lib" = xNONE; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init" >&5 +$as_echo_n "checking for function MPI_Init... " >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init in -l$lib" >&5 +$as_echo_n "checking for function MPI_Init in -l$lib... " >&6; } + LIBS="-l$lib $LIBS" + fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char MPI_Init (); +int +main () +{ +return MPI_Init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + _ax_prog_cc_mpi_mpi_found=yes +else + _ax_prog_cc_mpi_mpi_found=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ax_prog_cc_mpi_mpi_found" >&5 +$as_echo "$_ax_prog_cc_mpi_mpi_found" >&6; } + if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then + break; + fi + LIBS=$save_LIBS + done + + # Check for header + if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5 +$as_echo_n "checking for mpi.h... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + _ax_prog_cc_mpi_mpi_found=no + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then : + + +$as_echo "#define HAVE_MPI 1" >>confdefs.h + + : + +else + + + : + +fi + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... 
" >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + + +am__api_version='1.16' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. 
This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. 
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +$as_echo "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... 
" >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='hpl' + VERSION='2.3' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. 
For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. 
+ am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. 
+# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." 
"$LINENO" 5 + fi +fi + + + + + + + + + +hpl_blas_ok=no + + +current_LIBS="$LIBS" + +cat < hplvars.txt +name1=OpenBLAS +rout1=dgemm_ +libs1=-lopenblas -lm + +name2=Atlas Fortran BLAS +rout2=dgemm_ +libs2=-lf77blas -latlas + +name3=Sequential Intel MKL LP64 (group) +rout3=dgemm_ +libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread + +name4=Sequential Intel MKL LP64 +rout4=dgemm_ +libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +name5=AMD's ACML +rout5=dgemm_ +libs5=-lacml -lm + +name6=Accelerate +rout6=dgemm_ +libs6=-framework Accelerate + +name7=Apple VecLib +rout7=dgemm_ +libs7=-framework vecLib + +name8=IBM ESSL +rout8=dgemm_ +libs8=-lessl + +name9=NVIDIA nvblas +rout9=dgemm_ +libs9=-lnvblas + +name10=Generic BLAS +rout10=dgemm_ +libs10=-lblas + +HPLEOF +for hpl_i in 1 2 3 4 5 6 7 8 9 10; +do +if test x$hpl_blas_ok = xno; then + name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`" + rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`" + libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $rout in $name" >&5 +$as_echo_n "checking for $rout in $name... " >&6; } + + LIBS="$libs" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char $rout (); +int +main () +{ +return $rout (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + hpl_blas_ok=yes;BLAS_LIBS="$libs" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS="$current_LIBS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5 +$as_echo "$hpl_blas_ok" >&6; } +fi +done +rm hplvars.txt + +if test x$hpl_blas_ok = xno; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in OpenBLAS" >&5 +$as_echo_n "checking for dgemm_ in OpenBLAS... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in -lopenblas" >&5 +$as_echo_n "checking for dgemm_ in -lopenblas... " >&6; } +if ${ac_cv_lib_openblas_dgemm_+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lopenblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dgemm_ (); +int +main () +{ +return dgemm_ (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_openblas_dgemm_=yes +else + ac_cv_lib_openblas_dgemm_=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_dgemm_" >&5 +$as_echo "$ac_cv_lib_openblas_dgemm_" >&6; } +if test "x$ac_cv_lib_openblas_dgemm_" = xyes; then : + hpl_blas_ok=yes;BLAS_LIBS="-lopenblas" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5 +$as_echo "$hpl_blas_ok" >&6; } +fi + + + +# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$hpl_blas_ok" = xyes; then + LIBS="$BLAS_LIBS $LIBS" + : +else + hpl_blas_ok=no + as_fn_error $? "BLAS not found" "$LINENO" 5 +fi + + + + +for ac_func in dgemm_ +do : + ac_fn_c_check_func "$LINENO" "dgemm_" "ac_cv_func_dgemm_" +if test "x$ac_cv_func_dgemm_" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_DGEMM_ 1 +_ACEOF + +fi +done + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. 
+ # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. 
Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in mpi.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "mpi.h" "ac_cv_header_mpi_h" "$ac_includes_default" +if test "x$ac_cv_header_mpi_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MPI_H 1 +_ACEOF + +fi + +done + + +ac_config_files="$ac_config_files Makefile src/Makefile testing/Makefile" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. 
+# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! 
-f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. 
Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
+ xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by hpl $as_me 2.3, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. 
Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +hpl config.status 2.3 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. 
## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "include/hplconfig.h") CONFIG_HEADERS="$CONFIG_HEADERS include/hplconfig.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; + "testing/Makefile") CONFIG_FILES="$CONFIG_FILES testing/Makefile" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? + done + if test $am_rc -ne 0; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). +See \`config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? 
"write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure.ac b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure.ac new file mode 100644 index 000000000..eb91dc590 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/configure.ac @@ -0,0 +1,34 @@ +AC_PREREQ([2.69]) + +AC_INIT(hpl, 2.3, hpl@icl.utk.edu) +AC_CONFIG_SRCDIR([include/hpl.h]) +AC_CONFIG_HEADERS([include/hplconfig.h]) + +AX_PROG_CC_MPI + +AC_PROG_RANLIB + +AC_PROG_INSTALL + +AM_INIT_AUTOMAKE([subdir-objects]) + +AM_PROG_CC_C_O + +dnl +dnl AX_BLAS requires Fortran compiler and detects fortran libraries in $FLIBS +dnl +dnl AX_BLAS(LIBS="$BLAS_LIBS $LIBS $FLIBS") +dnl + 
+HPL_BLAS(LIBS="$BLAS_LIBS $LIBS",AC_MSG_ERROR([BLAS not found])) + +dnl FIXME: test for CBLAS: Atlas, MKL, OpenBLAS, ESSL, ... +dnl FIXME: test for GSL CBLAS + +AC_CHECK_FUNCS([dgemm_]) + +AC_CHECK_HEADERS([mpi.h]) + +AC_CONFIG_FILES([Makefile src/Makefile testing/Makefile]) + +AC_OUTPUT diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/depcomp b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/depcomp new file mode 100755 index 000000000..65cbf7093 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/depcomp @@ -0,0 +1,791 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. 
+ +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. 
+nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. 
+ gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. +## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. 
+ sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. 
+ tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). 
+ "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. 
+ "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. + while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? 
+ if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. + sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. 
+ aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? + grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. 
+ IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. + "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. 
+ sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. 
+ if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl.h new file mode 100644 index 000000000..6d131963f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl.h @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_H +#define HPL_H +/* + * --------------------------------------------------------------------- + * HPL default compile options that can overridden in the Make. + * --------------------------------------------------------------------- + */ +#ifndef HPL_NO_MPI_DATATYPE /* Use MPI user-defined data type */ +#define HPL_USE_MPI_DATATYPE +#endif + +#ifndef HPL_COPY_L /* do not copy L, use MPI user-defined data types */ +#define HPL_NO_COPY_L +#endif + +#ifndef HPL_DETAILED_TIMING /* Do not enable detailed timings */ +#define HPL_NO_DETAILED_TIMING +#endif + +#ifndef HPL_CALL_VSIPL /* Call the Fortran 77 BLAS interface */ +#ifndef HPL_CALL_CBLAS /* there can be only one */ +#define HPL_CALL_FBLAS +#endif +#endif +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +#include "hpl_pgesv.h" + +#include "hpl_timer.h" +#include "hpl_matgen.h" +#include "hpl_test.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +#include "hpl_ptest.h" + +#endif +/* + * End of hpl.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_auxil.h 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_auxil.h new file mode 100644 index 000000000..861caf380 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_auxil.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_AUXIL_H +#define HPL_AUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM; + +typedef enum +{ + HPL_MACH_EPS = 900, /* relative machine precision */ + HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */ + HPL_MACH_BASE = 902, /* base = base of the machine */ + HPL_MACH_PREC = 903, /* prec = eps*base */ + HPL_MACH_MLEN = 904, /* number of (base) digits in the mantissa */ + HPL_MACH_RND = 905, /* 1.0 if rounding occurs in addition */ + HPL_MACH_EMIN = 906, /* min exponent before (gradual) underflow */ + HPL_MACH_RMIN = 907, /* underflow threshold base**(emin-1) */ + HPL_MACH_EMAX = 908, /* largest exponent before overflow */ + HPL_MACH_RMAX = 909 /* overflow threshold - (base**emax)*(1-eps) */ + +} HPL_T_MACH; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_fprintf +STDC_ARGS( ( + FILE *, + const char *, + ... 
+) ); +void HPL_warn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_abort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_dlacpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlatcpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlaprnt +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_dlange +STDC_ARGS( ( + const HPL_T_NORM, + const int, + const int, + const double *, + const int +) ); +double HPL_dlamch +STDC_ARGS( ( + const HPL_T_MACH +) ); + +#endif +/* + * End of hpl_auxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_blas.h new file mode 100644 index 000000000..2a510471a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_blas.h @@ -0,0 +1,630 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_BLAS_H +#define HPL_BLAS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" + + +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +enum HPL_ORDER +{ HplRowMajor = 101, HplColumnMajor = 102 }; +enum HPL_TRANS +{ HplNoTrans = 111, HplTrans = 112, HplConjTrans = 113 }; +enum HPL_UPLO +{ HplUpper = 121, HplLower = 122 }; +enum HPL_DIAG +{ HplNonUnit = 131, HplUnit = 132 }; +enum HPL_SIDE +{ HplLeft = 141, HplRight = 142 }; + + +#ifdef HPL_CALL_CBLAS + + +/* + * --------------------------------------------------------------------- + * The C interface of the BLAS is available ... + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define CBLAS_INDEX int + +#define CBLAS_ORDER HPL_ORDER +#define CblasRowMajor HplRowMajor +#define CblasColMajor HplColMajor + +#define CBLAS_TRANSPOSE HPL_TRANS +#define CblasNoTrans HplNoTrans +#define CblasTrans HplTrans +#define CblasConjTrans HplConjTrans + +#define CBLAS_UPLO HPL_UPLO +#define CblasUpper HplUpper +#define CblasLower HplLower + +#define CBLAS_DIAG HPL_DIAG +#define CblasNonUnit HplNonUnit +#define CblasUnit HplUnit + +#define CBLAS_SIDE HPL_SIDE +#define CblasLeft HplLeft +#define CblasRight HplRight +/* + * --------------------------------------------------------------------- + * CBLAS Function prototypes + * --------------------------------------------------------------------- + */ +CBLAS_INDEX cblas_idamax +STDC_ARGS( +( const int, const double *, const int ) ); +void cblas_dswap +STDC_ARGS( +( const int, double *, const int, double *, + const int ) ); +void cblas_dcopy +STDC_ARGS( +( const int, 
const double *, const int, double *, + const int ) ); +void cblas_daxpy +STDC_ARGS( +( const int, const double, const double *, const int, + double *, const int ) ); +void cblas_dscal +STDC_ARGS( +( const int, const double, double *, const int ) ); + +void cblas_dgemv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const int, const int, const double, const double *, + const int, const double *, const int, const double, + double *, const int ) ); + +void cblas_dger +STDC_ARGS( +( const enum CBLAS_ORDER, const int, const int, + const double, const double *, const int, const double *, + const int, double *, const int ) ); +void cblas_dtrsv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_UPLO, + const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, + const int, const double *, const int, double *, + const int ) ); + +void cblas_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void cblas_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +void dpcpp_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void dpcpp_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +/* + * --------------------------------------------------------------------- + * HPL C BLAS macro definition + * 
--------------------------------------------------------------------- + */ +#define HPL_dswap cblas_dswap +#define HPL_dcopy cblas_dcopy +#define HPL_daxpy cblas_daxpy +#define HPL_dscal cblas_dscal +#define HPL_idamax cblas_idamax + +#define HPL_dgemv cblas_dgemv +#define HPL_dtrsv cblas_dtrsv +#define HPL_dger cblas_dger + +//#define HPL_dgemm cblas_dgemm +//#define HPL_dtrsm cblas_dtrsm +#define HPL_dgemm dpcpp_dgemm +#define HPL_dtrsm dpcpp_dtrsm + +#endif + +//#define HPL_hello sss_gemm + +#ifdef HPL_CALL_FBLAS +/* + * --------------------------------------------------------------------- + * Use the Fortran 77 interface of the BLAS ... + * --------------------------------------------------------------------- + * Defaults: Add_, F77_INTEGER=int, StringSunStyle + * --------------------------------------------------------------------- + */ +#ifndef NoChange +#ifndef UpCase +#ifndef Add__ +#ifndef Add_ + +#define Add_ + +#endif +#endif +#endif +#endif + +#ifndef F77_INTEGER +#define F77_INTEGER int +#else +#define HPL_USE_F77_INTEGER_DEF +#endif + +#ifndef StringCrayStyle +#ifndef StringStructVal +#ifndef StringStructPtr +#ifndef StringSunStyle + +#define StringSunStyle + +#endif +#endif +#endif +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 <-> C interface + * --------------------------------------------------------------------- + * + * These macros identifies how Fortran routines will be called. + * + * Add_ : the Fortran compiler expects the name of C functions to be + * in all lower case and to have an underscore postfixed it (Suns, Intel + * compilers expect this). + * + * NoChange : the Fortran compiler expects the name of C functions to be + * in all lower case (IBM RS6K compilers do this). + * + * UpCase : the Fortran compiler expects the name of C functions to be + * in all upcase. (Cray compilers expect this). + * + * Add__ : the Fortran compiler in use is f2c, a Fortran to C conver- + * ter. 
+ */ +#ifdef NoChange +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm(...) + */ +#define F77dswap dswap +#define F77dscal dscal +#define F77dcopy dcopy +#define F77daxpy daxpy +#define F77idamax idamax + +#define F77dgemv dgemv +#define F77dtrsv dtrsv +#define F77dger dger + +#define F77dgemm dgemm +#define F77dtrsm dtrsm + +#endif + +#ifdef UpCase +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) DGEMM(...) + */ +#ifdef CRAY_BLAS + +#define F77dswap SSWAP +#define F77dscal SSCAL +#define F77dcopy SCOPY +#define F77daxpy SAXPY +#define F77idamax ISAMAX + +#define F77dgemv SGEMV +#define F77dtrsv STRSV +#define F77dger SGER + +#define F77dgemm SGEMM +#define F77dtrsm STRSM + +#else + +#define F77dswap DSWAP +#define F77dscal DSCAL +#define F77dcopy DCOPY +#define F77daxpy DAXPY +#define F77idamax IDAMAX + +#define F77dgemv DGEMV +#define F77dtrsv DTRSV +#define F77dger DGER + +#define F77dgemm DGEMM +#define F77dtrsm DTRSM + +#endif + +#endif + +#ifdef Add_ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) 
+ */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ + +#endif + +#ifdef Add__ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) + */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ +//#define F77hello sss_gemm + +#endif +//#define F77hello sss_gemm +/* + * --------------------------------------------------------------------- + * Typedef definitions and conversion utilities + * --------------------------------------------------------------------- + */ +#ifdef StringCrayStyle + +#include + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR _fcd + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(_fcdtocp(c) )) +#define HPL_C2F_CHAR(c) (_cptofcd(&(c), 1)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructVal + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c.cp)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructPtr + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c->cp)) + +#define 
F77_CHAR_DECL F77_CHAR * /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringSunStyle + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR char * + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c)) +#define HPL_C2F_CHAR(c) (&(c)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ +#define F77_1_CHAR , F77_INTEGER +#define F77_2_CHAR F77_1_CHAR F77_1_CHAR +#define F77_3_CHAR F77_2_CHAR F77_1_CHAR +#define F77_4_CHAR F77_3_CHAR F77_1_CHAR + +#endif +/* ------------------------------------------------------------------ */ + +#ifndef F77_1_CHAR +#define F77_1_CHAR +#define F77_2_CHAR +#define F77_3_CHAR +#define F77_4_CHAR +#endif + +#define F77_INT_DECL const F77_INTEGER * /* input integer */ +#define F77_SIN_DECL const double * /* input scalar */ +#define F77_VIN_DECL const double * /* input vector */ +#define F77_VINOUT_DECL double * /* input/output matrix */ +#define F77_MIN_DECL const double * /* input matrix */ +#define F77_MINOUT_DECL double * /* input/output matrix */ + +#ifdef CRAY_PVP_ENV /* Type of FORTRAN functions */ +#define F77_VOID_FUN extern fortran void /* subroutine */ +#define F77_INT_FUN extern fortran int /* integer function */ +#else +#define F77_VOID_FUN extern void /* subroutine */ +#define F77_INT_FUN extern int /* integer function */ +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 BLAS function prototypes + * --------------------------------------------------------------------- + */ +F77_VOID_FUN F77dswap +STDC_ARGS( +( F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dscal +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_VOID_FUN F77dcopy +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77daxpy +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, 
F77_VIN_DECL, F77_INT_DECL, + F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_INT_FUN F77idamax +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL ) ); + +F77_VOID_FUN F77dgemv +STDC_ARGS( +( F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, + F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL F77_1_CHAR ) ); +F77_VOID_FUN F77dger +STDC_ARGS( +( F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_VIN_DECL, + F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_MINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dtrsv +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL + F77_3_CHAR ) ); + +F77_VOID_FUN F77dgemm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, + F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MINOUT_DECL, + F77_INT_DECL F77_2_CHAR ) ); +F77_VOID_FUN F77dtrsm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, + F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, + F77_INT_DECL, F77_MINOUT_DECL, F77_INT_DECL F77_4_CHAR ) ); + +#endif +/* + * --------------------------------------------------------------------- + * HPL BLAS Function prototypes + * --------------------------------------------------------------------- + */ +#ifndef HPL_CALL_CBLAS + +int HPL_idamax +STDC_ARGS( ( + const int, + const double *, + const int +) ); +void HPL_daxpy +STDC_ARGS( ( + const int, + const double, + const double *, + const int, + double *, + const int +) ); +void HPL_dcopy +STDC_ARGS( ( + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dscal +STDC_ARGS( ( + const int, + const double, + double *, + const int +) ); +void HPL_dswap +STDC_ARGS( ( + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dgemv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const int, + const 
int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_dger +STDC_ARGS( ( + const enum HPL_ORDER, + const int, + const int, + const double, + const double *, + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dtrsv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dgemm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const enum HPL_TRANS, + const int, + const int, + const int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_hello +STDC_ARGS( ( +) ); +#endif +void HPL_dtrsm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_SIDE, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int +) ); + +//#endif + +#endif +/* + * hpl_blas.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_comm.h new file mode 100644 index 000000000..e3ba51a57 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_comm.h @@ -0,0 +1,161 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_COMM_H +#define HPL_COMM_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_1RING = 401, /* Increasing ring */ + HPL_1RING_M = 402, /* Increasing ring (modified) */ + HPL_2RING = 403, /* Increasing 2-ring */ + HPL_2RING_M = 404, /* Increasing 2-ring (modified) */ + HPL_BLONG = 405, /* long broadcast */ + HPL_BLONG_M = 406 /* long broadcast (modified) */ +} HPL_T_TOP; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_FAILURE 0 +#define HPL_SUCCESS 1 +#define HPL_KEEP_TESTING 2 +/* + * --------------------------------------------------------------------- + * comm function prototypes + * --------------------------------------------------------------------- + */ +int HPL_send +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_recv +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_sdrv +STDC_ARGS( ( + double *, + int, + int, + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_binit +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_bcast +STDC_ARGS( ( + HPL_T_panel *, + int * +) ); +int HPL_bwait +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_packL +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int +) ); +void HPL_copyL +STDC_ARGS( ( + HPL_T_panel * +) ); + +int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_1rinM STDC_ARGS( ( 
HPL_T_panel * ) ); +int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blong STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel * ) ); + +#endif +/* + * End of hpl_comm.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_gesv.h new file mode 100644 index 000000000..ce671cf2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_gesv.h @@ -0,0 +1,87 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_GESV_H +#define HPL_GESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_LEFT_LOOKING = 301, /* Left looking lu fact variant */ + HPL_CROUT = 302, /* Crout lu fact variant */ + HPL_RIGHT_LOOKING = 303 /* Right looking lu fact variant */ +} HPL_T_FACT; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dgesv +STDC_ARGS( +( const int, const int, const int, const HPL_T_FACT, + const HPL_T_FACT, const int, double *, + const int, int * ) ); +void HPL_ipid +STDC_ARGS( +( const int, double *, int *, int *, + int *, int *, int *, int *, + const int, const int, const int, const int, + const int ) ); + +#endif +/* + * End of hpl_gesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_grid.h new file mode 100644 index 000000000..1895a5ed4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_grid.h @@ -0,0 +1,212 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_GRID_H +#define HPL_GRID_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum { HPL_INT = 100, HPL_DOUBLE = 101 } HPL_T_TYPE; + +typedef enum +{ + HPL_ROW_MAJOR = 201, + HPL_COLUMN_MAJOR = 202 +} HPL_T_ORDER; + +typedef struct HPL_S_grid +{ + MPI_Comm all_comm; /* grid communicator */ + MPI_Comm row_comm; /* row communicator */ + MPI_Comm col_comm; /* column communicator */ + HPL_T_ORDER order; /* ordering of the procs in the grid */ + int iam; /* my rank in the grid */ + int myrow; /* my row number in the grid */ + int mycol; /* my column number in the grid */ + int nprow; /* the total # of rows in the grid */ + int npcol; /* the total # of columns in the grid */ + int nprocs; /* the total # of procs in the grid */ + int row_ip2; /* largest power of two <= nprow */ + int row_hdim; /* row_ip2 procs hypercube dimension */ + int row_ip2m1; /* largest power of two <= nprow-1 */ + int row_mask; /* row_ip2m1 procs hypercube mask */ + int col_ip2; /* largest power of two <= npcol */ + int col_hdim; /* col_ip2 procs hypercube dimension */ + int col_ip2m1; /* largest power of two <= 
npcol-1 */ + int col_mask; /* col_ip2m1 procs hypercube mask */ +} HPL_T_grid; + +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_OP) +( const int, const void *, void *, const HPL_T_TYPE ); +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define HPL_2_MPI_TYPE( typ ) \ + ( ( typ == HPL_INT ? MPI_INT : MPI_DOUBLE ) ) +/* + * The following macros perform common modulo operations; All functions + * except MPosMod assume arguments are < d (i.e., arguments are themsel- + * ves within modulo range). + */ + /* increment with mod */ +#define MModInc(I, d) if(++(I) == (d)) (I) = 0 + /* decrement with mod */ +#define MModDec(I, d) if(--(I) == -1) (I) = (d)-1 + /* positive modulo */ +#define MPosMod(I, d) ( (I) - ((I)/(d))*(d) ) + /* add two numbers */ +#define MModAdd(I1, I2, d) \ + ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) ) + /* add 1 to # */ +#define MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 ) + /* subtract two numbers */ +#define MModSub(I1, I2, d) \ + ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) ) + /* sub 1 from # */ +#define MModSub1(I, d) ( ((I)!=0) ? 
(I)-1 : (d)-1 ) +/* + * --------------------------------------------------------------------- + * grid function prototypes + * --------------------------------------------------------------------- + */ +int HPL_grid_init +STDC_ARGS( ( + MPI_Comm, + const HPL_T_ORDER, + const int, + const int, + HPL_T_grid * +) ); +int HPL_grid_exit +STDC_ARGS( ( + HPL_T_grid * +) ); + +int HPL_grid_info +STDC_ARGS( ( + const HPL_T_grid *, + int *, + int *, + int *, + int * +) ); +int HPL_pnum +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int +) ); + +int HPL_barrier +STDC_ARGS( ( + MPI_Comm +) ); +int HPL_broadcast +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const int, + MPI_Comm +) ); +int HPL_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + const int, + MPI_Comm +) ); +int HPL_all_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + MPI_Comm +) ); + +void HPL_max +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_min +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_sum +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); + +#endif +/* + * End of hpl_grid.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_matgen.h new file mode 100644 index 000000000..de6503eea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_matgen.h @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MATGEN_H +#define HPL_MATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MULT0 1284865837 +#define HPL_MULT1 1481765933 +#define HPL_IADD0 1 +#define HPL_IADD1 0 +#define HPL_DIVFAC 2147483648.0 +#define HPL_POW16 65536.0 +#define HPL_HALF 0.5 +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dmatgen +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int +) ); +void HPL_lmul +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_ladd +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_xjumpm +STDC_ARGS( ( + const int, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_setran +STDC_ARGS( ( + const int, + int * +) ); +void HPL_jumpit +STDC_ARGS( ( + int *, + int *, + int *, + int * +) ); +double HPL_rand STDC_ARGS( ( void ) ); + +#endif +/* + * End of hpl_matgen.h + */ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_misc.h new file mode 100644 index 000000000..ea421a403 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_misc.h @@ -0,0 +1,110 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MISC_H +#define HPL_MISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#ifdef __STDC__ +#define STDC_HEADERS +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +/* STDC_ARGS(p) keeps a prototype's parameter list when STDC_HEADERS is defined and reduces it to () otherwise */ +#ifdef STDC_HEADERS +#include <stdarg.h> +#define STDC_ARGS(p) p +#else +#include <varargs.h> +#define STDC_ARGS(p) () +#endif + +#ifdef HPL_CALL_VSIPL +#include <vsip.h> +#endif +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_rone 1.0 +#define HPL_rtwo 2.0 +#define HPL_rzero 0.0 +/* + * --------------------------------------------------------------------- + * #define macros definitions -- function-like macros; an argument may be evaluated more than once, so avoid side effects in arguments + * --------------------------------------------------------------------- + */ +#define Mabs( a_ ) ( ( (a_) < 0 ) ? -(a_) : (a_) ) +#define Mmin( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) ) +#define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) ) +/* Integer division helpers, assuming integer operands and b > 0 (TODO confirm at call sites): Mfloor rounds toward -infinity and Miceil toward +infinity for either sign of a; Mceil is a true ceiling only for a >= 0 */ +#define Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b)))) +#define Mceil(a,b) ( ( (a)+(b)-1 ) / (b) ) +#define Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b)))) +/* ASCII-only case conversion: codes 97..122 are 'a'..'z', 65..90 are 'A'..'Z', and bit 0x20 (32) is the case bit */ +#define Mupcase(C) (((C)>96 && (C)<123) ? (C) & 0xDF : (C)) +#define Mlowcase(C) (((C)>64 && (C)< 91) ? 
(C) | 32 : (C)) +/* + * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and + * also less silly errors ... (the address is a_ + i_ + j_*lda_, i.e. column-major with leading dimension lda_) + */ +#define Mptr( a_, i_, j_, lda_ ) \ + ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) ) +/* + * Align pointer: round the address ptr_ up to the next multiple of al_ (integer arithmetic; the result is not a pointer type) + */ +#define HPL_PTR( ptr_, al_ ) \ + ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) +#endif +/* + * End of hpl_misc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_panel.h new file mode 100644 index 000000000..d5ba2939c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_panel.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PANEL_H +#define HPL_PANEL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * Data Structures -- HPL_T_panel groups, for one panel, the grid / algorithm / matrix handles, the work and factor pointers, the broadcast state and the global / local index bookkeeping + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_panel +{ + struct HPL_S_grid * grid; /* ptr to the process grid */ + struct HPL_S_palg * algo; /* ptr to the algorithm parameters */ + struct HPL_S_pmat * pmat; /* ptr to the local array info */ + double * A; /* ptr to trailing part of A */ + double * WORK; /* work space */ + double * L2; /* ptr to L */ + double * L1; /* ptr to jb x jb upper block of A */ + double * DPIV; /* ptr to replicated jb pivot array */ + double * DINFO; /* ptr to replicated scalar info */ + double * U; /* ptr to U */ + int * IWORK; /* integer workspace for swapping */ + void * * * buffers[2]; /* buffers for panel bcast */ + int counts [2]; /* counts for panel bcast */ + MPI_Datatype dtypes [2]; /* data types for panel bcast */ + 
MPI_Request request[1]; /* requests for panel bcast */ + MPI_Status status [1]; /* status for panel bcast */ + int nb; /* distribution blocking factor */ + int jb; /* panel width */ + int m; /* global # of rows of trailing part of A */ + int n; /* global # of cols of trailing part of A */ + int ia; /* global row index of trailing part of A */ + int ja; /* global col index of trailing part of A */ + int mp; /* local # of rows of trailing part of A */ + int nq; /* local # of cols of trailing part of A */ + int ii; /* local row index of trailing part of A */ + int jj; /* local col index of trailing part of A */ + int lda; /* local leading dimension of array A */ + int prow; /* process row owning 1st row of trailing A */ + int pcol; /* process col owning 1st col of trailing A */ + int msgid; /* message id for panel bcast */ + int ldl2; /* local leading dimension of array L2 */ + int len; /* length of the buffer to broadcast */ +#ifdef HPL_CALL_VSIPL + vsip_block_d * Ablock; /* A block */ + vsip_block_d * L1block; /* L1 block */ + vsip_block_d * L2block; /* L2 block */ + vsip_block_d * Ublock; /* U block */ +#endif +} HPL_T_panel; + +/* + * --------------------------------------------------------------------- + * panel function prototypes (hpl_pgesv.h is pulled in here, after HPL_T_panel is complete; NOTE(review): presumably it provides the HPL_T_palg / HPL_T_pmat types used below -- confirm) + * --------------------------------------------------------------------- + */ +#include "hpl_pgesv.h" +/* _new and _disp take HPL_T_panel * *, _init and _free take HPL_T_panel *; NOTE(review): presumably allocate-and-init vs. init-in-place and their matching release calls -- confirm in the panel sources */ +void HPL_pdpanel_new +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * * +) ); +void HPL_pdpanel_init +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * +) ); +int HPL_pdpanel_disp +STDC_ARGS( ( + HPL_T_panel * * +) ); +int HPL_pdpanel_free +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_panel.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pauxil.h 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pauxil.h new file mode 100644 index 000000000..1fd0ee457 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pauxil.h @@ -0,0 +1,505 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PAUXIL_H +#define HPL_PAUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * #define macros definitions -- statement macros: each expands to a { } block that assigns its output argument(s), so those must be assignable lvalues + * --------------------------------------------------------------------- + */ +/* + * Mindxg2p returns the process coordinate owning the entry globally in- + * dexed by ig_. + */ +#define Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) && \ + ( (nprocs_) > 1 ) ) \ + { \ + proc_ = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_); \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + else \ + { \ + proc_ = (src_); \ + } \ + } +/* Mindxg2l sets il_ to the local index corresponding to the global index ig_; its proc_ argument is not referenced by the expansion */ +#define Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) { il_ = (ig_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*( j__ - i__ ) + \ + ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? 
\ + (ig_) - (inb_) : (ig_) ); \ + } \ + } +/* Mindxg2lp sets both il_ (same formula as Mindxg2l) and proc_ (same formula as Mindxg2p) for the global index ig_ */ +#define Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) \ + { il_ = (ig_); proc_ = (src_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*(j__-i__) + \ + ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ + (ig_) - (inb_) : (ig_) ); \ + proc_ = (src_) + 1 + i__; \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + } +/* + * Mindxl2g computes the global index ig_ corresponding to the local + * index il_ in process proc_. + */ +#define Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + if( (proc_) == (src_) ) \ + { \ + if( (il_) < (inb_) ) ig_ = (il_); \ + else ig_ = (il_) + \ + (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \ + } \ + else if( (proc_) < (src_) ) \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1+(nprocs_) ); \ + } \ + else \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1 ); \ + } \ + } \ + else \ + { \ + ig_ = (il_); \ + } \ + } +/* + * MnumrocI computes the # of local indexes np_ residing in the process + * of coordinate proc_ corresponding to the interval of global indexes + * i_:i_+n_-1 assuming that the global index 0 resides in the process + * src_, and that the indexes are distributed from src_ using the para- + * meters inb_, nb_ and nprocs_. 
+ */ +#define MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + int inb__, mydist__, n__, nblk__, quot__, src__; \ + if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ + { \ + nblk__ = (-inb__) / (nb_) + 1; \ + src__ = (src_) + nblk__; \ + src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ + inb__ += nblk__*(nb_); \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == src__ ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - src__ ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != src__ ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != src__ ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + else \ + { \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == (src_) ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - (src_) ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != (src_) ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != (src_) ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + } \ + else \ + { \ + np_ = (n_); \ + } \ + } +/* Mnumroc is MnumrocI with the first global index i_ fixed to 0 */ +#define Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ ) \ + MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) +/* + * --------------------------------------------------------------------- + * Function prototypes (parameter lists are wrapped in STDC_ARGS from hpl_misc.h) + * 
--------------------------------------------------------------------- + */ +void HPL_indxg2lp +STDC_ARGS( ( + int *, + int *, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2l +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2p +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxl2g +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +void HPL_infog2l +STDC_ARGS( ( + int, + int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + int *, + int *, + int *, + int * +) ); +int HPL_numroc +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_numrocI +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int, + const int +) ); +/* NOTE(review): the HPL_dlaswp* routines below presumably are the local row-swap / copy kernels, with a trailing N or T for no-transpose / transpose variants -- confirm against their sources */ +void HPL_dlaswp00N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp10N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp01N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp01T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp02N +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp03N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp03T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp04N +STDC_ARGS( ( + const int, + const int, + const int, + double *, + 
const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp04T +STDC_ARGS( ( + const int, + const int, + const int, + double *, + const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp06N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp06T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); +/* HPL_pabort and HPL_pwarn end with a variable argument list (...); HPL_pwarn additionally takes a FILE * as its first argument */ +void HPL_pabort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_pwarn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_pdlaprnt +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_pdlamch +STDC_ARGS( ( + MPI_Comm, + const HPL_T_MACH +) ); +double HPL_pdlange +STDC_ARGS( ( + const HPL_T_grid *, + const HPL_T_NORM, + const int, + const int, + const int, + const double *, + const int +) ); + +#endif +/* + * End of hpl_pauxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pfact.h new file mode 100644 index 000000000..09eee79ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pfact.h @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PFACT_H +#define HPL_PFACT_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_PFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_RFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_UPD_FUN) +( HPL_T_panel *, int *, HPL_T_panel *, const int ); +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dlocmax +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_dlocswpN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_dlocswpT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_pdmxswp +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdpancrN +STDC_ARGS( ( + HPL_T_panel *, 
+ const int, + const int, + const int, + double * +) ); +void HPL_pdpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdrpancrN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdfact +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_pfact.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pgesv.h new file mode 100644 index 000000000..3ca576c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pgesv.h @@ -0,0 +1,346 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PGESV_H +#define HPL_PGESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +#include "hpl_comm.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_SWAP00 = 451, /* Use HPL_pdlaswp00 */ + HPL_SWAP01 = 452, /* Use HPL_pdlaswp01 */ + HPL_SW_MIX = 453, /* Use HPL_pdlaswp00_ for small number of */ + /* columns, and HPL_pdlaswp01_ otherwise. 
*/ + HPL_NO_SWP = 499 +} HPL_T_SWAP; + +typedef struct HPL_S_palg +{ + HPL_T_TOP btopo; /* row broadcast topology */ + int depth; /* look-ahead depth */ + int nbdiv; /* recursive division factor */ + int nbmin; /* recursion stopping criterium */ + HPL_T_FACT pfact; /* panel fact variant */ + HPL_T_FACT rfact; /* recursive fact variant */ + HPL_T_PFA_FUN pffun; /* panel fact function ptr */ + HPL_T_RFA_FUN rffun; /* recursive fact function ptr */ + HPL_T_UPD_FUN upfun; /* update function */ + HPL_T_SWAP fswap; /* Swapping algorithm */ + int fsthr; /* Swapping threshold */ + int equil; /* Equilibration */ + int align; /* data alignment constant */ +} HPL_T_palg; + +typedef struct HPL_S_pmat +{ +#ifdef HPL_CALL_VSIPL + vsip_block_d * block; +#endif + double * A; /* pointer to local piece of A */ + double * X; /* pointer to solution vector */ + int n; /* global problem size */ + int nb; /* blocking factor */ + int ld; /* local leading dimension */ + int mp; /* local number of rows */ + int nq; /* local number of columns */ + int info; /* computational flag */ +} HPL_T_pmat; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define MSGID_BEGIN_PFACT 1001 /* message id ranges */ +#define MSGID_END_PFACT 2000 +#define MSGID_BEGIN_FACT 2001 +#define MSGID_END_FACT 3000 +#define MSGID_BEGIN_PTRSV 3001 +#define MSGID_END_PTRSV 4000 + +#define MSGID_BEGIN_COLL 9001 +#define MSGID_END_COLL 10000 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define MNxtMgid( id_, beg_, end_ ) \ + (( (id_)+1 > (end_) ? 
(beg_) : (id_)+1 )) +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pipid +STDC_ARGS( ( + HPL_T_panel *, + int *, + int * +) ); +void HPL_plindx0 +STDC_ARGS( ( + HPL_T_panel *, + const int, + int *, + int *, + int *, + int * +) ); +void HPL_pdlaswp00N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdlaswp00T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_perm +STDC_ARGS( ( + const int, + int *, + int *, + int * +) ); +void HPL_logsort +STDC_ARGS( ( + const int, + const int, + int *, + int *, + int * +) ); +void HPL_plindx10 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int * +) ); +void HPL_plindx1 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_spreadN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_spreadT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_equil +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_TRANS, + const int, + double *, + const int, + int *, + const int *, + const int *, + int * +) ); +void HPL_rollN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_rollT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_pdlaswp01N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); 
+void HPL_pdlaswp01T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdupdateNN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateNT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdgesv0 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK1 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK2 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); + +void HPL_pdtrsv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_pmat * +) ); + +#endif +/* + * End of hpl_pgesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmatgen.h new file mode 100644 index 000000000..1091b0f60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmatgen.h @@ -0,0 +1,77 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PMATGEN_H +#define HPL_PMATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_matgen.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdmatgen +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int +) ); + +#endif +/* + * End of hpl_pmatgen.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmisc.h new file mode 100644 index 000000000..23550d47b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_pmisc.h @@ -0,0 +1,59 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PMISC_H +#define HPL_PMISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "mpi.h" + +#endif +/* + * End of hpl_pmisc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptest.h new file mode 100644 index 000000000..5777bd536 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptest.h @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PTEST_H +#define HPL_PTEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pgesv.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_test +{ + double epsil; /* epsilon machine */ + double thrsh; /* threshold */ + FILE * outfp; /* output stream (only in proc 0) */ + int kfail; /* # of tests failed */ + int kpass; /* # of tests passed */ + int kskip; /* # of tests skipped */ + int ktest; /* total number of tests */ +} HPL_T_test; + +/* + * --------------------------------------------------------------------- + * #define macro constants for testing only + * --------------------------------------------------------------------- + */ +#define 
HPL_LINE_MAX 256 +#define HPL_MAX_PARAM 20 +#define HPL_ISEED 100 +/* + * --------------------------------------------------------------------- + * global timers for timing analysis only + * --------------------------------------------------------------------- + */ +#ifdef HPL_DETAILED_TIMING +#define HPL_TIMING_BEG 11 /* timer 0 reserved, used by main */ +#define HPL_TIMING_N 6 /* number of timers defined below */ +#define HPL_TIMING_RPFACT 11 /* starting from here, contiguous */ +#define HPL_TIMING_PFACT 12 +#define HPL_TIMING_MXSWP 13 +#define HPL_TIMING_UPDATE 14 +#define HPL_TIMING_LASWP 15 +#define HPL_TIMING_PTRSV 16 +#endif +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdinfo +STDC_ARGS( ( + HPL_T_test *, + int *, + int *, + int *, + int *, + HPL_T_ORDER *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + HPL_T_TOP *, + int *, + int *, + HPL_T_SWAP *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_pdtest +STDC_ARGS( ( + HPL_T_test *, + HPL_T_grid *, + HPL_T_palg *, + const int, + const int +) ); + +#endif +/* + * End of hpl_ptest.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptimer.h new file mode 100644 index 000000000..43c8fe33a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_ptimer.h @@ -0,0 +1,96 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PTIMER_H +#define HPL_PTIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NPTIMER 64 +#define HPL_PTIMER_STARTFLAG 5.0 +#define HPL_PTIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_PTIME = 101, HPL_CPU_PTIME = 102 } HPL_T_PTIME; + +typedef enum +{ HPL_AMAX_PTIME = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME = 203 } +HPL_T_PTIME_OP; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_ptimer_cputime STDC_ARGS( ( void ) ); +double HPL_ptimer_walltime STDC_ARGS( ( void ) ); + +void HPL_ptimer STDC_ARGS( ( const int ) ); +void HPL_ptimer_boot STDC_ARGS( ( void ) ); +void HPL_ptimer_combine +STDC_ARGS( +( MPI_Comm comm, const HPL_T_PTIME_OP, const HPL_T_PTIME, + const int, const int, double * ) ); +void HPL_ptimer_disable STDC_ARGS( ( void ) ); +void HPL_ptimer_enable 
STDC_ARGS( ( void ) ); +double HPL_ptimer_inquire +STDC_ARGS( +( const HPL_T_PTIME, const int ) ); + +#endif +/* + * End of hpl_ptimer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_test.h new file mode 100644 index 000000000..1eedc97e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_test.h @@ -0,0 +1,80 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TEST_H +#define HPL_TEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_matgen.h" +#include "hpl_timer.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dinfo +STDC_ARGS( +( FILE * *, int *, int *, int *, + HPL_T_FACT *, int *, int *, int *, + int *, int *, HPL_T_FACT *, int *, + double *, double * ) ); +void HPL_dtest +STDC_ARGS( +( FILE *, const int, const int, const int, + HPL_T_FACT, HPL_T_FACT, const int, const double, + const double, int *, int *, int * ) ); + +#endif +/* + * End of hpl_test.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_timer.h new file mode 100644 index 000000000..4c91700ef --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_timer.h @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TIMER_H +#define HPL_TIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NTIMER 64 +#define HPL_TIMER_STARTFLAG 5.0 +#define HPL_TIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_TIME = 101, HPL_CPU_TIME = 102 } HPL_T_TIME; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_timer_cputime STDC_ARGS( ( void ) ); +double HPL_timer_walltime STDC_ARGS( ( void ) ); + +void HPL_timer STDC_ARGS( ( const int ) ); +void HPL_timer_boot STDC_ARGS( ( void ) ); +void HPL_timer_enable STDC_ARGS( ( void ) ); +void HPL_timer_disable STDC_ARGS( ( void ) ); +double HPL_timer_inquire +STDC_ARGS( +( const HPL_T_TIME, const int ) ); + +#endif +/* + * End of hpl_timer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_units.h 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_units.h new file mode 100644 index 000000000..a96956497 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hpl_units.h @@ -0,0 +1,135 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_UNITS_H +#define HPL_UNITS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MAXROUT 50 +#define HPL_MAXRNAME 15 + +#define HPL_TRUE 'T' +#define HPL_FALSE 'F' + +#define HPL_INDXG2P_ROUT "HPL_indxg2p" +#define HPL_INDXG2L_ROUT "HPL_indxg2l" +#define HPL_INDXL2G_ROUT "HPL_indxl2g" +#define HPL_NUMROC_ROUT "HPL_numroc" +#define HPL_NUMROCI_ROUT "HPL_numrocI" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_unit_info +STDC_ARGS( +( FILE * *, int *, int *, int *, + int *, int *, int *, int *, + int *, int *, int *, char [][HPL_MAXRNAME], + int [] ) ); + +void HPL_unit_indxg2l +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxg2l +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_indxl2g +STDC_ARGS( +( FILE *, const 
int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxl2g +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_indxg2p +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxg2p +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_numroc +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +void HPL_unit_numrocI +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, const int, long *, long * ) ); +int HPL_chek_numrocI +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, const int, long *, long * ) ); + +#endif +/* + * End of hpl_units.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hplconfig.h.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hplconfig.h.in new file mode 100644 index 000000000..b4b3b9a35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/include/hplconfig.h.in @@ -0,0 +1,67 @@ +/* include/hplconfig.h.in. Generated from configure.ac by autoheader. */ + +/* Define if you have a BLAS library. */ +#undef HAVE_BLAS + +/* Define to 1 if you have the `dgemm_' function. */ +#undef HAVE_DGEMM_ + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define if you have the MPI library. */ +#undef HAVE_MPI + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_MPI_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/install-sh b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/install-sh new file mode 100755 index 000000000..8175c640f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/install-sh @@ -0,0 +1,518 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2018-03-11.20; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. 
+# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. 
+ +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. 
+ case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. 
+ *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. 
+ *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + # Note that $RANDOM variable is not portable (e.g. dash); Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p' feature. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. 
+ rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. 
+ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.auxil b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.auxil new file mode 100644 index 000000000..e92d18b80 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.auxil @@ -0,0 +1,100 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h +# +## Object files ######################################################## +# +HPL_au0obj = \ + HPL_dlacpy.o HPL_dlatcpy.o HPL_fprintf.o \ + HPL_warn.o HPL_abort.o HPL_dlaprnt.o \ + HPL_dlange.o +HPL_au1obj = \ + HPL_dlamch.o +HPL_auxobj = \ + $(HPL_au0obj) $(HPL_au1obj) +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_auxobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlacpy.o : ../HPL_dlacpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlacpy.c +HPL_dlatcpy.o : ../HPL_dlatcpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlatcpy.c +HPL_fprintf.o : ../HPL_fprintf.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_fprintf.c +HPL_warn.o : ../HPL_warn.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_warn.c +HPL_abort.o 
: ../HPL_abort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_abort.c +HPL_dlaprnt.o : ../HPL_dlaprnt.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaprnt.c +HPL_dlange.o : ../HPL_dlange.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlange.c +HPL_dlamch.o : ../HPL_dlamch.c $(INCdep) + $(CC) -o $@ -c $(CCNOOPT) ../HPL_dlamch.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.blas b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.blas new file mode 100644 index 000000000..ed9f3d0e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.blas @@ -0,0 +1,98 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. 
The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h +# +## Object files ######################################################## +# +HPL_blaobj = \ + HPL_dcopy.o HPL_daxpy.o HPL_dscal.o \ + HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o \ + HPL_dger.o HPL_dgemm.o HPL_dtrsm.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_blaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dcopy.o : ../HPL_dcopy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dcopy.c +HPL_daxpy.o : ../HPL_daxpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_daxpy.c +HPL_dscal.o : ../HPL_dscal.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dscal.c +HPL_idamax.o : ../HPL_idamax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_idamax.c +HPL_dgemv.o : ../HPL_dgemv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemv.c +HPL_dtrsv.o : ../HPL_dtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsv.c +HPL_dger.o : ../HPL_dger.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dger.c +HPL_dgemm.o : ../HPL_dgemm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemm.c +HPL_dtrsm.o : ../HPL_dtrsm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsm.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.comm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.comm new file mode 100644 index 000000000..529fe9aea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.comm @@ 
-0,0 +1,111 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_comobj = \ + HPL_1ring.o HPL_1rinM.o HPL_2ring.o \ + HPL_2rinM.o HPL_blong.o HPL_blonM.o \ + HPL_packL.o HPL_copyL.o HPL_binit.o \ + HPL_bcast.o HPL_bwait.o HPL_send.o \ + HPL_recv.o HPL_sdrv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_comobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_1ring.o : ../HPL_1ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1ring.c +HPL_1rinM.o : ../HPL_1rinM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1rinM.c +HPL_2ring.o : ../HPL_2ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_2ring.c +HPL_2rinM.o : ../HPL_2rinM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_2rinM.c +HPL_blong.o : ../HPL_blong.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blong.c +HPL_blonM.o : ../HPL_blonM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blonM.c +HPL_packL.o : ../HPL_packL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_packL.c 
+HPL_copyL.o : ../HPL_copyL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_copyL.c +HPL_binit.o : ../HPL_binit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_binit.c +HPL_bcast.o : ../HPL_bcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bcast.c +HPL_bwait.o : ../HPL_bwait.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bwait.c +HPL_send.o : ../HPL_send.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_send.c +HPL_recv.o : ../HPL_recv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_recv.c +HPL_sdrv.o : ../HPL_sdrv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sdrv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.gesv b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.gesv new file mode 100644 index 000000000..2a8722559 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.gesv @@ -0,0 +1,83 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h +# +## Object files ######################################################## +# +HPL_gesobj = \ + HPL_dgesv.o HPL_ipid.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_gesobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_gesobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dgesv.o : ../HPL_dgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgesv.c +HPL_ipid.o : ../HPL_ipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ipid.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.grid b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.grid new file mode 100644 index 000000000..51549d817 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.grid @@ -0,0 +1,103 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h +# +## Object files ######################################################## +# +HPL_griobj = \ + HPL_grid_init.o HPL_pnum.o HPL_grid_info.o \ + HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o \ + HPL_all_reduce.o HPL_barrier.o HPL_min.o \ + HPL_max.o HPL_sum.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_griobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_grid_init.o : ../HPL_grid_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_init.c +HPL_pnum.o : ../HPL_pnum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pnum.c +HPL_grid_info.o : ../HPL_grid_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_info.c +HPL_grid_exit.o : ../HPL_grid_exit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_exit.c +HPL_broadcast.o : ../HPL_broadcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_broadcast.c +HPL_reduce.o : ../HPL_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_reduce.c +HPL_all_reduce.o : ../HPL_all_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_all_reduce.c +HPL_barrier.o : ../HPL_barrier.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_barrier.c +HPL_min.o : ../HPL_min.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_min.c +HPL_max.o : ../HPL_max.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_max.c +HPL_sum.o : ../HPL_sum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sum.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.matgen b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.matgen new file mode 100644 index 000000000..f027fbc06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.matgen @@ -0,0 +1,95 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_matgen.h +# +## Object files ######################################################## +# +HPL_matobj = \ + HPL_dmatgen.o HPL_ladd.o HPL_lmul.o \ + HPL_xjumpm.o HPL_jumpit.o HPL_rand.o \ + HPL_setran.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_matobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c +HPL_ladd.o : ../HPL_ladd.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c +HPL_lmul.o : ../HPL_lmul.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c +HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c +HPL_jumpit.o : ../HPL_jumpit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c +HPL_rand.o : ../HPL_rand.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c +HPL_setran.o : ../HPL_setran.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c +# +# ###################################################################### +# +clean : + $(RM) *.o 
*.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.panel b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.panel new file mode 100644 index 000000000..804749cc2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.panel @@ -0,0 +1,90 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_panobj = \ + HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o \ + HPL_pdpanel_free.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_panobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_new.c +HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_init.c +HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_disp.c +HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_free.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# 
###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pauxil b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pauxil new file mode 100644 index 000000000..ea93cd150 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pauxil @@ -0,0 +1,137 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_pauxil.h +# +## Object files ######################################################## +# +HPL_pauobj = \ + HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o \ + HPL_indxl2g.o HPL_infog2l.o HPL_numroc.o \ + HPL_numrocI.o HPL_dlaswp00N.o HPL_dlaswp10N.o \ + HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o \ + HPL_dlaswp03N.o HPL_dlaswp03T.o HPL_dlaswp04N.o \ + HPL_dlaswp04T.o HPL_dlaswp05N.o HPL_dlaswp05T.o \ + HPL_dlaswp06N.o HPL_dlaswp06T.o HPL_pwarn.o \ + HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o \ + HPL_pdlange.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pauobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2l.c +HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2lp.c +HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2p.c +HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep) + $(CC) -o $@ -c 
$(CCFLAGS) ../HPL_indxl2g.c +HPL_infog2l.o : ../HPL_infog2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_infog2l.c +HPL_numroc.o : ../HPL_numroc.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numroc.c +HPL_numrocI.o : ../HPL_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numrocI.c +HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp00N.c +HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp10N.c +HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01N.c +HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01T.c +HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp02N.c +HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03N.c +HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03T.c +HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04N.c +HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04T.c +HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05N.c +HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05T.c +HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06N.c +HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06T.c +HPL_pwarn.o : ../HPL_pwarn.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pwarn.c +HPL_pabort.o : ../HPL_pabort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pabort.c +HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaprnt.c +HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlamch.c +HPL_pdlange.o : ../HPL_pdlange.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlange.c +# +# 
###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pfact b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pfact new file mode 100644 index 000000000..bf4634d31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pfact @@ -0,0 +1,118 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h +# +## Object files ######################################################## +# +HPL_pfaobj = \ + HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o \ + HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o \ + HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o \ + HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o \ + HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o \ + HPL_pdrpanrlT.o HPL_pdfact.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pfaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocmax.c +HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpN.c 
+HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpT.c +HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmxswp.c +HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrN.c +HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrT.c +HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllN.c +HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllT.c +HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlN.c +HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlT.c +HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllN.c +HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllT.c +HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrN.c +HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrT.c +HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlN.c +HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlT.c +HPL_pdfact.o : ../HPL_pdfact.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdfact.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pgesv b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pgesv new file mode 100644 index 000000000..7898665f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pgesv @@ -0,0 +1,136 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# 
Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_pgeobj = \ + HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o \ + HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o \ + HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o \ + HPL_spreadT.o HPL_rollN.o HPL_rollT.o \ + HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o \ + HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o \ + HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o \ + HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pgeobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pipid.o : ../HPL_pipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pipid.c +HPL_plindx0.o : ../HPL_plindx0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx0.c +HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00N.c +HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00T.c +HPL_perm.o : ../HPL_perm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_perm.c +HPL_logsort.o : ../HPL_logsort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_logsort.c +HPL_plindx10.o : ../HPL_plindx10.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx10.c +HPL_plindx1.o : ../HPL_plindx1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx1.c +HPL_spreadN.o : ../HPL_spreadN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) 
../HPL_spreadN.c +HPL_spreadT.o : ../HPL_spreadT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadT.c +HPL_rollN.o : ../HPL_rollN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollN.c +HPL_rollT.o : ../HPL_rollT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollT.c +HPL_equil.o : ../HPL_equil.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_equil.c +HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01N.c +HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01T.c +HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNN.c +HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNT.c +HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTN.c +HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTT.c +HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtrsv.c +HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv0.c +HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK1.c +HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK2.c +HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pmatgen b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pmatgen new file mode 100644 index 000000000..bf33fcd7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.pmatgen @@ -0,0 +1,81 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 
2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h +# +## Object files ######################################################## +# +HPL_pmaobj = \ + HPL_pdmatgen.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pmaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmatgen.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptest b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptest new file mode 100644 index 000000000..cfc96e667 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptest @@ -0,0 +1,94 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \ + $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h +# +## Executable names #################################################### +# +xhpl = $(BINdir)/xhpl +# +## Object files ######################################################## +# +HPL_pteobj = \ + HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/HPL.dat : ../HPL.dat + ( $(CP) ../HPL.dat $(BINdir) ) +# +dexe.grd: $(HPL_pteobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/HPL.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_pddriver.o : ../HPL_pddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pddriver.c +HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdinfo.c +HPL_pdtest.o : ../HPL_pdtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtest.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptimer b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptimer new file mode 100644 index 000000000..971500764 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.ptimer @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_ptimer.h +# +## Object files ######################################################## +# +HPL_ptiobj = \ + HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_ptiobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_ptimer.o : ../HPL_ptimer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer.c +HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_cputime.c +HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.test b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.test new file mode 100644 index 000000000..514d445b8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.test @@ -0,0 +1,93 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_timer.h \ + $(INCdir)/hpl_test.h +# +## Executable names #################################################### +# +xlinpack = $(BINdir)/xlinpack +# +## Object files ######################################################## +# +HPL_tesobj = \ + HPL_ddriver.o HPL_dinfo.o HPL_dtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/LINPACK.dat : ../LINPACK.dat + ( $(CP) ../LINPACK.dat $(BINdir) ) +# Link the serial driver against $(HPL_LIBS), the library list Make.ptest also uses (presumably set via Make.inc - confirm); replaces the unexpanded HPL_make_libs placeholder. +dexe.grd: $(HPL_tesobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xlinpack) $(HPL_tesobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/LINPACK.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_ddriver.o : ../HPL_ddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ddriver.c +HPL_dinfo.o : ../HPL_dinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dinfo.c +HPL_dtest.o : ../HPL_dtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtest.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.timer b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.timer new file mode 100644 index 000000000..b8009e88a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.timer @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h +# +## Object files ######################################################## +# +HPL_timobj = \ + HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_timobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_timer.o : ../HPL_timer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer.c +HPL_timer_cputime.o : ../HPL_timer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_cputime.c +HPL_timer_walltime.o : ../HPL_timer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.units b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.units new file mode 100644 index 000000000..1c447f204 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/makes/Make.units @@ -0,0 +1,112 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +# Headers every unit-test object below depends on (stray '@rout Make.units' extraction marker removed from this list). +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_auxil.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_units.h +# +## Executable names #################################################### +# +xunits = $(BINdir)/xunits +# +## Object files ######################################################## +# +HPL_uniobj = \ + HPL_unit_driver.o HPL_unit_info.o HPL_unit_indxg2l.o \ + HPL_chek_indxg2l.o HPL_unit_indxg2p.o HPL_chek_indxg2p.o \ + HPL_unit_indxl2g.o HPL_chek_indxl2g.o HPL_unit_numroc.o \ + HPL_unit_numrocI.o HPL_chek_numrocI.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/UNITS.dat : ../UNITS.dat + ( $(CP) ../UNITS.dat $(BINdir) ) +# Link the unit-test driver against $(HPL_LIBS), the library list Make.ptest also uses (presumably set via Make.inc - confirm); replaces the unexpanded @(hpllibs) placeholder. +dexe.grd : $(HPL_uniobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xunits) $(HPL_uniobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/UNITS.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_unit_driver.o : ../HPL_unit_driver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_driver.c +HPL_unit_info.o : ../HPL_unit_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_info.c +HPL_unit_indxg2l.o : ../HPL_unit_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2l.c +HPL_chek_indxg2l.o : ../HPL_chek_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2l.c +HPL_unit_indxg2p.o : ../HPL_unit_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2p.c +HPL_chek_indxg2p.o : ../HPL_chek_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2p.c +HPL_unit_indxl2g.o : ../HPL_unit_indxl2g.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxl2g.c +HPL_chek_indxl2g.o : ../HPL_chek_indxl2g.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxl2g.c +HPL_unit_numroc.o
: ../HPL_unit_numroc.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_numroc.c +HPL_unit_numrocI.o : ../HPL_unit_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_numrocI.c +HPL_chek_numrocI.o : ../HPL_chek_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_numrocI.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_abort.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_abort.3 new file mode 100644 index 000000000..c6a2c7a70 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_abort.3 @@ -0,0 +1,52 @@ +.TH HPL_abort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_abort \- halts execution. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_abort(\fR +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_abort\fR +displays an error message on stderr and halts execution. +.SH ARGUMENTS +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string.
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_abort( __LINE__, __FILE__, "Halt.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_fprintf \ (3), +.BR HPL_warn \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_all_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_all_reduce.3 new file mode 100644 index 000000000..70ec6c4ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_all_reduce.3 @@ -0,0 +1,49 @@ +.TH HPL_all_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_all_reduce \- All reduce operation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_all_reduce(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const HPL_T_OP \fR +\fI\&OP\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_all_reduce\fR +performs a global reduce operation across all +processes of a group leaving the results on all processes. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/global output) void * +On entry, BUFFER points to the buffer to be combined. On +exit, this array contains the combined data and is identical +on all processes in the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffer's operands. +.TP 8 +OP (global input) const HPL_T_OP +On entry, OP is a pointer to the local combine function. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_barrier.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_barrier.3 new file mode 100644 index 000000000..ffee7f291 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_barrier.3 @@ -0,0 +1,27 @@ +.TH HPL_barrier 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_barrier \- Barrier operation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_barrier(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_barrier\fR +blocks the caller until all process members have called it. +The call returns at any process only after all group members have +entered the call. +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bcast.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bcast.3 new file mode 100644 index 000000000..54eb54b25 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bcast.3 @@ -0,0 +1,31 @@ +.TH HPL_bcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_bcast \- Perform the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_bcast(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&int *\fR +\fI\&IFLAG\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_bcast\fR +broadcasts the current panel. Successful completion is +indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to +HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was +not completed, in which case this function should be called again.
+.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.TP 8 +IFLAG (output) int * +On exit, IFLAG indicates whether or not the broadcast has +occurred. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_binit.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_binit.3 new file mode 100644 index 000000000..083776ab6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_binit.3 @@ -0,0 +1,23 @@ +.TH HPL_binit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_binit \- Initialize the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_binit(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_binit\fR +initializes a row broadcast. Successful completion is +indicated by the returned error code HPL_SUCCESS. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_broadcast.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_broadcast.3 new file mode 100644 index 000000000..317d374cf --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_broadcast.3 @@ -0,0 +1,49 @@ +.TH HPL_broadcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_broadcast \- Broadcast operation.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_broadcast(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const int\fR +\fI\&ROOT\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_broadcast\fR +broadcasts a message from the process with rank ROOT to +all processes in the group. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/output) void * +On entry, BUFFER points to the buffer to be broadcast. On +exit, this array contains the broadcast data and is identical +on all processes in the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.TP 8 +ROOT (global input) const int +On entry, ROOT is the coordinate of the source process. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bwait.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bwait.3 new file mode 100644 index 000000000..0dac6fe58 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_bwait.3 @@ -0,0 +1,24 @@ +.TH HPL_bwait 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_bwait \- Finalize the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_bwait(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_bwait\fR +waits for the row broadcast of the current panel to +terminate. Successful completion is indicated by the returned error +code HPL_SUCCESS.
+.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_copyL.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_copyL.3 new file mode 100644 index 000000000..d60619a06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_copyL.3 @@ -0,0 +1,28 @@ +.TH HPL_copyL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_copyL \- Copy the current panel into a contiguous workspace. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_copyL(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_copyL\fR +copies the panel of columns, the L1 replicated submatrix, +the pivot array and the info scalar into a contiguous workspace for +later broadcast. + +The copy of this panel into a contiguous buffer can be enforced by +specifying -DHPL_COPY_L in the architecture specific Makefile. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_daxpy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_daxpy.3 new file mode 100644 index 000000000..50bd0b0a8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_daxpy.3 @@ -0,0 +1,76 @@ +.TH HPL_daxpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_daxpy \- y := y + alpha * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_daxpy(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_daxpy\fR +scales the vector x by alpha and adds it to y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero, then the entries of the incremented array X +need not be set on input. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the scaled entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_daxpy( 3, 2.0, x, 1, y, 1 ); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dcopy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dcopy.3 new file mode 100644 index 000000000..f2759ced9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dcopy.3 @@ -0,0 +1,69 @@ +.TH HPL_dcopy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dcopy \- y := x. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dcopy(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dcopy\fR +copies the vector x into the vector y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_dcopy( 3, x, 1, y, 1 ); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemm.3 new file mode 100644 index 000000000..57c69f78c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemm.3 @@ -0,0 +1,160 @@ +.TH HPL_dgemm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dgemm \- C := alpha * op(A) * op(B) + beta * C. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dgemm(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANSA\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANSB\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR, +\fB\&const double\fR +\fI\&BETA\fR, +\fB\&double *\fR +\fI\&C\fR, +\fB\&const int\fR +\fI\&LDC\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dgemm\fR +performs one of the matrix-matrix operations + + C := alpha * op( A ) * op( B ) + beta * C + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +Alpha and beta are scalars, and A, B and C are matrices, with op(A) +an m by k matrix, op(B) a k by n matrix and C an m by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +TRANSA (local input) const enum HPL_TRANS +On entry, TRANSA specifies the form of op(A) to be used in +the matrix-matrix operation follows: + TRANSA==HplNoTrans : op( A ) = A, + TRANSA==HplTrans : op( A ) = A^T, + TRANSA==HplConjTrans : op( A ) = A^T. 
+.TP 8 +TRANSB (local input) const enum HPL_TRANS +On entry, TRANSB specifies the form of op(B) to be used in +the matrix-matrix operation as follows: + TRANSB==HplNoTrans : op( B ) = B, + TRANSB==HplTrans : op( B ) = B^T, + TRANSB==HplConjTrans : op( B ) = B^T. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix +op(A) and of the matrix C. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix +op(B) and the number of columns of the matrix C. N must be +at least zero. +.TP 8 +K (local input) const int +On entry, K specifies the number of columns of the matrix +op(A) and the number of rows of the matrix op(B). K must +be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then the elements of the matrices A and B +need not be set on input. +.TP 8 +A (local input) const double * +On entry, A is an array of dimension (LDA,ka), where ka is +k when TRANSA==HplNoTrans, and is m otherwise. Before +entry with TRANSA==HplNoTrans, the leading m by k part of +the array A must contain the matrix A, otherwise the leading +k by m part of the array A must contain the matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the first dimension of A as declared +in the calling (sub) program. When TRANSA==HplNoTrans then +LDA must be at least max(1,m), otherwise LDA must be at least +max(1,k). +.TP 8 +B (local input) const double * +On entry, B is an array of dimension (LDB,kb), where kb is +n when TRANSB==HplNoTrans, and is k otherwise. Before +entry with TRANSB==HplNoTrans, the leading k by n part of +the array B must contain the matrix B, otherwise the leading +n by k part of the array B must contain the matrix B. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the first dimension of B as declared +in the calling (sub) program.
When TRANSB==HplNoTrans then +LDB must be at least max(1,k), otherwise LDB must be at least +max(1,n). +.TP 8 +BETA (local input) const double +On entry, BETA specifies the scalar beta. When BETA is +supplied as zero then the elements of the matrix C need +not be set on input. +.TP 8 +C (local input/output) double * +On entry, C is an array of dimension (LDC,n). Before entry, +the leading m by n part of the array C must contain the +matrix C, except when beta is zero, in which case C need not +be set on entry. On exit, the array C is overwritten by the +m by n matrix ( alpha*op( A )*op( B ) + beta*C ). +.TP 8 +LDC (local input) const int +On entry, LDC specifies the first dimension of C as declared +in the calling (sub) program. LDC must be at least +max(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2], c[2*2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; +.br + c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0; +.br + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, +.br + 2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 ); +.br + printf(" [%f,%f]\en", c[0], c[2]); +.br + printf("c=[%f,%f]\en", c[1], c[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dtrsm \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemv.3 new file mode 100644 index 000000000..f85db57fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dgemv.3 @@ -0,0 +1,128 @@ +.TH HPL_dgemv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dgemv \- y := beta * y + alpha * op(A) * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dgemv(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&const double\fR +\fI\&BETA\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dgemv\fR +performs one of the matrix-vector operations + + y := alpha * op( A ) * x + beta * y, + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +where alpha and beta are scalars, x and y are vectors and A is an m +by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the operation to be performed as +follows: + TRANS = HplNoTrans y := alpha*A *x + beta*y, + TRANS = HplTrans y := alpha*A^T*x + beta*y. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then A and X need not be set on input. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * n. Before entry, the leading m by n part of the +array A must contain the matrix coefficients. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m). 
+.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +BETA (local input) const double +On entry, BETA specifies the scalar beta. When BETA is +supplied as zero then Y need not be set on input. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +Before entry with BETA non-zero, the incremented array Y must +contain the vector y. On exit, Y is overwritten by the +updated vector y. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2], y[2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; +.br + HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0, +.br + a, 2, x, 1, -1.0, y, 1 ); +.br + printf("y=[%f,%f]\en", y[0], y[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dger \ (3), +.BR HPL_dtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dger.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dger.3 new file mode 100644 index 000000000..da9ddf495 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dger.3 @@ -0,0 +1,108 @@ +.TH HPL_dger 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dger \- A := alpha * x * y^T + A.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dger(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dger\fR +performs the rank 1 operation + + A := alpha * x * y^T + A, + +where alpha is a scalar, x is an m-element vector, y is an n-element +vector and A is an m by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then X and Y need not be set on input. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of size equal to or greater +than LDA * n. Before entry, the leading m by n part of the +array A must contain the matrix coefficients. 
On exit, A is +overwritten by the updated matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2], y[2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; +.br + HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1, +.br + a, 2 ); +.br + printf("y=[%f,%f]\en", y[0], y[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dgemv \ (3), +.BR HPL_dtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlacpy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlacpy.3 new file mode 100644 index 000000000..8da8b1316 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlacpy.3 @@ -0,0 +1,72 @@ +.TH HPL_dlacpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlacpy \- B := A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlacpy(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlacpy\fR +copies an array A into an array B. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the arrays A and +B. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the arrays A +and B. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N). +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M).
+.TP 8 +B (local output) double * +On entry, B points to an array of dimension (LDB,N). On exit, +B is overwritten with A. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of the array B. +LDB must be at least MAX(1,M). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlacpy( 2, 2, a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); +.br + return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlatcpy \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlamch.3 new file mode 100644 index 000000000..9bf41b68a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlamch.3 @@ -0,0 +1,76 @@ +.TH HPL_dlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlamch \- determines machine-specific arithmetic constants. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_dlamch(\fR +\fB\&const HPL_T_MACH\fR +\fI\&CMACH\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlamch\fR +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such +that 1 / sfmin does not overflow, the base of the machine (base), the +precision (prec), the number of (base) digits in the mantissa (t), +whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the +minimum exponent before (gradual) underflow (emin), the underflow +threshold (rmin) base**(emin-1), the largest exponent before overflow +(emax), the overflow threshold (rmax) (base**emax)*(1-eps). 
+.SH ARGUMENTS +.TP 8 +CMACH (local input) const HPL_T_MACH +Specifies the value to be returned by HPL_dlamch + = HPL_MACH_EPS, HPL_dlamch := eps (default) + = HPL_MACH_SFMIN, HPL_dlamch := sfmin + = HPL_MACH_BASE, HPL_dlamch := base + = HPL_MACH_PREC, HPL_dlamch := eps*base + = HPL_MACH_MLEN, HPL_dlamch := t + = HPL_MACH_RND, HPL_dlamch := rnd + = HPL_MACH_EMIN, HPL_dlamch := emin + = HPL_MACH_RMIN, HPL_dlamch := rmin + = HPL_MACH_EMAX, HPL_dlamch := emax + = HPL_MACH_RMAX, HPL_dlamch := rmax + +where + + eps = relative machine precision, + sfmin = safe minimum, + base = base of the machine, + prec = eps*base, + t = number of digits in the mantissa, + rnd = 1.0 if rounding occurs in addition, + emin = minimum exponent before underflow, + rmin = underflow threshold, + emax = largest exponent before overflow, + rmax = overflow threshold. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double eps; +.br + eps = HPL_dlamch( HPL_MACH_EPS ); +.br + printf("eps=%18.8e\en", eps); +.br + exit(0); return(0); +.br +} +.SH REFERENCES +This function has been manually translated from the Fortran 77 LAPACK +auxiliary function dlamch.f (version 2.0 -- 1992), that was itself +based on the function ENVRON by Malcolm and incorporated suggestions +by Gentleman and Marovich. See + +Malcolm M. A., Algorithms to reveal properties of floating-point +arithmetic., Comms. of the ACM, 15, 949-951 (1972). + +Gentleman W. M. and Marovich S. B., More on algorithms that reveal +properties of floating point arithmetic units., Comms. of the ACM, +17, 276-277 (1974). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlange.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlange.3 new file mode 100644 index 000000000..ffbab554f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlange.3 @@ -0,0 +1,73 @@ +.TH HPL_dlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlange \- Compute ||A||. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_dlange(\fR +\fB\&const HPL_T_NORM\fR +\fI\&NORM\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlange\fR +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a matrix A: + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. +.SH ARGUMENTS +.TP 8 +NORM (local input) const HPL_T_NORM +On entry, NORM specifies the value to be returned by this +function as described above. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N), that +contains the matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], norm; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 ); +.br + printf("norm=%f\en", norm); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlaprnt \ (3), +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaprnt.3 new file mode 100644 index 000000000..8fdd89b8c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaprnt.3 @@ -0,0 +1,70 @@ +.TH HPL_dlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaprnt \- Print the matrix A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaprnt(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const char *\fR +\fI\&CMATNM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaprnt\fR +prints to standard error an M-by-N matrix A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A. M must be at +least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of A. N must be +at least zero. +.TP 8 +A (local input) double * +On entry, A points to an array of dimension (LDA,N). +.TP 8 +IA (local input) const int +On entry, IA specifies the starting row index to be printed. +.TP 8 +JA (local input) const int +On entry, JA specifies the starting column index to be +printed. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). +.TP 8 +CMATNM (local input) const char * +On entry, CMATNM is the name of the matrix to be printed.
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp00N.3 new file mode 100644 index 000000000..efe3580b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp00N.3 @@ -0,0 +1,60 @@ +.TH HPL_dlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp00N \- performs a series of row interchanges. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp00N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int *\fR +\fI\&IPIV\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp00N\fR +performs a series of local row interchanges on a matrix +A. One row interchange is initiated for rows 0 through M-1 of A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array A to be +interchanged. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the array A. +N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N) to which +the row interchanges will be applied. On exit, the permuted +matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +IPIV (local input) const int * +On entry, IPIV is an array of size M that contains the +pivoting information. For k in [0..M), IPIV[k]=IROFF + l +implies that local rows k and l are to be interchanged. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01N.3 new file mode 100644 index 000000000..662913e54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01N.3 @@ -0,0 +1,88 @@ +.TH HPL_dlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp01N \- copies rows of A into itself and into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp01N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp01N\fR +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +moved within A or copied into U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +moved within A or copied into U. N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). 
The rows +of this array specified by LINDXA should be moved within A or +copied into U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). The rows +of A specified by LINDXA are copied within this array U at +the positions indicated by positive values of LINDXAU. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be moved within A or +copied into U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U where the rows of A should be +copied at. This array also contains the local row offsets in +A where some of the rows of A should be moved to. A positive +value of LINDXAU[i] indicates that the row LINDXA[i] of A +should be copied into U at the position LINDXAU[i]; otherwise +the row LINDXA[i] of A should be moved at the position +-LINDXAU[i] within A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01T.3 new file mode 100644 index 000000000..738507755 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp01T.3 @@ -0,0 +1,89 @@ +.TH HPL_dlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp01T \- copies rows of A into itself and into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp01T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp01T\fR +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. Rows of A are stored as columns in U. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +moved within A or copied into U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +moved within A or copied into U. N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). The rows +of this array specified by LINDXA should be moved within A or +copied into U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,M). 
The rows +of A specified by LINDXA are copied within this array U at +the positions indicated by positive values of LINDXAU. The +rows of A are stored as columns in U. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be moved within A or +copied into U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U where the rows of A should be +copied at. This array also contains the local row offsets in +A where some of the rows of A should be moved to. A positive +value of LINDXAU[i] indicates that the row LINDXA[i] of A +should be copied into U at the position LINDXAU[i]; otherwise +the row LINDXA[i] of A should be moved at the position +-LINDXAU[i] within A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp02N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp02N.3 new file mode 100644 index 000000000..600449c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp02N.3 @@ -0,0 +1,85 @@ +.TH HPL_dlaswp02N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp02N \- pack rows of A into columns of W.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp02N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&W0\fR, +\fB\&double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp02N\fR +packs scattered rows of an array A into workspace W. +The row offsets in A are specified by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +copied into W. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +copied into W. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N). The rows +of this array specified by LINDXA should be copied into W. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +W0 (local input/output) double * +On exit, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local output) double * +On entry, W is an array of size (LDW,M). On exit, W contains +the rows LINDXA[i] for i in [0..M) of A stored contiguously +in W(:,i). +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied into W. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U that should be copied into A and +replaced by the rows of W. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03N.3 new file mode 100644 index 000000000..1ba0b3208 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03N.3 @@ -0,0 +1,75 @@ +.TH HPL_dlaswp03N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp03N \- copy rows of W into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp03N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp03N\fR +copies columns of W into rows of an array U. The +destination in U of these columns contained in W is stored within W0. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of W stored +contiguously that should be copied into U. M must be at least +zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of columns of W stored +contiguously that should be copied into U. N must be at least +zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). Columns +of W are copied as rows within this array U at the positions +specified in W0. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). 
+.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M), that contains data +to be copied into U. For i in [0..M), entries W(:,i) should +be copied into the row or column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03T.3 new file mode 100644 index 000000000..d8bd11ec1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp03T.3 @@ -0,0 +1,75 @@ +.TH HPL_dlaswp03T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp03T \- copy columns of W into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp03T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp03T\fR +copies columns of W into an array U. The destination +in U of these columns contained in W is stored within W0. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of W stored +contiguously that should be copied into U. 
M must be at least +zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of columns of W stored +contiguously that should be copied into U. N must be at least +zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,M). Columns +of W are copied within the array U at the positions specified +in W0. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M), that contains data +to be copied into U. For i in [0..M), entries W(:,i) should +be copied into the row or column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04N.3 new file mode 100644 index 000000000..9f12d79ab --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04N.3 @@ -0,0 +1,106 @@ +.TH HPL_dlaswp04N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp04N \- copy rows of U in A and replace them with columns of W. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp04N(\fR +\fB\&const int\fR +\fI\&M0\fR, +\fB\&const int\fR +\fI\&M1\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp04N\fR +copies M0 rows of U into A and replaces those rows of U +with columns of W. In addition M1 - M0 columns of W are copied into +rows of U. +.SH ARGUMENTS +.TP 8 +M0 (local input) const int +On entry, M0 specifies the number of rows of U that should be +copied into A and replaced by columns of W. M0 must be at +least zero. +.TP 8 +M1 (local input) const int +On entry, M1 specifies the number of columns of W that should +be copied into rows of U. M1 must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of U that should +be copied into A. N must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows that are to be copied into A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M1). +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M0). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. 
+.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M0+M1), that contains +data to be copied into U. For i in [M0..M0+M1), the entries +W(:,i) are copied into the row W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M0 containing the +local row indexes A into which rows of U are copied. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M0 that contains +the local row indexes of U that should be copied into A and +replaced by the columns of W. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04T.3 new file mode 100644 index 000000000..448334148 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp04T.3 @@ -0,0 +1,107 @@ +.TH HPL_dlaswp04T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp04T \- copy columns of U in rows of A and replace them with columns of W. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp04T(\fR +\fB\&const int\fR +\fI\&M0\fR, +\fB\&const int\fR +\fI\&M1\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp04T\fR +copies M0 columns of U into rows of A and replaces those +columns of U with columns of W. In addition M1 - M0 columns of W are +copied into U. +.SH ARGUMENTS +.TP 8 +M0 (local input) const int +On entry, M0 specifies the number of columns of U that should +be copied into A and replaced by columns of W. M0 must be at +least zero. +.TP 8 +M1 (local input) const int +On entry, M1 specifies the number of columns of W that will +be copied into U. M1 must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the columns of U that +will be copied into rows of A. N must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns that are to be copied into rows of +A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M0). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied.
+.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M0+M1), that contains +data to be copied into U. For i in [M0..M0+M1), the entries +W(:,i) are copied into the column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M0 containing the +local row indexes A into which columns of U are copied. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M0 that contains +the local column indexes of U that should be copied into A +and replaced by the columns of W. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05N.3 new file mode 100644 index 000000000..371dd0b92 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05N.3 @@ -0,0 +1,77 @@ +.TH HPL_dlaswp05N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp05N \- copy rows of U into A. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp05N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp05N\fR +copies rows of U of global offset LINDXAU into rows of +A at positions indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of U that should be +copied into A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of U that should +be copied into A. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) const double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows that are to be copied into A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied from U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U that should be copied in A. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05T.3 new file mode 100644 index 000000000..5d70a7a16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp05T.3 @@ -0,0 +1,77 @@ +.TH HPL_dlaswp05T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp05T \- copy rows of U into A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp05T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp05T\fR +copies columns of U of global offset LINDXAU into rows +of A at positions indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the columns of U that will +be copied into rows of A. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M).
+.TP 8 +U (local input/output) const double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns that are to be copied into rows of +A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied from U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local column indexes of U that should be copied in A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06N.3 new file mode 100644 index 000000000..7fa19d41a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06N.3 @@ -0,0 +1,72 @@ +.TH HPL_dlaswp06N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp06N \- swap rows of U with rows of A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp06N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp06N\fR +swaps rows of U with rows of A at positions +indicated by LINDXA. 
+.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +swapped with rows of U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of A that should +be swapped with rows of U. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows or columns of U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows of U that are to be swapped with rows +of A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be swapped with U. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06T.3 new file mode 100644 index 000000000..41fa3d6ee --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp06T.3 @@ -0,0 +1,72 @@ +.TH HPL_dlaswp06T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp06T \- swap rows or columns of U with rows of A. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp06T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp06T\fR +swaps columns of U with rows of A at positions +indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +swapped with columns of U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of A that should +be swapped with columns of U. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns of U that are to be swapped with +rows of A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be swapped with U. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp10N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp10N.3 new file mode 100644 index 000000000..23465895c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlaswp10N.3 @@ -0,0 +1,59 @@ +.TH HPL_dlaswp10N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp10N \- performs a series of column interchanges. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp10N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int *\fR +\fI\&IPIV\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp10N\fR +performs a sequence of local column interchanges on a +matrix A. One column interchange is initiated for columns 0 through +N-1 of A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the array A. N +must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). This +array contains the columns onto which the interchanges should +be applied. On exit, A contains the permuted matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +IPIV (local input) const int * +On entry, IPIV is an array of size N that contains the pivoting +information. For k in [0..N), IPIV[k]=IROFF + l implies that local columns k and l are to be interchanged. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlatcpy.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlatcpy.3 new file mode 100644 index 000000000..dc940e321 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlatcpy.3 @@ -0,0 +1,70 @@ +.TH HPL_dlatcpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlatcpy \- B := A^T +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlatcpy(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlatcpy\fR +copies the transpose of an array A into an array B. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array B and +the number of columns of A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of rows of the array A and +the number of columns of B. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,M). +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,N). +.TP 8 +B (local output) double * +On entry, B points to an array of dimension (LDB,N). On exit, +B is overwritten with the transpose of A. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of the array B. +LDB must be at least MAX(1,M). 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlatcpy( 2, 2, a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlacpy \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocmax.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocmax.3 new file mode 100644 index 000000000..f68f887c9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocmax.3 @@ -0,0 +1,69 @@ +.TH HPL_dlocmax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocmax \- finds the maximum entry in matrix column. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocmax(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocmax\fR +finds the maximum entry in the current column and packs +the useful information in WORK[0:3]. On exit, WORK[0] contains the +local maximum absolute value scalar, WORK[1] is the corresponding +local row index, WORK[2] is the corresponding global row index, and +WORK[3] is the coordinate of the process owning this max. When N is +less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set +to the total number of process rows. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +N (local input) const int +On entry, N specifies the local number of rows of the column +of A on which we operate. 
+.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. 
+.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 4. On exit, +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. +.SH SEE ALSO +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpN.3 new file mode 100644 index 000000000..367e37e36 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpN.3 @@ -0,0 +1,62 @@ +.TH HPL_dlocswpN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocswpN \- locally swaps rows within panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocswpN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocswpN\fR +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. 
+.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. The N0 length max +row is stored in WORK[4:4+N0-1]; Note that this is also the +JJth row (or column) of L1. The remaining part of this array +is used as workspace. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpT.3 new file mode 100644 index 000000000..f864de535 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dlocswpT.3 @@ -0,0 +1,62 @@ +.TH HPL_dlocswpT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocswpT \- locally swaps rows within panel. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocswpT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocswpT\fR +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. The N0 length max +row is stored in WORK[4:4+N0-1]; Note that this is also the +JJth row (or column) of L1. The remaining part of this array +is used as workspace. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dmatgen.3 new file mode 100644 index 000000000..c287fb0fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dmatgen.3 @@ -0,0 +1,55 @@ +.TH HPL_dmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dmatgen \- random matrix generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dmatgen(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&ISEED\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dmatgen\fR +generates (or regenerates) a random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. +.SH ARGUMENTS +.TP 8 +M (input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +A (output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +this array contains the coefficients of the randomly +generated matrix. +.TP 8 +LDA (input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). +.TP 8 +ISEED (input) const int +On entry, ISEED specifies the seed number to generate the +matrix A. ISEED must be at least zero. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dscal.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dscal.3 new file mode 100644 index 000000000..8f42a10f5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dscal.3 @@ -0,0 +1,62 @@ +.TH HPL_dscal 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dscal \- x = alpha * x. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dscal(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dscal\fR +scales the vector x by alpha. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vector x. N must be +at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero, then the entries of the incremented array X +need not be set on input. +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +On exit, the entries of the incremented array X are scaled +by the scalar alpha. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + HPL_dscal( 3, 2.0, x, 1 ); +.br + printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dswap.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dswap.3 new file mode 100644 index 000000000..a398f795a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dswap.3 @@ -0,0 +1,73 @@ +.TH HPL_dswap 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dswap \- y <-> x. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dswap(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dswap\fR +swaps the vectors x and y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +On exit, the entries of the incremented array X are updated +with the entries of the incremented array Y. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_dswap( 3, x, 1, y, 1 ); +.br + printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsm.3 new file mode 100644 index 000000000..ad099eb83 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsm.3 @@ -0,0 +1,152 @@ +.TH HPL_dtrsm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dtrsm \- B := A^{-1} * B or B := B * A^{-1}. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dtrsm(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const enum HPL_UPLO\fR +\fI\&UPLO\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const enum HPL_DIAG\fR +\fI\&DIAG\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dtrsm\fR +solves one of the matrix equations + + op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + +where alpha is a scalar, X and B are m by n matrices, A is a unit, or +non-unit, upper or lower triangular matrix and op(A) is one of + + op( A ) = A or op( A ) = A^T. + +The matrix X is overwritten on B. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. 
+.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +SIDE (local input) const enum HPL_SIDE +On entry, SIDE specifies whether op(A) appears on the left +or right of X as follows: + SIDE==HplLeft op( A ) * X = alpha * B, + SIDE==HplRight X * op( A ) = alpha * B. +.TP 8 +UPLO (local input) const enum HPL_UPLO +On entry, UPLO specifies whether the upper or lower +triangular part of the array A is to be referenced. When +UPLO==HplUpper, only the upper triangular part of A is to be +referenced, otherwise only the lower triangular part of A is +to be referenced. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the form of op(A) to be used in +the matrix-matrix operation as follows: + TRANS==HplNoTrans : op( A ) = A, + TRANS==HplTrans : op( A ) = A^T, + TRANS==HplConjTrans : op( A ) = A^T. +.TP 8 +DIAG (local input) const enum HPL_DIAG +On entry, DIAG specifies whether A is unit triangular or +not. When DIAG==HplUnit, A is assumed to be unit triangular, +and otherwise, A is not assumed to be unit triangular. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix B. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix B. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then the elements of the matrix B need not +be set on input. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * k, where k is m when SIDE==HplLeft and is n +otherwise. Before entry with UPLO==HplUpper, the leading +k by k upper triangular part of the array A must contain the +upper triangular matrix and the strictly lower triangular +part of A is not referenced. 
When UPLO==HplLower on entry, +the leading k by k lower triangular part of the array A must +contain the lower triangular matrix and the strictly upper +triangular part of A is not referenced. + +Note that when DIAG==HplUnit, the diagonal elements of A +are not referenced either, but are assumed to be unity. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. +.TP 8 +B (local input/output) double * +On entry, B points to an array of size equal to or greater +than LDB * n. Before entry, the leading m by n part of the +array B must contain the matrix B, except when alpha is zero, +in which case B need not be set on entry. On exit, the array +B is overwritten by the m by n solution matrix. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of B as +declared in the calling (sub) program. LDB must be at +least MAX(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; +.br + b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; +.br + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, +.br + HplNoTrans, HplNonUnit, 2, 2, 2.0, +.br + a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dgemm \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsv.3 new file mode 100644 index 000000000..5df37c78b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_dtrsv.3 @@ -0,0 +1,121 @@ +.TH HPL_dtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dtrsv \- x := A^{-1} x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dtrsv(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_UPLO\fR +\fI\&UPLO\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const enum HPL_DIAG\fR +\fI\&DIAG\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dtrsv\fR +solves one of the systems of equations + + A * x = b, or A^T * x = b, + +where b and x are n-element vectors and A is an n by n non-unit, or +unit, upper or lower triangular matrix. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +UPLO (local input) const enum HPL_UPLO +On entry, UPLO specifies whether the upper or lower +triangular part of the array A is to be referenced. When +UPLO==HplUpper, only the upper triangular part of A is to be +referenced, otherwise only the lower triangular part of A is +to be referenced. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the equations to be solved as +follows: + TRANS==HplNoTrans A * x = b, + TRANS==HplTrans A^T * x = b. +.TP 8 +DIAG (local input) const enum HPL_DIAG +On entry, DIAG specifies whether A is unit triangular or +not. When DIAG==HplUnit, A is assumed to be unit triangular, +and otherwise, A is not assumed to be unit triangular. +.TP 8 +N (local input) const int +On entry, N specifies the order of the matrix A. N must be at +least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * n. 
Before entry with UPLO==HplUpper, the leading +n by n upper triangular part of the array A must contain the +upper triangular matrix and the strictly lower triangular +part of A is not referenced. When UPLO==HplLower on entry, +the leading n by n lower triangular part of the array A must +contain the lower triangular matrix and the strictly upper +triangular part of A is not referenced. + +Note that when DIAG==HplUnit, the diagonal elements of A +are not referenced either, but are assumed to be unity. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,n). +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +Before entry, the incremented array X must contain the n +element right-hand side vector b. On exit, X is overwritten +with the solution vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2]; +.br + a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; +.br + x[0] = 2.0; x[1] = 1.0; +.br + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, +.br + HplNonUnit, 2, a, 2, x, 1 ); +.br + printf("x=[%f,%f]\en", x[0], x[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dger \ (3), +.BR HPL_dgemv \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_equil.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_equil.3 new file mode 100644 index 000000000..817780e44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_equil.3 @@ -0,0 +1,91 @@ +.TH HPL_equil 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_equil \- Equilibrate U and forward the column panel L. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_equil(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_equil\fR +equilibrates the local pieces of U, so that on exit to +this function, pieces of U contained in every process row are of the +same size. This phase makes the rolling phase optimal. In addition, +this function probes for the column panel L and forwards it when +possible. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be equilibrated) information. +.TP 8 +TRANS (global input) const enum HPL_TRANS +On entry, TRANS specifies whether U is stored in transposed +or non-transposed form. 
+.TP 8 +N (local input) const int +On entry, N specifies the number of rows or columns of U. N +must be at least 0. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[nprow]) when U is stored in +non-transposed form, and MAX(1,N) otherwise. +.TP 8 +IPLEN (global input) int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension NPROW+1. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_fprintf.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_fprintf.3 new file mode 100644 index 000000000..8a81c0bfb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_fprintf.3 @@ -0,0 +1,44 @@ +.TH HPL_fprintf 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_fprintf \- fprintf + fflush wrapper. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_fprintf(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_fprintf\fR +is a wrapper around fprintf flushing the output stream. 
+.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_fprintf( stdout, "Hello World.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_abort \ (3), +.BR HPL_warn \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_exit.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_exit.3 new file mode 100644 index 000000000..dab8067e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_exit.3 @@ -0,0 +1,25 @@ +.TH HPL_grid_exit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_exit \- Exit process grid. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_exit(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_exit\fR +marks the process grid object for deallocation. The +returned error code MPI_SUCCESS indicates successful completion. +Other error codes are (MPI) implementation dependent. +.SH ARGUMENTS +.TP 8 +GRID (local input/output) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid to be released. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_init \ (3), +.BR HPL_grid_info \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_info.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_info.3 new file mode 100644 index 000000000..53c6a214b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_info.3 @@ -0,0 +1,52 @@ +.TH HPL_grid_info 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_info \- Retrieve grid information. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_info(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&int *\fR +\fI\&NPROW\fR, +\fB\&int *\fR +\fI\&NPCOL\fR, +\fB\&int *\fR +\fI\&MYROW\fR, +\fB\&int *\fR +\fI\&MYCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_info\fR +returns the grid shape and the coordinates in the grid +of the calling process. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +NPROW (global output) int * +On exit, NPROW specifies the number of process rows in the +grid. NPROW is at least one. +.TP 8 +NPCOL (global output) int * +On exit, NPCOL specifies the number of process columns in +the grid. NPCOL is at least one. +.TP 8 +MYROW (global output) int * +On exit, MYROW specifies my row process coordinate in the +grid. MYROW is greater than or equal to zero and less than +NPROW. +.TP 8 +MYCOL (global output) int * +On exit, MYCOL specifies my column process coordinate in the +grid. MYCOL is greater than or equal to zero and less than +NPCOL. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_init \ (3), +.BR HPL_grid_exit \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_init.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_init.3 new file mode 100644 index 000000000..7792a522d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_grid_init.3 @@ -0,0 +1,55 @@ +.TH HPL_grid_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_init \- Create a process grid. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_init(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR, +\fB\&const HPL_T_ORDER\fR +\fI\&ORDER\fR, +\fB\&const int\fR +\fI\&NPROW\fR, +\fB\&const int\fR +\fI\&NPCOL\fR, +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_init\fR +creates a NPROW x NPCOL process grid using column- or +row-major ordering from an initial collection of processes identified +by an MPI communicator. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. The coordinates of processes that are not part of the +grid are set to values outside of [0..NPROW) x [0..NPCOL). +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +On entry, COMM is the MPI communicator identifying the +initial collection of processes out of which the grid is +formed. +.TP 8 +ORDER (global input) const HPL_T_ORDER +On entry, ORDER specifies how the processes should be ordered +in the grid as follows: + ORDER = HPL_ROW_MAJOR row-major ordering; + ORDER = HPL_COLUMN_MAJOR column-major ordering; +.TP 8 +NPROW (global input) const int +On entry, NPROW specifies the number of process rows in the +grid to be created. NPROW must be at least one. +.TP 8 +NPCOL (global input) const int +On entry, NPCOL specifies the number of process columns in +the grid to be created. NPCOL must be at least one. 
+.TP 8 +GRID (local input/output) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information to be initialized. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_info \ (3), +.BR HPL_grid_exit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_idamax.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_idamax.3 new file mode 100644 index 000000000..c00292a02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_idamax.3 @@ -0,0 +1,59 @@ +.TH HPL_idamax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|). +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_idamax(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_idamax\fR +returns the index in an n-vector x of the first element +having maximum absolute value. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vector x. N must be +at least zero. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3]; +.br + int imax; +.br + x[0] = 1.0; x[1] = 3.0; x[2] = 2.0; +.br + imax = HPL_idamax( 3, x, 1 ); +.br + printf("imax=%d\en", imax); +.br + exit(0); +.br + return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2l.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2l.3 new file mode 100644 index 000000000..32c4d9e07 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2l.3 @@ -0,0 +1,53 @@ +.TH HPL_indxg2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2l \- Map a global index into a local one. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxg2l(\fR +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2l\fR +computes the local index of a matrix entry pointed to by +the global index IG. This local returned index is the same in all +processes. +.SH ARGUMENTS +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, if SRCPROC = -1, the data is not distributed but +replicated, in which case this routine returns IG in all +processes. Otherwise, the value of SRCPROC is ignored. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2lp.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2lp.3 new file mode 100644 index 000000000..ca2004031 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2lp.3 @@ -0,0 +1,66 @@ +.TH HPL_indxg2lp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2lp \- Map a global index into a local index and process coordinate. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_indxg2lp(\fR +\fB\&int *\fR +\fI\&IL\fR, +\fB\&int *\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2lp\fR +computes the local index of a matrix entry pointed to by +the global index IG as well as the process coordinate which possesses +this entry. The local returned index is the same in all processes. +.SH ARGUMENTS +.TP 8 +IL (output) int * +On exit, IL specifies the local index corresponding to IG. IL +is at least zero. +.TP 8 +PROC (output) int * +On exit, PROC is the coordinate of the process owning the +entry specified by the global index IG. PROC is at least zero +and less than NPROCS. +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, if SRCPROC = -1, the data is not distributed but +replicated, in which case this routine returns IG in all +processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2p.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2p.3 new file mode 100644 index 000000000..5e0273feb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxg2p.3 @@ -0,0 +1,52 @@ +.TH HPL_indxg2p 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2p \- Map a global index into a process coordinate. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxg2p(\fR +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2p\fR +computes the process coordinate which possesses the entry +of a matrix specified by a global index IG. +.SH ARGUMENTS +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one.
+.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxl2g.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxl2g.3 new file mode 100644 index 000000000..ba6da53a7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_indxl2g.3 @@ -0,0 +1,59 @@ +.TH HPL_indxl2g 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxl2g \- Map an index-process pair into a global index. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxl2g(\fR +\fB\&const int\fR +\fI\&IL\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxl2g\fR +computes the global index of a matrix entry pointed to +by the local index IL of the process indicated by PROC. +.SH ARGUMENTS +.TP 8 +IL (input) const int +On entry, IL specifies the local index of the matrix entry. +IL must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local array row or column is to be determined. PROC must be +at least zero and strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS.
+.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_infog2l.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_infog2l.3 new file mode 100644 index 000000000..c07f276d5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_infog2l.3 @@ -0,0 +1,126 @@ +.TH HPL_infog2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_infog2l \- global to local index translation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_infog2l(\fR +\fB\&int\fR +\fI\&I\fR, +\fB\&int\fR +\fI\&J\fR, +\fB\&const int\fR +\fI\&IMB\fR, +\fB\&const int\fR +\fI\&MB\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&RSRC\fR, +\fB\&const int\fR +\fI\&CSRC\fR, +\fB\&const int\fR +\fI\&MYROW\fR, +\fB\&const int\fR +\fI\&MYCOL\fR, +\fB\&const int\fR +\fI\&NPROW\fR, +\fB\&const int\fR +\fI\&NPCOL\fR, +\fB\&int *\fR +\fI\&II\fR, +\fB\&int *\fR +\fI\&JJ\fR, +\fB\&int *\fR +\fI\&PROW\fR, +\fB\&int *\fR +\fI\&PCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_infog2l\fR +computes the starting local index II, JJ corresponding to +the submatrix starting globally at the entry pointed by I, J. This +routine returns the coordinates in the grid of the process owning the +matrix entry of global indexes I, J, namely PROW and PCOL. +.SH ARGUMENTS +.TP 8 +I (global input) int +On entry, I specifies the global row index of the matrix +entry. I must be at least zero. +.TP 8 +J (global input) int +On entry, J specifies the global column index of the matrix +entry. J must be at least zero. 
+.TP 8 +IMB (global input) const int +On entry, IMB specifies the size of the first row block of +the global matrix. IMB must be at least one. +.TP 8 +MB (global input) const int +On entry, MB specifies the blocking factor used to partition +and distribute the rows of the matrix A. MB must be larger +than one. +.TP 8 +INB (global input) const int +On entry, INB specifies the size of the first column block of +the global matrix. INB must be at least one. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the columns of the matrix A. NB must be larger +than one. +.TP 8 +RSRC (global input) const int +On entry, RSRC specifies the row coordinate of the process +that possesses the row I. RSRC must be at least zero and +strictly less than NPROW. +.TP 8 +CSRC (global input) const int +On entry, CSRC specifies the column coordinate of the process +that possesses the column J. CSRC must be at least zero and +strictly less than NPCOL. +.TP 8 +MYROW (local input) const int +On entry, MYROW specifies my row process coordinate in the +grid. MYROW is greater than or equal to zero and less than +NPROW. +.TP 8 +MYCOL (local input) const int +On entry, MYCOL specifies my column process coordinate in the +grid. MYCOL is greater than or equal to zero and less than +NPCOL. +.TP 8 +NPROW (global input) const int +On entry, NPROW specifies the number of process rows in the +grid. NPROW is at least one. +.TP 8 +NPCOL (global input) const int +On entry, NPCOL specifies the number of process columns in +the grid. NPCOL is at least one. +.TP 8 +II (local output) int * +On exit, II specifies the local starting row index of the +submatrix. On exit, II is at least 0. +.TP 8 +JJ (local output) int * +On exit, JJ specifies the local starting column index of the +submatrix. On exit, JJ is at least 0. 
+.TP 8 +PROW (global output) int * +On exit, PROW is the row coordinate of the process owning the +entry specified by the global index I. PROW is at least zero +and less than NPROW. +.TP 8 +PCOL (global output) int * +On exit, PCOL is the column coordinate of the process owning +the entry specified by the global index J. PCOL is at least +zero and less than NPCOL. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_jumpit.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_jumpit.3 new file mode 100644 index 000000000..66e77ac32 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_jumpit.3 @@ -0,0 +1,48 @@ +.TH HPL_jumpit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_jumpit \- jump into the random sequence. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_jumpit(\fR +\fB\&int *\fR +\fI\&MULT\fR, +\fB\&int *\fR +\fI\&IADD\fR, +\fB\&int *\fR +\fI\&IRANN\fR, +\fB\&int *\fR +\fI\&IRANM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_jumpit\fR +jumps in the random sequence from the number X(n) encoded +in IRANN to the number X(m) encoded in IRANM using the constants A +and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A +and C obviously depend on m and n, see the function HPL_xjumpm in +order to initialize them. +.SH ARGUMENTS +.TP 8 +MULT (local input) int * +On entry, MULT is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant A. +.TP 8 +IADD (local input) int * +On entry, IADD is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant C. +.TP 8 +IRANN (local input) int * +On entry, IRANN is an array of dimension 2, that contains +the 16-lower and 15-higher bits of the encoding of X(n). 
+.TP 8 +IRANM (local output) int * +On entry, IRANM is an array of dimension 2. On exit, this +array contains respectively the 16-lower and 15-higher bits +of the encoding of X(m). +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ladd.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ladd.3 new file mode 100644 index 000000000..9fd6805d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ladd.3 @@ -0,0 +1,41 @@ +.TH HPL_ladd 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ladd \- Adds two long positive integers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_ladd(\fR +\fB\&int *\fR +\fI\&J\fR, +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_ladd\fR +adds without carry two long positive integers K and J and +puts the result into I. The long integers I, J, K are encoded on 64 +bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second +entry. +.SH ARGUMENTS +.TP 8 +J (local input) int * +On entry, J is an integer array of dimension 2 containing the +encoded long integer J. +.TP 8 +K (local input) int * +On entry, K is an integer array of dimension 2 containing the +encoded long integer K. +.TP 8 +I (local output) int * +On entry, I is an integer array of dimension 2. On exit, this +array contains the encoded long integer result. +.SH SEE ALSO +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_lmul.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_lmul.3 new file mode 100644 index 000000000..8be7380e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_lmul.3 @@ -0,0 +1,42 @@ +.TH HPL_lmul 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_lmul \- multiplies 2 long positive integers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_lmul(\fR +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&J\fR, +\fB\&int *\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_lmul\fR +multiplies without carry two long positive integers K and J +and puts the result into I. The long integers I, J, K are encoded on +64 bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second entry +of each array. For efficiency purposes, the intrinsic modulo function +is inlined. +.SH ARGUMENTS +.TP 8 +K (local input) int * +On entry, K is an integer array of dimension 2 containing the +encoded long integer K. +.TP 8 +J (local input) int * +On entry, J is an integer array of dimension 2 containing the +encoded long integer J. +.TP 8 +I (local output) int * +On entry, I is an integer array of dimension 2. On exit, this +array contains the encoded long integer result. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_logsort.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_logsort.3 new file mode 100644 index 000000000..e7e80062a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_logsort.3 @@ -0,0 +1,65 @@ +.TH HPL_logsort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_logsort \- Sort the processes in logarithmic order. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_logsort(\fR +\fB\&const int\fR +\fI\&NPROCS\fR, +\fB\&const int\fR +\fI\&ICURROC\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_logsort\fR +computes an array IPMAP and its inverse IPMAPM1 that +contain the logarithmic sorted processes id with respect to the local +number of rows of U that they own. This is necessary to ensure that +the logarithmic spreading of U is optimal in terms of number of steps +and communication volume as well. In other words, the largest pieces +of U will be sent a minimal number of times. +.SH ARGUMENTS +.TP 8 +NPROCS (global input) const int +On entry, NPROCS specifies the number of process rows in the +process grid. NPROCS is at least one. +.TP 8 +ICURROC (global input) const int +On entry, ICURROC is the source process row. +.TP 8 +IPLEN (global input/output) int * +On entry, IPLEN is an array of dimension NPROCS+1, such that +IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, +that process i-1 has. On exit, IPLEN[i] is the number of +rows of U in the processes before process IPMAP[i] after the +sort, with the convention that IPLEN[NPROCS] is the total +number of rows of the panel. In other words, IPLEN[i+1] - +IPLEN[i] is the number of rows of A that should be moved to +the process IPMAP[i].
IPLEN is such that the number of rows +of the source process row is IPLEN[1] - IPLEN[0], and the +remaining entries of this array are sorted so that the +quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROCS. On exit, +array contains the logarithmic mapping of the processes. In +other words, IPMAP[myroc] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROCS. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROCS) +.SH SEE ALSO +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_max.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_max.3 new file mode 100644 index 000000000..16d8aecc6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_max.3 @@ -0,0 +1,43 @@ +.TH HPL_max 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_max \- Combine (max) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_max(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_max\fR +combines (max) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contains the +combined results. 
+.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_min.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_min.3 new file mode 100644 index 000000000..a816d61b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_min.3 @@ -0,0 +1,43 @@ +.TH HPL_min 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_min \- Combine (min) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_min(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_min\fR +combines (min) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contains the +combined results. +.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numroc.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numroc.3 new file mode 100644 index 000000000..34c8acfa9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numroc.3 @@ -0,0 +1,60 @@ +.TH HPL_numroc 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_numroc \- Compute the local number of row/columns. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_numroc(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_numroc\fR +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index 0. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the number of rows/columns being dealt +out. N must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local portion is determined. PROC must be at least zero and +strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. 
+.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numrocI.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numrocI.3 new file mode 100644 index 000000000..1891f1ac9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_numrocI.3 @@ -0,0 +1,66 @@ +.TH HPL_numrocI 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_numrocI \- Compute the local number of row/columns. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_numrocI(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&I\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_numrocI\fR +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index I. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the number of rows/columns being dealt +out. N must be at least zero. +.TP 8 +I (input) const int +On entry, I specifies the global index of the matrix entry. +I must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local portion is determined. PROC must be at least zero and +strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix.
SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pabort.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pabort.3 new file mode 100644 index 000000000..044e87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pabort.3 @@ -0,0 +1,40 @@ +.TH HPL_pabort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pabort \- halts execution. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pabort(\fR +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pabort\fR +displays an error message on stderr and halts execution. +.SH ARGUMENTS +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH SEE ALSO +.BR HPL_fprintf \ (3), +.BR HPL_pwarn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_packL.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_packL.3 new file mode 100644 index 000000000..c79019c37 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_packL.3 @@ -0,0 +1,42 @@ +.TH HPL_packL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_packL \- Form the MPI structure for the row ring broadcasts. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_packL(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&INDEX\fR, +\fB\&const int\fR +\fI\&LEN\fR, +\fB\&const int\fR +\fI\&IBUF\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_packL\fR +forms the MPI data type for the panel to be broadcast. +Successful completion is indicated by the returned error code +MPI_SUCCESS. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.TP 8 +INDEX (input) const int +On entry, INDEX points to the first entry of the packed +buffer being broadcast. +.TP 8 +LEN (input) const int +On entry, LEN is the length of the packed buffer. +.TP 8 +IBUF (input) const int +On entry, IBUF specifies the panel buffer/count/type entries +that should be initialized. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pddriver.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pddriver.3 new file mode 100644 index 000000000..30e55b62e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pddriver.3 @@ -0,0 +1,15 @@ +.TH main 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +main \- HPL main timing program. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&main();\fR +.SH DESCRIPTION +\fB\&main\fR +is the main driver program for testing the HPL routines. +This program is driven by a short data file named "HPL.dat". +.SH SEE ALSO +.BR HPL_pdinfo \ (3), +.BR HPL_pdtest \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdfact.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdfact.3 new file mode 100644 index 000000000..e3db5fb8b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdfact.3 @@ -0,0 +1,64 @@ +.TH HPL_pdfact 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdfact \- recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdfact(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdfact\fR +recursively factorizes a 1-dimensional panel of columns. +The RPFACT function pointer specifies the recursive algorithm to be +used, either Crout, Left- or Right looking. NBMIN allows to vary the +recursive stopping criterion in terms of the number of columns in the +panel, and NDIV allows to specify the number of subpanels each panel +should be divided into. Usually a value of 2 will be chosen. Finally +PFACT is a function pointer specifying the non-recursive algorithm to +be used on at most NBMIN columns. One can also choose here between +Crout, Left- or Right looking. Empirical tests seem to indicate that +values of 4 or 8 for NBMIN give the best results. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual.
On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv.3 new file mode 100644 index 000000000..ab4b62c4e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv.3 @@ -0,0 +1,40 @@ +.TH HPL_pdgesv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesv \- Solve A x = b. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesv(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesv\fR +factors a N+1-by-N matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with or without look-ahead. The lower triangular factor is left +unpivoted and the pivots are not returned. The right hand side is the +N+1 column of the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv0.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv0.3 new file mode 100644 index 000000000..180f191f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesv0.3 @@ -0,0 +1,47 @@ +.TH HPL_pdgesv0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesv0 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesv0(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesv0\fR +factors a N+1-by-N matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +without look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. 
The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK1.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK1.3 new file mode 100644 index 000000000..64cee67ed --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK1.3 @@ -0,0 +1,46 @@ +.TH HPL_pdgesvK1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesvK1 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesvK1(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesvK1\fR +factors a N+1-by-N matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. 
+.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK2.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK2.3 new file mode 100644 index 000000000..9f389b9dd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdgesvK2.3 @@ -0,0 +1,47 @@ +.TH HPL_pdgesvK2 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesvK2 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesvK2(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesvK2\fR +factors a N+1-by-N matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. 
+.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdinfo.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdinfo.3 new file mode 100644 index 000000000..eed541159 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdinfo.3 @@ -0,0 +1,212 @@ +.TH HPL_pdinfo 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdinfo \- Read input parameter file. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdinfo(\fR +\fB\&HPL_T_test *\fR +\fI\&TEST\fR, +\fB\&int *\fR +\fI\&NS\fR, +\fB\&int *\fR +\fI\&N\fR, +\fB\&int *\fR +\fI\&NBS\fR, +\fB\&int *\fR +\fI\&NB\fR, +\fB\&HPL_T_ORDER *\fR +\fI\&PMAPPIN\fR, +\fB\&int *\fR +\fI\&NPQS\fR, +\fB\&int *\fR +\fI\&P\fR, +\fB\&int *\fR +\fI\&Q\fR, +\fB\&int *\fR +\fI\&NPFS\fR, +\fB\&HPL_T_FACT *\fR +\fI\&PF\fR, +\fB\&int *\fR +\fI\&NBMS\fR, +\fB\&int *\fR +\fI\&NBM\fR, +\fB\&int *\fR +\fI\&NDVS\fR, +\fB\&int *\fR +\fI\&NDV\fR, +\fB\&int *\fR +\fI\&NRFS\fR, +\fB\&HPL_T_FACT *\fR +\fI\&RF\fR, +\fB\&int *\fR +\fI\&NTPS\fR, +\fB\&HPL_T_TOP *\fR +\fI\&TP\fR, +\fB\&int *\fR +\fI\&NDHS\fR, +\fB\&int *\fR +\fI\&DH\fR, +\fB\&HPL_T_SWAP *\fR +\fI\&FSWAP\fR, +\fB\&int *\fR +\fI\&TSWAP\fR, +\fB\&int *\fR +\fI\&L1NOTRAN\fR, +\fB\&int *\fR +\fI\&UNOTRAN\fR, +\fB\&int *\fR +\fI\&EQUIL\fR, +\fB\&int *\fR +\fI\&ALIGN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdinfo\fR +reads the startup information for the various tests and +transmits it to all processes. +.SH ARGUMENTS +.TP 8 +TEST (global output) HPL_T_test * +On entry, TEST points to a testing data structure. 
On exit, +the fields of this data structure are initialized as follows: +TEST->outfp specifies the output file where the results will +be printed. It is only defined and used by the process 0 of +the grid. TEST->thrsh specifies the threshold value for the +test ratio. TEST->epsil is the relative machine precision of +the distributed computer. Finally the test counters, kfail, +kpass, kskip, ktest are initialized to zero. +.TP 8 +NS (global output) int * +On exit, NS specifies the number of different problem sizes +to be tested. NS is less than or equal to HPL_MAX_PARAM. +.TP 8 +N (global output) int * +On entry, N is an array of dimension HPL_MAX_PARAM. On exit, +the first NS entries of this array contain the problem sizes +to run the code with. +.TP 8 +NBS (global output) int * +On exit, NBS specifies the number of different distribution +blocking factors to be tested. NBS must be less than or equal +to HPL_MAX_PARAM. +.TP 8 +NB (global output) int * +On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, +the first NBS entries of this array contain the values of the +various distribution blocking factors, to run the code with. +.TP 8 +PMAPPIN (global output) HPL_T_ORDER * +On exit, PMAPPIN specifies the process mapping onto the +nodes of the MPI machine configuration. PMAPPIN defaults to +row-major ordering. +.TP 8 +NPQS (global output) int * +On exit, NPQS specifies the number of different values that +can be used for P and Q, i.e., the number of process grids to +run the code with. NPQS must be less than or equal to +HPL_MAX_PARAM. +.TP 8 +P (global output) int * +On entry, P is an array of dimension HPL_MAX_PARAM. On exit, +the first NPQS entries of this array contain the values of P, +the number of process rows of the NPQS grids to run the code +with. +.TP 8 +Q (global output) int * +On entry, Q is an array of dimension HPL_MAX_PARAM.
On exit, +the first NPQS entries of this array contain the values of Q, +the number of process columns of the NPQS grids to run the +code with. +.TP 8 +NPFS (global output) int * +On exit, NPFS specifies the number of different values that +can be used for PF : the panel factorization algorithm to run +the code with. NPFS is less than or equal to HPL_MAX_PARAM. +.TP 8 +PF (global output) HPL_T_FACT * +On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, +the first NPFS entries of this array contain the various +panel factorization algorithms to run the code with. +.TP 8 +NBMS (global output) int * +On exit, NBMS specifies the number of various recursive +stopping criteria to be tested. NBMS must be less than or +equal to HPL_MAX_PARAM. +.TP 8 +NBM (global output) int * +On entry, NBM is an array of dimension HPL_MAX_PARAM. On +exit, the first NBMS entries of this array contain the values +of the various recursive stopping criteria to be tested. +.TP 8 +NDVS (global output) int * +On exit, NDVS specifies the number of various numbers of +panels in recursion to be tested. NDVS is less than or equal +to HPL_MAX_PARAM. +.TP 8 +NDV (global output) int * +On entry, NDV is an array of dimension HPL_MAX_PARAM. On +exit, the first NDVS entries of this array contain the values +of the various numbers of panels in recursion to be tested. +.TP 8 +NRFS (global output) int * +On exit, NRFS specifies the number of different values that +can be used for RF : the recursive factorization algorithm to +be tested. NRFS is less than or equal to HPL_MAX_PARAM. +.TP 8 +RF (global output) HPL_T_FACT * +On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, +the first NRFS entries of this array contain the various +recursive factorization algorithms to run the code with. +.TP 8 +NTPS (global output) int * +On exit, NTPS specifies the number of different values that +can be used for the broadcast topologies to be tested. NTPS +is less than or equal to HPL_MAX_PARAM. 
+.TP 8 +TP (global output) HPL_T_TOP * +On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, +the first NTPS entries of this array contain the various +broadcast (along rows) topologies to run the code with. +.TP 8 +NDHS (global output) int * +On exit, NDHS specifies the number of different values that +can be used for the lookahead depths to be tested. NDHS is +less than or equal to HPL_MAX_PARAM. +.TP 8 +DH (global output) int * +On entry, DH is an array of dimension HPL_MAX_PARAM. On +exit, the first NDHS entries of this array contain the values +of lookahead depths to run the code with. Such a value is at +least 0 (no-lookahead) or greater than zero. +.TP 8 +FSWAP (global output) HPL_T_SWAP * +On exit, FSWAP specifies the swapping algorithm to be used in +all tests. +.TP 8 +TSWAP (global output) int * +On exit, TSWAP specifies the swapping threshold as a number +of columns when the mixed swapping algorithm was chosen. +.TP 8 +L1NOTRAN (global output) int * +On exit, L1NOTRAN specifies whether the upper triangle of the +panels of columns should be stored in no-transposed form +(L1NOTRAN=1) or in transposed form (L1NOTRAN=0). +.TP 8 +UNOTRAN (global output) int * +On exit, UNOTRAN specifies whether the panels of rows should +be stored in no-transposed form (UNOTRAN=1) or transposed +form (UNOTRAN=0) during their broadcast. +.TP 8 +EQUIL (global output) int * +On exit, EQUIL specifies whether equilibration during the +swap-broadcast of the panel of rows should be performed +(EQUIL=1) or not (EQUIL=0). +.TP 8 +ALIGN (global output) int * +On exit, ALIGN specifies the alignment of the dynamically +allocated buffers in double precision words. ALIGN is greater +than zero. +.SH SEE ALSO +.BR HPL_pddriver \ (3), +.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlamch.3 new file mode 100644 index 000000000..7ce46c23e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlamch.3 @@ -0,0 +1,53 @@ +.TH HPL_pdlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlamch \- determines machine-specific arithmetic constants. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_pdlamch(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR, +\fB\&const HPL_T_MACH\fR +\fI\&CMACH\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlamch\fR +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum(sfmin) such that +1/sfmin does not overflow, the base of the machine (base), the precision +(prec), the number of (base) digits in the mantissa (t), whether +rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum +exponent before (gradual) underflow (emin), the underflow threshold +(rmin)- base**(emin-1), the largest exponent before overflow (emax), the +overflow threshold (rmax) - (base**emax)*(1-eps). +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. 
+.TP 8 +CMACH (global input) const HPL_T_MACH +Specifies the value to be returned by HPL_pdlamch + = HPL_MACH_EPS, HPL_pdlamch := eps (default) + = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + = HPL_MACH_BASE, HPL_pdlamch := base + = HPL_MACH_PREC, HPL_pdlamch := eps*base + = HPL_MACH_MLEN, HPL_pdlamch := t + = HPL_MACH_RND, HPL_pdlamch := rnd + = HPL_MACH_EMIN, HPL_pdlamch := emin + = HPL_MACH_RMIN, HPL_pdlamch := rmin + = HPL_MACH_EMAX, HPL_pdlamch := emax + = HPL_MACH_RMAX, HPL_pdlamch := rmax + +where + + eps = relative machine precision, + sfmin = safe minimum, + base = base of the machine, + prec = eps*base, + t = number of digits in the mantissa, + rnd = 1.0 if rounding occurs in addition, + emin = minimum exponent before underflow, + rmin = underflow threshold, + emax = largest exponent before overflow, + rmax = overflow threshold. diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlange.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlange.3 new file mode 100644 index 000000000..30593401b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlange.3 @@ -0,0 +1,68 @@ +.TH HPL_pdlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlange \- Compute ||A||. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_pdlange(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const HPL_T_NORM\fR +\fI\&NORM\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlange\fR +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a distributed matrix A: + + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +NORM (global input) const HPL_T_NORM +On entry, NORM specifies the value to be returned by this +function as described above. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,LocQ(N)), +that contains the local pieces of the distributed matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). +.SH SEE ALSO +.BR HPL_pdlaprnt \ (3), +.BR HPL_fprintf \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaprnt.3 new file mode 100644 index 000000000..feb010a67 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaprnt.3 @@ -0,0 +1,72 @@ +.TH HPL_pdlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaprnt \- Print a distributed matrix A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaprnt(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&IAROW\fR, +\fB\&const int\fR +\fI\&IACOL\fR, +\fB\&const char *\fR +\fI\&CMATNM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaprnt\fR +prints to standard error a distributed matrix A. The +local pieces of A are sent to the process of coordinates (0,0) in +the grid and then printed. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the coefficient +matrix A. M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the +coefficient matrix A. N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +A (local input) double * +On entry, A points to an array of dimension (LDA,LocQ(N)). +This array contains the coefficient matrix to be printed. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). 
+.TP 8 +IAROW (global input) const int +On entry, IAROW specifies the row process coordinate owning +the first row of A. IAROW must be larger than or equal to +zero and less than NPROW. +.TP 8 +IACOL (global input) const int +On entry, IACOL specifies the column process coordinate +owning the first column of A. IACOL must be larger than or +equal to zero and less than NPCOL. +.TP 8 +CMATNM (global input) const char * +On entry, CMATNM is the name of the matrix to be printed. +.SH SEE ALSO +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00N.3 new file mode 100644 index 000000000..3875400e3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00N.3 @@ -0,0 +1,65 @@ +.TH HPL_pdlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp00N \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp00N(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp00N\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. 
Mono +directional links will double this communication cost. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be broadcast and swapped) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx0 \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp05N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00T.3 new file mode 100644 index 000000000..39901ba4b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp00T.3 @@ -0,0 +1,65 @@ +.TH HPL_pdlaswp00T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp00T \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp00T(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp00T\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. 
+ +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be broadcast and swapped) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTT \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx0 \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01N.3 new file mode 100644 index 000000000..1ee14c0a8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01N.3 @@ -0,0 +1,69 @@ +.TH HPL_pdlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp01N \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp01N(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp01N\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. 
If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_spreadN \ (3), +.BR HPL_equil \ (3), +.BR HPL_rollN \ (3), +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp06N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01T.3 new file mode 100644 index 000000000..e5c5de024 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdlaswp01T.3 @@ -0,0 +1,69 @@ +.TH HPL_pdlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp01T \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp01T(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp01T\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. 
With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTT \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_spreadT \ (3), +.BR HPL_equil \ (3), +.BR HPL_rollT \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp06T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmatgen.3 new file mode 100644 index 000000000..5b4675c6e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmatgen.3 @@ -0,0 +1,67 @@ +.TH HPL_pdmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdmatgen \- Parallel random matrix generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdmatgen(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&ISEED\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdmatgen\fR +generates (or regenerates) a parallel random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,LocQ(N)). +On exit, this array contains the coefficients of the randomly +generated matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). 
+.TP 8 +ISEED (global input) const int +On entry, ISEED specifies the seed number to generate the +matrix A. ISEED must be at least zero. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_drand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmxswp.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmxswp.3 new file mode 100644 index 000000000..41c604373 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdmxswp.3 @@ -0,0 +1,78 @@ +.TH HPL_pdmxswp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdmxswp \- swaps and broadcasts the pivot row. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdmxswp(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdmxswp\fR +swaps and broadcasts the absolute value max row using +bi-directional exchange. The buffer is partially set by HPL_dlocmax. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by + + log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + +where lat and bdwth are the latency and bandwidth of the network for +double precision real elements. Communication only occurs in one +process column. Mono-directional links will cause the communication +cost to double. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information.
+.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of the matrix +column on which this function operates. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +It is assumed that HPL_dlocmax was called prior to this +routine to initialize the first four entries of this array. +On exit, the N0 length max row is stored in WORK[4:4+N0-1]; +Note that this is also the JJth row (or column) of L1. The +remaining part is used as a temporary array. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrN.3 new file mode 100644 index 000000000..2e94a36a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpancrN \- Crout panel factorization. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpancrN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpancrN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in no-transpose form (i.e. just like the input +matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step.
The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrT.3 new file mode 100644 index 000000000..035e60d60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpancrT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpancrT \- Crout panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpancrT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpancrT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. 
The lower triangular N0-by-N0 upper block +of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A).
+.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_disp.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_disp.3 new file mode 100644 index 000000000..94a212ced --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_disp.3 @@ -0,0 +1,24 @@ +.TH HPL_pdpanel_disp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_disp \- Deallocate a panel data structure. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pdpanel_disp(\fR +\fB\&HPL_T_panel * *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_disp\fR +deallocates the panel structure and resources and +stores the error code returned by the panel factorization. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * * +On entry, PANEL points to the address of the panel data +structure to be deallocated. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_free \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_free.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_free.3 new file mode 100644 index 000000000..cfad40c3d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_free.3 @@ -0,0 +1,24 @@ +.TH HPL_pdpanel_free 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_free \- Deallocate the panel resources.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pdpanel_free(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_free\fR +deallocates the panel resources and stores the error +code returned by the panel factorization. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the panel data structure from +which the resources should be deallocated. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_disp \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_init.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_init.3 new file mode 100644 index 000000000..cbb0e7e3a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_init.3 @@ -0,0 +1,76 @@ +.TH HPL_pdpanel_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_init \- Initialize the panel resources. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanel_init(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&JB\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&TAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_init\fR +initializes a panel data structure. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +M (local input) const int +On entry, M specifies the global number of rows of the panel. +M must be at least zero. 
+.TP 8 +N (local input) const int +On entry, N specifies the global number of columns of the +panel and trailing submatrix. N must be at least zero. +.TP 8 +JB (global input) const int +On entry, JB specifies the number of columns of the panel. +JB must be at least zero. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.TP 8 +IA (global input) const int +On entry, IA is the global row index identifying the panel +and trailing submatrix. IA must be at least zero. +.TP 8 +JA (global input) const int +On entry, JA is the global column index identifying the panel +and trailing submatrix. JA must be at least zero. +.TP 8 +TAG (global input) const int +On entry, TAG is the row broadcast message id. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_disp \ (3), +.BR HPL_pdpanel_free \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_new.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_new.3 new file mode 100644 index 000000000..ed9fe1053 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanel_new.3 @@ -0,0 +1,76 @@ +.TH HPL_pdpanel_new 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_new \- Create a panel data structure.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanel_new(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&JB\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&TAG\fR, +\fB\&HPL_T_panel * *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_new\fR +creates and initializes a panel data structure. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +M (local input) const int +On entry, M specifies the global number of rows of the panel. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the global number of columns of the +panel and trailing submatrix. N must be at least zero. +.TP 8 +JB (global input) const int +On entry, JB specifies the number of columns of the panel. +JB must be at least zero. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.TP 8 +IA (global input) const int +On entry, IA is the global row index identifying the panel +and trailing submatrix. IA must be at least zero. +.TP 8 +JA (global input) const int +On entry, JA is the global column index identifying the panel +and trailing submatrix. JA must be at least zero. +.TP 8 +TAG (global input) const int +On entry, TAG is the row broadcast message id. +.TP 8 +PANEL (local input/output) HPL_T_panel * * +On entry, PANEL points to the address of the panel data +structure to create and initialize. +.SH SEE ALSO +.BR HPL_pdpanel_free \ (3), +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_disp \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllN.3 new file mode 100644 index 000000000..eca1f4a34 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanllN \- Left-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanllN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanllN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. 
Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllT.3 new file mode 100644 index 000000000..a18d52c61 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanllT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanllT \- Left-looking panel factorization.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanllT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanllT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel.
This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlN.3 new file mode 100644 index 000000000..cae2b5b5b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanrlN \- Right-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanrlN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanrlN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). 
+ +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A.
+.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlT.3 new file mode 100644 index 000000000..434444bf7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdpanrlT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanrlT \- Right-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanrlT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanrlT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. 
The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrN.3 new file mode 100644 index 000000000..fc6dd25f8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpancrN \- Crout recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpancrN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpancrN\fR +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat.
Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrT.3 new file mode 100644 index 000000000..ea0a57bc9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpancrT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpancrT \- Crout recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpancrT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpancrT\fR +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. 
+The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllN.3 new file mode 100644 index 000000000..29b6db40a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanllN \- Left-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanllN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanllN\fR +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. 
On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllT.3 new file mode 100644 index 000000000..18db5c1fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanllT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanllT \- Left-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanllT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanllT\fR +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlN.3 new file mode 100644 index 000000000..441560c14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanrlN \- Right-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanrlN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanrlN\fR +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. 
just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlT.3 new file mode 100644 index 000000000..e5bd9d110 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdrpanrlT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanrlT \- Right-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanrlT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanrlT\fR +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtest.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtest.3 new file mode 100644 index 000000000..eaaff2bff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtest.3 @@ -0,0 +1,63 @@ +.TH HPL_pdtest 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdtest \- Perform one test. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdtest(\fR +\fB\&HPL_T_test *\fR +\fI\&TEST\fR, +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdtest\fR +performs one test given a set of parameters such as the +process grid, the problem size, the distribution blocking factor ... 
+This function generates the data, calls and times the linear system +solver, checks the accuracy of the obtained vector solution and +writes this information to the file pointed to by TEST->outfp. +.SH ARGUMENTS +.TP 8 +TEST (global input) HPL_T_test * +On entry, TEST points to a testing data structure: outfp +specifies the output file where the results will be printed. +It is only defined and used by the process 0 of the grid. +thrsh specifies the threshold value for the test ratio. +Concretely, a test is declared "PASSED" if and only if the +following inequality is satisfied: +||Ax-b||_oo / ( epsil * + ( || x ||_oo * || A ||_oo + || b ||_oo ) * + N ) < thrsh. +epsil is the relative machine precision of the distributed +computer. Finally the test counters, kfail, kpass, kskip and +ktest are updated as follows: if the test passes, kpass is +incremented by one; if the test fails, kfail is incremented +by one; if the test is skipped, kskip is incremented by one. +ktest is left unchanged. +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters to be used for this test. +.TP 8 +N (global input) const int +On entry, N specifies the order of the coefficient matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.SH SEE ALSO +.BR HPL_pddriver \ (3), +.BR HPL_pdinfo \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtrsv.3 new file mode 100644 index 000000000..5d2d14dcd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdtrsv.3 @@ -0,0 +1,49 @@ +.TH HPL_pdtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdtrsv \- Solve triu( A ) x = b. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdtrsv(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_pmat *\fR +\fI\&AMAT\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdtrsv\fR +solves an upper triangular system of linear equations. + +The rhs is the last column of the N by N+1 matrix A. The solve starts +in the process column owning the Nth column of A, so the rhs b may +need to be moved one process column to the left at the beginning. The +routine therefore needs a column vector in every process column but +the one owning b. The result is replicated in all process rows, and +returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + +The algorithm uses decreasing one-ring broadcast in process rows and +columns implemented in terms of synchronous communication point to +point primitives. The lookahead of depth 1 is used to minimize the +critical path. This entire operation is essentially ``latency'' bound +and an estimate of its running time is given by: + + (move rhs) lat + N / ( P bdwth ) + + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + gam2 N^2 / ( P Q ), + +where gam2 is an estimate of the Level 2 BLAS rate of execution. +There are N / NB diagonal blocks. One must exchange 2 messages of +length NB to compute the next NB entries of the vector solution, as +well as performing a total of N^2 floating point operations. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. 
+.TP 8 +AMAT (local input/output) HPL_T_pmat * +On entry, AMAT points to the data structure containing the +local array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNN.3 new file mode 100644 index 000000000..e20929a27 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNN.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateNN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateNN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateNN\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp01N \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNT.3 new file mode 100644 index 000000000..276c2ceda --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateNT.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateNT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateNT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateNT\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTN.3 new file mode 100644 index 000000000..091859d01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTN.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateTN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateTN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateTN\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp01N \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTT.3 new file mode 100644 index 000000000..34502c6ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pdupdateTT.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateTT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateTT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateTT\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_perm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_perm.3 new file mode 100644 index 000000000..9476b5eff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_perm.3 @@ -0,0 +1,50 @@ +.TH HPL_perm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_perm \- Combine 2 index arrays - Generate the permutation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_perm(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_perm\fR +combines two index arrays and generates the corresponding +permutation. First, this function computes the inverse of LINDXA, and +then combines it with LINDXAU. Second, in order to be able to perform +the permutation in place, LINDXAU is overwritten by the sequence of +permutation producing the same result. What we ultimately want to +achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the +call to this function, this in place permutation can be performed by +for i in [0..N) swap U[i] with U[LINDXAU[i]]. +.SH ARGUMENTS +.TP 8 +N (global input) const int +On entry, N specifies the length of the arrays LINDXA and +LINDXAU. N should be at least zero. +.TP 8 +LINDXA (global input/output) int * +On entry, LINDXA is an array of dimension N containing the +source indexes. On exit, LINDXA contains the combined index +array. +.TP 8 +LINDXAU (global input/output) int * +On entry, LINDXAU is an array of dimension N containing the +target indexes. On exit, LINDXAU contains the sequence of +permutation, that should be applied in increasing order to +permute the underlying array U in place. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension N.
+.SH SEE ALSO +.BR HPL_plindx1 \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pipid.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pipid.3 new file mode 100644 index 000000000..6a8f5f277 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pipid.3 @@ -0,0 +1,79 @@ +.TH HPL_pipid 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pipid \- Simplify the pivot vector. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pipid(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&IPID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pipid\fR +computes an array IPID that contains the source and final +destination of matrix rows resulting from the application of N +interchanges as computed by the LU factorization with row partial +pivoting. The array IPID is such that the row of global index IPID(i) +should be mapped onto the row of global index IPID(i+1). Note that we +cannot really know the length of IPID a priori. However, we know that +this array is at least 2*N long, since there are N rows to swap and +broadcast. The length of this array must be smaller than or equal to +4*N, since every row is swapped with at most a single distinct remote +row. The algorithm constructing IPID goes as follows: Let IA be the +global index of the first row to be swapped. + +For every row src IA + i with i in [0..N) to be swapped with row dst +such that dst is given by DPIV[i]: + +Is row src the destination of a previous row of the current block, +that is, is there k odd such that IPID(k) is equal to src ? + Yes: update this destination with dst. For example, if the +pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, +we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it +was thought so far ... 
+ No : add the pair (src,dst) at the end of IPID; row src has not +been moved yet. + +Is row dst different from src the destination of a previous row of +the current block, i.e., is there k odd such that IPID(k) is equal to +dst ? + Yes: update IPID(k) with src. For example, if the pivot array +is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in +fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought +so far ... + No : add the pair (dst,src) at the end of IPID; row dst has not +been moved yet. + +Note that when src is equal to dst, the pair (dst,src) should not be +added to IPID in order to avoid duplicated entries in this array. +During the construction of the array IPID, we make sure that the +first N entries are such that IPID(k) with k odd is equal to IA+k/2. +For k in [0..K/2), the row of global index IPID(2*k) should be +mapped onto the row of global index IPID(2*k+1). +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global output) int * +On exit, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global output) int * +On entry, IPID is an array of length 4*N. On exit, the first +K entries of that array contain the src and final destination +resulting from the application of the N interchanges as +specified by DPIV. The pairs (src,dst) are contiguously +stored and sorted so that IPID(2*i+1) is equal to IA+i with i +in [0..N) +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx0.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx0.3 new file mode 100644 index 000000000..2b889947a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx0.3 @@ -0,0 +1,168 @@ +.TH HPL_plindx0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx0 \- Compute local swapping index arrays. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx0(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&LLEN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx0\fR +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. + +On entry, the array IPID of length K is such that the row of global +index IPID(i) should be mapped onto row of global index IPID(i+1). +Let IA be the global index of the first row to be swapped. For k in +[0..K/2), the row of global index IPID(2*k) should be mapped onto the +row of global index IPID(2*k+1). The question then, is to determine +which rows should ultimately be part of U. + +First, some rows of the process ICURROW may be swapped locally. One +of this row belongs to U, the other one belongs to my local piece of +A. The other rows of the current block are swapped with remote rows +and are thus not part of U. These rows however should be sent along, +and grabbed by the other processes as we progress in the exchange +phase. + +So, assume that I am ICURROW and consider a row of index IPID(2*i) +that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less +than N, this row is locally swapped and should be copied into U at +the position IPID(2*i+1) - IA. No row will be exchanged for this one. 
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be +locally copied into my local piece of A at the position corresponding +to the row of global index IPID(2*i+1). + +If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) +is to be swapped away and strictly speaking does not belong to U, but +to A remotely. Since this process will however send this array U, +this row is copied into U, exactly where the row IPID(2*i+1) should +go. For this, we search IPID for k1, such that IPID(2*k1) is equal to +IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position +IPID(2*k1+1)-IA. + +It is thus important to put the rows that go into U, i.e., such that +IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By +doing so, U is formed, and the local copy is performed in just one +sweep. + +Two lists LINDXA and LINDXAU are built. LINDXA contains the local +index of the rows I have that should be copied. LINDXAU contains the +local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A +is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). In the process +ICURROW, the initial packing algorithm proceeds as follows. 
+ + for all entries in IPID, + if IPID(2*i) is in ICURROW, + if IPID(2*i+1) is in ICURROW, + if( IPID(2*i+1) - IA < N ) + save corresponding local position + of this row (LINDXA); + save local position (LINDXAU) in U + where this row goes; + [copy row IPID(2*i) in U at position + IPID(2*i+1)-IA; ]; + else + save corresponding local position of + this row (LINDXA); + save local position (-LINDXAU) in A + where this row goes; + [copy row IPID(2*i) in my piece of A + at IPID(2*i+1);] + end if + else + find k1 such that IPID(2*k1) = IPID(2*i+1); + copy row IPID(2*i) in U at position + IPID(2*k1+1)-IA; + save corresponding local position of this + row (LINDXA); + save local position (LINDXAU) in U where + this row goes; + end if + end if + end for + +Second, if I am not the current row process ICURROW, all source rows +in IPID that I own are part of U. Indeed, they are swapped with one +row of the current block of rows, and the main factorization +algorithm proceeds one row after each other. The processes different +from ICURROW, should exchange and accumulate those rows until they +receive some data previously owned by the process ICURROW. + +In processes different from ICURROW, the initial packing algorithm +proceeds as follows. Consider a row of global index IPID(2*i) that I +own. When I will be receiving data previously owned by ICURROW, i.e., +U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, +and this particular row of U should be first copied into my piece of +A, at A(il,:), where il is the local row index corresponding to +IPID(2*i). Now,initially, this row will be packed into workspace, say +as the kth row of that work array. The following algorithm sets +LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row +should be copied. LINDXA(k) stores the local index in A where this +row of U should be copied, i.e il. 
+ + for all entries in IPID, + if IPID(2*i) is not in ICURROW, + copy row IPID(2*i) in work array; + save corresponding local position + of this row (LINDXA); + save position (LINDXAU) in U where + this row should be copied; + end if + end for + +Since we are at it, we also globally figure out how many rows every +process has. That is necessary, because it would rather be cumbersome +to figure it on the fly during the bi-directional exchange phase. +This information is kept in the array LLEN of size NPROW. Also note +that the arrays LINDXA and LINDXAU are of max length equal to 2*N. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +LINDXA (local output) int * +On entry, LINDXA is an array of dimension 2*N. On exit, this +array contains the local indexes of the rows of A I have that +should be copied into U. +.TP 8 +LINDXAU (local output) int * +On exit, LINDXAU is an array of dimension 2*N. On exit, this +array contains the local destination information encoded as +follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be +copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). +.TP 8 +LLEN (global output) int * +On entry, LLEN is an array of length NPROW. On exit, it +contains how many rows every process has. +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx1.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx1.3 new file mode 100644 index 000000000..7d4f8feba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx1.3 @@ -0,0 +1,106 @@ +.TH HPL_plindx1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx1 \- Compute local swapping index arrays. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx1(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&IPA\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR, +\fB\&int *\fR +\fI\&PERMU\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx1\fR +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. In addition, this function computes +three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic +mapping information for the spreading phase. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) const int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +IPA (global output) int * +On exit, IPA specifies the number of rows that the current +process row has that either belong to U or should be swapped +with remote rows of A. 
+.TP 8 +LINDXA (global output) int * +On entry, LINDXA is an array of dimension 2*N. On exit, this +array contains the local indexes of the rows of A I have that +should be copied into U. +.TP 8 +LINDXAU (global output) int * +On entry, LINDXAU is an array of dimension 2*N. On exit, this +array contains the local destination information encoded as +follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be +copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). +.TP 8 +IPLEN (global output) int * +On entry, IPLEN is an array of dimension NPROW + 1. On exit, +this array is such that IPLEN[i] is the number of rows of A +in the processes before process IPMAP[i] after the sort +with the convention that IPLEN[nprow] is the total number of +rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the +local number of rows of A that should be moved to the process +IPMAP[i]. IPLEN is such that the number of rows of the source +process row can be computed as IPLEN[1] - IPLEN[0], and the +remaining entries of this array are sorted so that the +quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROW. On exit, this +array contains the logarithmic mapping of the processes. In +other words, IPMAP[myrow] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROW. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROW) +.TP 8 +PERMU (global output) int * +On entry, PERMU is an array of dimension JB. On exit, PERMU +contains a sequence of permutations, that should be applied +in increasing order to permute in place the row panel U. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension 2*JB. 
+.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx10.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx10.3 new file mode 100644 index 000000000..d22d64f36 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_plindx10.3 @@ -0,0 +1,68 @@ +.TH HPL_plindx10 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx10 \- Compute the logarithmic maps for the spreading. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx10(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx10\fR +computes three arrays IPLEN, IPMAP and IPMAPM1 that +contain the logarithmic mapping information for the spreading phase. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) const int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +IPLEN (global output) int * +On entry, IPLEN is an array of dimension NPROW + 1. On exit, +this array is such that IPLEN[i] is the number of rows of A +in the processes before process IMAP[i] after the sort, with +the convention that IPLEN[nprow] is the total number of rows. +In other words, IPLEN[i+1] - IPLEN[i] is the local number of +rows of A that should be moved for each process. 
IPLEN is +such that the number of rows of the source process row can be +computed as IPLEN[1] - IPLEN[0], and the remaining entries of +this array are sorted so that the quantities IPLEN[i+1] - +IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROW. On exit, this +array contains the logarithmic mapping of the processes. In +other words, IPMAP[myrow] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROW. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROW) +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pnum.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pnum.3 new file mode 100644 index 000000000..38956c5a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pnum.3 @@ -0,0 +1,38 @@ +.TH HPL_pnum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pnum \- Rank determination. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pnum(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&MYROW\fR, +\fB\&const int\fR +\fI\&MYCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pnum\fR +determines the rank of a process as a function of its +coordinates in the grid. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +MYROW (local input) const int +On entry, MYROW specifies the row coordinate of the process +whose rank is to be determined. MYROW must be greater than or +equal to zero and less than NPROW. 
+.TP 8 +MYCOL (local input) const int +On entry, MYCOL specifies the column coordinate of the +process whose rank is to be determined. MYCOL must be greater +than or equal to zero and less than NPCOL. +.SH SEE ALSO +.BR HPL_grid_init \ (3), +.BR HPL_grid_info \ (3), +.BR HPL_grid_exit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer.3 new file mode 100644 index 000000000..550703aee --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer.3 @@ -0,0 +1,35 @@ +.TH HPL_ptimer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer \- Timer facility. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_ptimer(\fR +\fB\&const int\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_ptimer\fR +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_ptimer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_ptimer_boot() prior to any of +the functions mentioned above. +.SH ARGUMENTS +.TP 8 +I (global input) const int +On entry, I specifies the timer to stop/start. +.SH SEE ALSO +.BR HPL_ptimer_cputime \ (3), +.BR HPL_ptimer_walltime \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_cputime.3 new file mode 100644 index 000000000..a93a1c208 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_cputime.3 @@ -0,0 +1,23 @@ +.TH HPL_ptimer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer_cputime \- Return the CPU time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_ptimer_cputime();\fR +.SH DESCRIPTION +\fB\&HPL_ptimer_cputime\fR +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. +.SH SEE ALSO +.BR HPL_ptimer_walltime \ (3), +.BR HPL_ptimer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_walltime.3 new file mode 100644 index 000000000..37e5e8c54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_ptimer_walltime.3 @@ -0,0 +1,14 @@ +.TH HPL_ptimer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer_walltime \- Return the elapsed (wall-clock) time. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_ptimer_walltime();\fR +.SH DESCRIPTION +\fB\&HPL_ptimer_walltime\fR +returns the elapsed (wall-clock) time. +.SH SEE ALSO +.BR HPL_ptimer_cputime \ (3), +.BR HPL_ptimer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pwarn.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pwarn.3 new file mode 100644 index 000000000..14e4a65d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_pwarn.3 @@ -0,0 +1,45 @@ +.TH HPL_pwarn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pwarn \- displays an error message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pwarn(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pwarn\fR +displays an error message. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH SEE ALSO +.BR HPL_pabort \ (3), +.BR HPL_fprintf \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rand.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rand.3 new file mode 100644 index 000000000..8b1918fea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rand.3 @@ -0,0 +1,28 @@ +.TH HPL_rand 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rand \- random number generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_rand();\fR +.SH DESCRIPTION +\fB\&HPL_rand\fR +generates the next number in the random sequence. This +function ensures that this number lies in the interval (-0.5, 0.5]. + +The static array irand contains the information (2 integers) required +to generate the next number in the sequence X(n). This number is +computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the +constant d is the largest 64 bit positive integer. The array irand is +then updated for the generation of the next number X(n+1) in the +random sequence as follows X(n+1) = a * X(n) + c. The constants a and +c should have been preliminarily stored in the arrays ias and ics as +2 pairs of integers. The initialization of ias, ics and irand is +performed by the function HPL_setran. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_recv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_recv.3 new file mode 100644 index 000000000..d9136c14b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_recv.3 @@ -0,0 +1,49 @@ +.TH HPL_recv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_recv \- Receive a message. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_recv(\fR +\fB\&double *\fR +\fI\&RBUF\fR, +\fB\&int\fR +\fI\&RCOUNT\fR, +\fB\&int\fR +\fI\&SRC\fR, +\fB\&int\fR +\fI\&RTAG\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_recv\fR +is a simple wrapper around MPI_Recv. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. +.SH ARGUMENTS +.TP 8 +RBUF (local output) double * +On entry, RBUF specifies the starting address of buffer to be +received. +.TP 8 +RCOUNT (local input) int +On entry, RCOUNT specifies the number of double precision +entries in RBUF. RCOUNT must be at least zero. +.TP 8 +SRC (local input) int +On entry, SRC specifies the rank of the sending process in +the communication space defined by COMM. +.TP 8 +RTAG (local input) int +On entry, RTAG specifies the message tag to be used for this +communication operation. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_send \ (3), +.BR HPL_sendrecv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_reduce.3 new file mode 100644 index 000000000..c48f04ded --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_reduce.3 @@ -0,0 +1,56 @@ +.TH HPL_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_reduce \- Reduce operation. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_reduce(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const HPL_T_OP \fR +\fI\&OP\fR, +\fB\&const int\fR +\fI\&ROOT\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_reduce\fR +performs a global reduce operation across all processes of +a group. Note that the input buffer is used as workarray and in all +processes but the accumulating process corrupting the original data. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/output) void * +On entry, BUFFER points to the buffer to be reduced. On +exit, and in process of rank ROOT this array contains the +reduced data. This buffer is also used as workspace during +the operation in the other processes of the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.TP 8 +OP (global input) const HPL_T_OP +On entry, OP is a pointer to the local combine function. +.TP 8 +ROOT (global input) const int +On entry, ROOT is the coordinate of the accumulating process. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollN.3 new file mode 100644 index 000000000..eac4deb66 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollN.3 @@ -0,0 +1,77 @@ +.TH HPL_rollN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rollN \- Roll U and forward the column panel. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_rollN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_rollN\fR +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be rolled) information. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of U. N must be +at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[NPROW]). +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process row. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. 
+.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollT.3 new file mode 100644 index 000000000..bab5bdffd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_rollT.3 @@ -0,0 +1,77 @@ +.TH HPL_rollT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rollT \- Roll U and forward the column panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_rollT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_rollT\fR +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be rolled) information. 
+.TP 8 +N (local input) const int +On entry, N specifies the local number of rows of U. N must +be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,N). +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process row. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sdrv.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sdrv.3 new file mode 100644 index 000000000..a11252d6a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sdrv.3 @@ -0,0 +1,67 @@ +.TH HPL_sdrv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_sdrv \- Send and receive a message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_sdrv(\fR +\fB\&double *\fR +\fI\&SBUF\fR, +\fB\&int\fR +\fI\&SCOUNT\fR, +\fB\&int\fR +\fI\&STAG\fR, +\fB\&double *\fR +\fI\&RBUF\fR, +\fB\&int\fR +\fI\&RCOUNT\fR, +\fB\&int\fR +\fI\&RTAG\fR, +\fB\&int\fR +\fI\&PARTNER\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_sdrv\fR +is a simple wrapper around MPI_Sendrecv. 
Its main purpose is +to allow for some experimentation and tuning of this simple function. +Messages of length less than or equal to zero are not sent nor +received. Successful completion is indicated by the returned error +code HPL_SUCCESS. +.SH ARGUMENTS +.TP 8 +SBUF (local input) double * +On entry, SBUF specifies the starting address of buffer to be +sent. +.TP 8 +SCOUNT (local input) int +On entry, SCOUNT specifies the number of double precision +entries in SBUF. SCOUNT must be at least zero. +.TP 8 +STAG (local input) int +On entry, STAG specifies the message tag to be used for the +sending communication operation. +.TP 8 +RBUF (local output) double * +On entry, RBUF specifies the starting address of buffer to be +received. +.TP 8 +RCOUNT (local input) int +On entry, RCOUNT specifies the number of double precision +entries in RBUF. RCOUNT must be at least zero. +.TP 8 +RTAG (local input) int +On entry, RTAG specifies the message tag to be used for the +receiving communication operation. +.TP 8 +PARTNER (local input) int +On entry, PARTNER specifies the rank of the collaborative +process in the communication space defined by COMM. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_send \ (3), +.BR HPL_recv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_send.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_send.3 new file mode 100644 index 000000000..48ffc5d62 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_send.3 @@ -0,0 +1,49 @@ +.TH HPL_send 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_send \- Send a message. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_send(\fR +\fB\&double *\fR +\fI\&SBUF\fR, +\fB\&int\fR +\fI\&SCOUNT\fR, +\fB\&int\fR +\fI\&DEST\fR, +\fB\&int\fR +\fI\&STAG\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_send\fR +is a simple wrapper around MPI_Send. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +MPI_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. +.SH ARGUMENTS +.TP 8 +SBUF (local input) double * +On entry, SBUF specifies the starting address of buffer to be +sent. +.TP 8 +SCOUNT (local input) int +On entry, SCOUNT specifies the number of double precision +entries in SBUF. SCOUNT must be at least zero. +.TP 8 +DEST (local input) int +On entry, DEST specifies the rank of the receiving process in +the communication space defined by COMM. +.TP 8 +STAG (local input) int +On entry, STAG specifies the message tag to be used for this +communication operation. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_recv \ (3), +.BR HPL_sdrv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_setran.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_setran.3 new file mode 100644 index 000000000..e9a9433ae --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_setran.3 @@ -0,0 +1,37 @@ +.TH HPL_setran 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_setran \- Manage the random number generator.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_setran(\fR +\fB\&const int\fR +\fI\&OPTION\fR, +\fB\&int *\fR +\fI\&IRAN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_setran\fR +initializes the random generator with the encoding of the +first number X(0) in the sequence, and the constants a and c used to +compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), +a and c are stored in the static variables irand, ias and ics. When +OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the +values of the input array IRAN. When OPTION is 3, IRAN is set to the +current value of irand, and irand is then incremented. +.SH ARGUMENTS +.TP 8 +OPTION (local input) const int +On entry, OPTION is an integer that specifies the operations +to be performed on the random generator as specified above. +.TP 8 +IRAN (local input/output) int * +On entry, IRAN is an array of dimension 2, that contains the +16-lower and 15-higher bits of a random number. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadN.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadN.3 new file mode 100644 index 000000000..452b8da34 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadN.3 @@ -0,0 +1,96 @@ +.TH HPL_spreadN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_spreadN \- Spread row panel U and forward current column panel.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_spreadN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int\fR +\fI\&SRCDIST\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_spreadN\fR +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of rows of U, that +should be spread on any given process row. This function also probes +for the presence of the column panel PBCST. In case of success, this +panel will be forwarded. If PBCST is NULL on input, this probing +mechanism will be disabled. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be spread) information. +.TP 8 +SIDE (global input) const enum HPL_SIDE +On entry, SIDE specifies whether the local piece of U located +in process IPMAP[SRCDIST] should be spread to the right or to +the left. This feature is used by the equilibration process. +.TP 8 +N (global input) const int +On entry, N specifies the local number of columns of U. N +must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U. 
+.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[nprow]). +.TP 8 +SRCDIST (local input) const int +On entry, SRCDIST specifies the source process that spreads +its piece of U. +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process before process IPMAP[i], with the convention +that IPLEN[nprow] is the total number of rows. In other words +IPLEN[i+1] - IPLEN[i] is the local number of rows of U that +should be moved to process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadT.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadT.3 new file mode 100644 index 000000000..54f7dda31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_spreadT.3 @@ -0,0 +1,96 @@ +.TH HPL_spreadT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_spreadT \- Spread row panel U and forward current column panel. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_spreadT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int\fR +\fI\&SRCDIST\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_spreadT\fR +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of columns of U, +that should be spread on any given process row. This function also +probes for the presence of the column panel PBCST. If available, +this panel will be forwarded. If PBCST is NULL on input, this +probing mechanism will be disabled. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be spread) information. +.TP 8 +SIDE (global input) const enum HPL_SIDE +On entry, SIDE specifies whether the local piece of U located +in process IPMAP[SRCDIST] should be spread to the right or to +the left. This feature is used by the equilibration process. +.TP 8 +N (global input) const int +On entry, N specifies the local number of rows of U. N must +be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U. 
+.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,N). +.TP 8 +SRCDIST (local input) const int +On entry, SRCDIST specifies the source process that spreads +its piece of U. +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process before process IPMAP[i], with the convention +that IPLEN[nprow] is the total number of rows. In other words +IPLEN[i+1] - IPLEN[i] is the local number of rows of U that +should be moved to process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sum.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sum.3 new file mode 100644 index 000000000..a3c4e2190 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_sum.3 @@ -0,0 +1,44 @@ +.TH HPL_sum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_sum \- Combine (sum) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_sum(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_sum\fR +combines (sum) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. 
 N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contain the +combined results. +.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer.3 new file mode 100644 index 000000000..61f3f7cb1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer.3 @@ -0,0 +1,35 @@ +.TH HPL_timer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer \- Timer facility. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_timer(\fR +\fB\&const int\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_timer\fR +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_timer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_timer calls in them. HPL_timer_enable() will re-enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_timer_boot() prior to any of +the functions mentioned above.
+.SH ARGUMENTS +.TP 8 +I (global input) const int +On entry, I specifies the timer to stop/start. +.SH SEE ALSO +.BR HPL_timer_cputime \ (3), +.BR HPL_timer_walltime \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_cputime.3 new file mode 100644 index 000000000..1f8987ca2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_cputime.3 @@ -0,0 +1,23 @@ +.TH HPL_timer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer_cputime \- Return the CPU time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_timer_cputime();\fR +.SH DESCRIPTION +\fB\&HPL_timer_cputime\fR +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. +.SH SEE ALSO +.BR HPL_timer_walltime \ (3), +.BR HPL_timer \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_walltime.3 new file mode 100644 index 000000000..9a6e898e7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_timer_walltime.3 @@ -0,0 +1,14 @@ +.TH HPL_timer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer_walltime \- Return the elapsed (wall-clock) time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_timer_walltime();\fR +.SH DESCRIPTION +\fB\&HPL_timer_walltime\fR +returns the elapsed (wall-clock) time. +.SH SEE ALSO +.BR HPL_timer_cputime \ (3), +.BR HPL_timer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_warn.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_warn.3 new file mode 100644 index 000000000..6b051acb3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_warn.3 @@ -0,0 +1,59 @@ +.TH HPL_warn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_warn \- displays an error message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_warn(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_warn\fR +displays an error message. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler.
+.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_warn( stderr, __LINE__, __FILE__, +.br + "Demo.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_abort \ (3), +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_xjumpm.3 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_xjumpm.3 new file mode 100644 index 000000000..df3e0a954 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/man/man3/HPL_xjumpm.3 @@ -0,0 +1,77 @@ +.TH HPL_xjumpm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_xjumpm \- Compute constants to jump in the random sequence. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_xjumpm(\fR +\fB\&const int\fR +\fI\&JUMPM\fR, +\fB\&int *\fR +\fI\&MULT\fR, +\fB\&int *\fR +\fI\&IADD\fR, +\fB\&int *\fR +\fI\&IRANN\fR, +\fB\&int *\fR +\fI\&IRANM\fR, +\fB\&int *\fR +\fI\&IAM\fR, +\fB\&int *\fR +\fI\&ICM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_xjumpm\fR +computes the constants A and C to jump JUMPM numbers in +the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in +MULT and IADD specify how to jump from one entry in the sequence to +the next. +.SH ARGUMENTS +.TP 8 +JUMPM (local input) const int +On entry, JUMPM specifies the number of entries in the +sequence to jump over. When JUMPM is less than or equal to zero, +A and C are not computed, IRANM is set to IRANN corresponding +to a jump of size zero. +.TP 8 +MULT (local input) int * +On entry, MULT is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant a to jump from +X(n) to X(n+1) = a*X(n) + c in the random sequence.
+.TP 8 +IADD (local input) int * +On entry, IADD is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant c to jump from +X(n) to X(n+1) = a*X(n) + c in the random sequence. +.TP 8 +IRANN (local input) int * +On entry, IRANN is an array of dimension 2, that contains the +16-lower and 15-higher bits of the encoding of X(n). +.TP 8 +IRANM (local output) int * +On entry, IRANM is an array of dimension 2. On exit, this +array contains respectively the 16-lower and 15-higher bits +of the encoding of X(n+JUMPM). +.TP 8 +IAM (local output) int * +On entry, IAM is an array of dimension 2. On exit, when JUMPM +is greater than zero, this array contains the encoded +constant A to jump from X(n) to X(n+JUMPM) in the random +sequence. IAM(0:1) contains respectively the 16-lower and +15-higher bits of this constant A. When JUMPM is less than or +equal to zero, this array is not referenced. +.TP 8 +ICM (local output) int * +On entry, ICM is an array of dimension 2. On exit, when JUMPM +is greater than zero, this array contains the encoded +constant C to jump from X(n) to X(n+JUMPM) in the random +sequence. ICM(0:1) contains respectively the 16-lower and +15-higher bits of this constant C. When JUMPM is less than or +equal to zero, this array is not referenced. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/missing b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/missing new file mode 100755 index 000000000..625aeb118 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard , 1996.
+ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? 
+ ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. + exit $st +fi + +perl_URL=https://www.perl.org/ +flex_URL=https://github.com/westes/flex +gnu_software_URL=https://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it."
+ program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." + echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." 
+ ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). +exit $st + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS new file mode 100644 index 000000000..056fd81ba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. 
The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = FreeBSD_PIV_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpich +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a $(MPdir)/lib/libpmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/share/ATLAS/lib/FreeBSD_P5SSE2 +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/f77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = /usr/bin/ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.HPUX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.HPUX_FBLAS new file mode 100644 index 000000000..af3f5da5f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.HPUX_FBLAS @@ -0,0 +1,179 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = HPUX +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# 
---------------------------------------------------------------------- +# MPIinc tells the C compiler where to find the MPI header files, MPIlib +# is defined to be the name of the MPI library to be used. The variables +# MPIdir and MPIplat are only used for defining MPIinc and MPIlib). +# +MPIdir = $(HOME)/local/mpi +MPIplat = $(MPIdir)/hpux/ch_p4 +# +MPIinc = -I$(MPIdir)/include -I$(MPIplat)/include +MPIlib = $(MPIplat)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - BLAS library ------------------------------------------------------- +# ---------------------------------------------------------------------- +# +BLASlib = /usr/lib/pa1.1/libblas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc) +HPL_LIBS = $(HPLlib) $(BLASlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS F77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(HPL_INCLUDES) $(F2CDEFS) $(HPL_OPTS) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -D_INCLUDE_POSIX_SOURCE -DUseTimes -Aa +O4 +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = cc +LINKFLAGS = -Aa +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.I860_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.I860_FBLAS new file mode 100644 index 000000000..984236be2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.I860_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = I860_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = -lmpi +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lkmath +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) -nx +CCFLAGS = $(HPL_DEFS) -O4 -nx +# +LINKER = f77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.IRIX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.IRIX_FBLAS new file mode 100644 index 000000000..d78bcf09f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.IRIX_FBLAS @@ -0,0 +1,181 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = IRIX_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/IRIX64/ch_p4/include +MPlib = $(MPdir)/IRIX64/ch_p4/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lblas +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DStringSunStyle -DF77_INTEGER=int +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) -64 +CCFLAGS = $(HPL_DEFS) -O3 -64 -OPT:Olimit=15000 -TARG:platform=IP30 \ + -LNO:blocking=OFF -LOPT:alias=typed +# +LINKER = cc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS new file mode 100644 index 000000000..624306902 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the Fortran 77 BLAS interface +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/gcc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS new file mode 100644 index 000000000..07985f781 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL new file mode 100644 index 000000000..ddf3fb4b6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# F2CDEFS stays empty: HPL_OPTS below selects the vsip library. +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the Fortran 77 BLAS interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/gcc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_Intel64 b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_Intel64 new file mode 100644 index 000000000..47661c25d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_Intel64 @@ -0,0 +1,193 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_Intel64 +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +# MPdir = /opt/intel/mpi/4.1.0 +# MPinc = -I$(MPdir)/include64 +# MPlib = $(MPdir)/lib64/libmpi.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# LAdir must be the MKL root holding include/ and lib/intel64/. +LAdir = $(MKLROOT) +ifndef LAinc +LAinc = $(LAdir)/include +endif +ifndef LAlib +LAlib = -L$(LAdir)/lib/intel64 \ + -Wl,--start-group \ + $(LAdir)/lib/intel64/libmkl_intel_lp64.a \ + $(LAdir)/lib/intel64/libmkl_intel_thread.a \ + $(LAdir)/lib/intel64/libmkl_core.a \ + -Wl,--end-group -lpthread -ldl +endif +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpiicc +CCNOOPT = $(HPL_DEFS) +OMP_DEFS = -openmp +CCFLAGS = $(HPL_DEFS) -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) $(OMP_DEFS) -mt_mpi +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS new file mode 100644 index 000000000..535a0e214 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm new file mode 100644 index 000000000..31fc9ea74 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_CBLAS_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS new file mode 100644 index 000000000..5ed9aac12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm new file mode 100644 index 000000000..a2416396c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_FBLAS_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL new file mode 100644 index 000000000..0f690a1b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm new file mode 100644 index 000000000..fee265e46 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_VSIPL_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.MacOSX_Accelerate b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.MacOSX_Accelerate new file mode 100644 index 000000000..d1ce69b64 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.MacOSX_Accelerate @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = MacOSX_Accelerate +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +# MPdir = /opt/intel/mpi/4.1.0 +# MPinc = -I$(MPdir)/include64 +# MPlib = $(MPdir)/lib64/libmpi.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -framework Accelerate +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc-openmpi-mp +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = cr +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR2_FBLAS new file mode 100644 index 000000000..628f2c152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR2_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWR2_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lesslp2 +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpcc_r +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1 +# +LINKER = mpxlf_r +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR3_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR3_FBLAS new file mode 100644 index 000000000..bba468803 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWR3_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWR3_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lessl +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/vac/bin/xlc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -qtune=pwr3 -qarch=pwr3 -O3 -qmaxmem=-1 -qfloat=hsflt +# +LINKER = /usr/bin/xlf +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWRPC_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWRPC_FBLAS new file mode 100644 index 000000000..2a0fb2ec6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.PWRPC_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWRPC_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lessl +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpcc_r +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 -qarch=ppc -qtune=604 -qmaxmem=-1 +# +LINKER = mpxlf_r +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS new file mode 100644 index 000000000..1ade2d8aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2-g_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -xlic_lib=sunperf +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -g +# +LINKER = purify -best-effort f77 +LINKFLAGS = +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL new file mode 100644 index 000000000..1cbb371fd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2-g_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/local/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -g +# +LINKER = purify -best-effort cc +LINKFLAGS = +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2_FBLAS new file mode 100644 index 000000000..a1d5d6315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.SUN4SOL2_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -xlic_lib=sunperf +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -dalign -fsingle -xO5 -native -xarch=v8plusa +# +LINKER = f77 +LINKFLAGS = -dalign -native -xarch=v8plusa -xO5 +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.T3E_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.T3E_FBLAS new file mode 100644 index 000000000..fe12cae9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.T3E_FBLAS @@ -0,0 +1,187 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = T3E_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DUpCase -DF77_INTEGER=long -DStringCrayStyle \ + -DCRAY_BLAS -DHPL_USE_TIMES +# +# When UpCase is defined, CRAY_BLAS redefines the BLAS routines used in +# HPL to be prefixed with an S. 
In the Cray programming environment, the +# default INTEGER and REAL size is 64 bits. This is reflected in the +# Cray Scientific Library as well, so SGEMM is the 64-bit matrix multi- +# ply. +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 +# +LINKER = f77 +LINKFLAGS = -O3,unroll2,pipeline2 +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS new file mode 100644 index 000000000..3d8062061 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 
2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Tru64_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/alpha/ch_p4/include +MPlib = $(MPdir)/alpha/ch_p4/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lcxml +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 +# +LINKER = f77 +LINKFLAGS = -nofor_main -O5 -arch host -tune host +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS_elan b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS_elan new file mode 100644 index 000000000..f9550412c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.Tru64_FBLAS_elan @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Tru64_FBLAS_elan +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = -lmpi -lelan +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lcxml +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 +# +LINKER = f77 +LINKFLAGS = -nofor_main -O5 -arch host -tune host +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.UNKNOWN.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.UNKNOWN.in new file mode 100644 index 000000000..8cbbd8242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/Make.UNKNOWN.in @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = @SHELL@ +# +CD = @CD@ +CP = @CP@ +LN_S = @LN_S@ +MKDIR = @MKDIR@ +RM = @RM@ +TOUCH = @TOUCH@ +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = @ARCH@ +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = @MPDIR@ +MPinc = @MPINC@ +MPlib = @MPLIB@ +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = @LADIR@ +LAinc = @LAINC@ +LAlib = @LALIB@ +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = @F2CDEFS@ +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = @CC@ +CCNOOPT = $(HPL_DEFS) @CCNOOPT@ +CCFLAGS = $(HPL_DEFS) @CCFLAGS@ +# +LINKER = @LINKER@ +LINKFLAGS = @LINKFLAGS@ +# +ARCHIVER = @ARCHIVER@ +ARFLAGS = @ARFLAGS@ +RANLIB = @RANLIB@ +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/make_generic b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/make_generic new file mode 100644 index 000000000..68cf74a3a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/setup/make_generic @@ -0,0 +1,83 @@ +#!/bin/sh +# +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# +# Configure script to create Make.UNKNOWN from Make.UNKNOWN.in for the +# HPL distribution, so users without a real Unix system can have a gene- +# ric Make.UNKNOWN to edit for their needs. This script substitutes +# pathless version of all the system programs, and commonly used options +# values into Make.UNKNOWN.in. +# +######################################################################## +# +sed -e 's%@SHELL@%/bin/sh%' \ + -e 's%@CD@%cd%' \ + -e 's%@CP@%cp%' \ + -e 's%@LN_S@%ln -s%' \ + -e 's%@MKDIR@%mkdir%' \ + -e 's%@RM@%/bin/rm -f%' \ + -e 's%@TOUCH@%touch%' \ + -e 's%@ARCH@%UNKNOWN%' \ + -e 's%@CC@%mpicc%' \ + -e 's%@CCNOOPT@%%' \ + -e 's%@CCFLAGS@%%' \ + -e 's%@LINKER@%mpif77%' \ + -e 's%@LINKFLAGS@%%' \ + -e 's%@ARCHIVER@%ar%' \ + -e 's%@ARFLAGS@%r%' \ + -e 's%@RANLIB@%echo%' \ + -e 's%@MPDIR@%%' \ + -e 's%@MPINC@%%' \ + -e 's%@MPLIB@%%' \ + -e 's%@F2CDEFS@%-DAdd_ -DF77_INTEGER=int -DStringSunStyle%' \ + -e 's%@LADIR@%%' \ + -e 's%@LAINC@%%' \ + -e 's%@LALIB@%-lblas%' \ + Make.UNKNOWN.in > Make.UNKNOWN +# +######################################################################## diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.am new file mode 100644 index 000000000..2e6d3d454 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.am @@ -0,0 +1,42 @@ +AM_CPPFLAGS = -I$(top_srcdir)/../include + +lib_LIBRARIES = libhpl.a + +libhpl_a_SOURCES = \ +auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \ +blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \ +blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \ +blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \ +comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \ +comm/HPL_binit.c comm/HPL_bwait.c 
comm/HPL_blong.c comm/HPL_1ring.c \ +comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \ +grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \ +grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \ +grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \ +panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \ +pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \ +pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \ +pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \ +pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \ +pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \ +pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \ +pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \ +pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \ +pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \ +pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \ +pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \ +pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \ +pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \ +pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \ 
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \ +pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \ +pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.in new file mode 100644 index 000000000..139ecbad0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/Makefile.in @@ -0,0 +1,1355 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am 
$(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LIBRARIES = $(lib_LIBRARIES) +AR = ar +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +libhpl_a_AR = $(AR) $(ARFLAGS) +libhpl_a_LIBADD = +am__dirstamp = $(am__leading_dot)dirstamp +am_libhpl_a_OBJECTS = auxil/HPL_dlatcpy.$(OBJEXT) \ + auxil/HPL_fprintf.$(OBJEXT) auxil/HPL_dlacpy.$(OBJEXT) \ + auxil/HPL_dlamch.$(OBJEXT) blas/HPL_dscal.$(OBJEXT) \ + blas/HPL_dtrsm.$(OBJEXT) blas/HPL_dtrsv.$(OBJEXT) \ + blas/HPL_idamax.$(OBJEXT) blas/HPL_dgemv.$(OBJEXT) \ + blas/HPL_dscal.$(OBJEXT) blas/HPL_daxpy.$(OBJEXT) \ + blas/HPL_dcopy.$(OBJEXT) blas/HPL_dgemm.$(OBJEXT) \ + blas/HPL_dgemv.$(OBJEXT) blas/HPL_dger.$(OBJEXT) \ + comm/HPL_sdrv.$(OBJEXT) comm/HPL_send.$(OBJEXT) \ + comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \ + comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \ + comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \ + comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \ + comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \ + comm/HPL_packL.$(OBJEXT) grid/HPL_reduce.$(OBJEXT) \ + grid/HPL_sum.$(OBJEXT) grid/HPL_grid_info.$(OBJEXT) \ + grid/HPL_grid_init.$(OBJEXT) grid/HPL_all_reduce.$(OBJEXT) \ + grid/HPL_broadcast.$(OBJEXT) grid/HPL_grid_exit.$(OBJEXT) \ + grid/HPL_max.$(OBJEXT) grid/HPL_min.$(OBJEXT) \ + grid/HPL_all_reduce.$(OBJEXT) grid/HPL_barrier.$(OBJEXT) \ + panel/HPL_pdpanel_disp.$(OBJEXT) \ + panel/HPL_pdpanel_free.$(OBJEXT) \ + panel/HPL_pdpanel_init.$(OBJEXT) \ + panel/HPL_pdpanel_new.$(OBJEXT) pauxil/HPL_pdlamch.$(OBJEXT) \ + pauxil/HPL_pdlange.$(OBJEXT) pauxil/HPL_indxg2p.$(OBJEXT) \ + pauxil/HPL_numroc.$(OBJEXT) pauxil/HPL_numrocI.$(OBJEXT) \ + pauxil/HPL_numrocI.$(OBJEXT) pauxil/HPL_dlaswp00N.$(OBJEXT) \ + pauxil/HPL_dlaswp01N.$(OBJEXT) pauxil/HPL_dlaswp01T.$(OBJEXT) \ + pauxil/HPL_dlaswp02N.$(OBJEXT) 
pauxil/HPL_dlaswp03N.$(OBJEXT) \ + pauxil/HPL_dlaswp03T.$(OBJEXT) pauxil/HPL_dlaswp04N.$(OBJEXT) \ + pauxil/HPL_dlaswp04T.$(OBJEXT) pauxil/HPL_dlaswp05N.$(OBJEXT) \ + pauxil/HPL_dlaswp05T.$(OBJEXT) pauxil/HPL_dlaswp06N.$(OBJEXT) \ + pauxil/HPL_dlaswp06T.$(OBJEXT) pauxil/HPL_infog2l.$(OBJEXT) \ + pauxil/HPL_dlaswp10N.$(OBJEXT) pauxil/HPL_pwarn.$(OBJEXT) \ + pfact/HPL_pdpanllN.$(OBJEXT) pfact/HPL_pdpanllT.$(OBJEXT) \ + pfact/HPL_pdpanrlN.$(OBJEXT) pfact/HPL_pdpanrlT.$(OBJEXT) \ + pfact/HPL_pdrpancrN.$(OBJEXT) pfact/HPL_pdrpancrT.$(OBJEXT) \ + pfact/HPL_pdrpanllN.$(OBJEXT) pfact/HPL_pdrpanllT.$(OBJEXT) \ + pfact/HPL_pdrpanrlN.$(OBJEXT) pfact/HPL_pdrpanrlT.$(OBJEXT) \ + pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdfact.$(OBJEXT) \ + pfact/HPL_dlocmax.$(OBJEXT) pfact/HPL_pdpancrT.$(OBJEXT) \ + pfact/HPL_pdpancrN.$(OBJEXT) pfact/HPL_dlocmax.$(OBJEXT) \ + pfact/HPL_dlocswpN.$(OBJEXT) pfact/HPL_dlocswpT.$(OBJEXT) \ + pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdpanllN.$(OBJEXT) \ + pfact/HPL_pdpanllT.$(OBJEXT) pfact/HPL_pdpanrlN.$(OBJEXT) \ + pfact/HPL_pdpanrlT.$(OBJEXT) pfact/HPL_pdrpancrN.$(OBJEXT) \ + pfact/HPL_pdrpancrT.$(OBJEXT) pfact/HPL_pdrpanllN.$(OBJEXT) \ + pfact/HPL_pdrpanllT.$(OBJEXT) pfact/HPL_pdrpanrlN.$(OBJEXT) \ + pfact/HPL_pdrpanrlT.$(OBJEXT) pauxil/HPL_pabort.$(OBJEXT) \ + pauxil/HPL_pdlamch.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdupdateNN.$(OBJEXT) \ + pgesv/HPL_pdupdateNT.$(OBJEXT) pgesv/HPL_pdupdateTN.$(OBJEXT) \ + pgesv/HPL_pdupdateTT.$(OBJEXT) pgesv/HPL_equil.$(OBJEXT) \ + pgesv/HPL_pipid.$(OBJEXT) pgesv/HPL_plindx0.$(OBJEXT) \ + pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_plindx1.$(OBJEXT) \ + pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_rollN.$(OBJEXT) \ + pgesv/HPL_rollT.$(OBJEXT) pgesv/HPL_spreadN.$(OBJEXT) \ + pgesv/HPL_spreadT.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) 
pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdlaswp00N.$(OBJEXT) \ + pgesv/HPL_pdlaswp00T.$(OBJEXT) pgesv/HPL_pdlaswp01N.$(OBJEXT) \ + pgesv/HPL_pdlaswp01T.$(OBJEXT) pgesv/HPL_pdtrsv.$(OBJEXT) \ + pgesv/HPL_pdupdateNN.$(OBJEXT) pgesv/HPL_pdupdateNT.$(OBJEXT) \ + pgesv/HPL_pdupdateTN.$(OBJEXT) pgesv/HPL_pdupdateTT.$(OBJEXT) \ + pgesv/HPL_logsort.$(OBJEXT) pgesv/HPL_perm.$(OBJEXT) +libhpl_a_OBJECTS = $(am_libhpl_a_OBJECTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = auxil/$(DEPDIR)/HPL_dlacpy.Po \ + auxil/$(DEPDIR)/HPL_dlamch.Po auxil/$(DEPDIR)/HPL_dlatcpy.Po \ + auxil/$(DEPDIR)/HPL_fprintf.Po blas/$(DEPDIR)/HPL_daxpy.Po \ + blas/$(DEPDIR)/HPL_dcopy.Po blas/$(DEPDIR)/HPL_dgemm.Po \ + blas/$(DEPDIR)/HPL_dgemv.Po blas/$(DEPDIR)/HPL_dger.Po \ + blas/$(DEPDIR)/HPL_dscal.Po blas/$(DEPDIR)/HPL_dtrsm.Po \ + blas/$(DEPDIR)/HPL_dtrsv.Po blas/$(DEPDIR)/HPL_idamax.Po \ + comm/$(DEPDIR)/HPL_1rinM.Po comm/$(DEPDIR)/HPL_1ring.Po \ + comm/$(DEPDIR)/HPL_2rinM.Po comm/$(DEPDIR)/HPL_2ring.Po \ + comm/$(DEPDIR)/HPL_bcast.Po comm/$(DEPDIR)/HPL_binit.Po \ + comm/$(DEPDIR)/HPL_blonM.Po comm/$(DEPDIR)/HPL_blong.Po \ + comm/$(DEPDIR)/HPL_bwait.Po comm/$(DEPDIR)/HPL_packL.Po \ + comm/$(DEPDIR)/HPL_recv.Po comm/$(DEPDIR)/HPL_sdrv.Po \ + comm/$(DEPDIR)/HPL_send.Po grid/$(DEPDIR)/HPL_all_reduce.Po \ + grid/$(DEPDIR)/HPL_barrier.Po grid/$(DEPDIR)/HPL_broadcast.Po \ + grid/$(DEPDIR)/HPL_grid_exit.Po \ + grid/$(DEPDIR)/HPL_grid_info.Po \ + grid/$(DEPDIR)/HPL_grid_init.Po grid/$(DEPDIR)/HPL_max.Po \ + grid/$(DEPDIR)/HPL_min.Po 
grid/$(DEPDIR)/HPL_reduce.Po \ + grid/$(DEPDIR)/HPL_sum.Po panel/$(DEPDIR)/HPL_pdpanel_disp.Po \ + panel/$(DEPDIR)/HPL_pdpanel_free.Po \ + panel/$(DEPDIR)/HPL_pdpanel_init.Po \ + panel/$(DEPDIR)/HPL_pdpanel_new.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp00N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp01N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp01T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp02N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp03N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp03T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp04N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp04T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp05N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp05T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp06N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp06T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp10N.Po \ + pauxil/$(DEPDIR)/HPL_indxg2p.Po \ + pauxil/$(DEPDIR)/HPL_infog2l.Po pauxil/$(DEPDIR)/HPL_numroc.Po \ + pauxil/$(DEPDIR)/HPL_numrocI.Po pauxil/$(DEPDIR)/HPL_pabort.Po \ + pauxil/$(DEPDIR)/HPL_pdlamch.Po \ + pauxil/$(DEPDIR)/HPL_pdlange.Po pauxil/$(DEPDIR)/HPL_pwarn.Po \ + pfact/$(DEPDIR)/HPL_dlocmax.Po pfact/$(DEPDIR)/HPL_dlocswpN.Po \ + pfact/$(DEPDIR)/HPL_dlocswpT.Po pfact/$(DEPDIR)/HPL_pdfact.Po \ + pfact/$(DEPDIR)/HPL_pdmxswp.Po pfact/$(DEPDIR)/HPL_pdpancrN.Po \ + pfact/$(DEPDIR)/HPL_pdpancrT.Po \ + pfact/$(DEPDIR)/HPL_pdpanllN.Po \ + pfact/$(DEPDIR)/HPL_pdpanllT.Po \ + pfact/$(DEPDIR)/HPL_pdpanrlN.Po \ + pfact/$(DEPDIR)/HPL_pdpanrlT.Po \ + pfact/$(DEPDIR)/HPL_pdrpancrN.Po \ + pfact/$(DEPDIR)/HPL_pdrpancrT.Po \ + pfact/$(DEPDIR)/HPL_pdrpanllN.Po \ + pfact/$(DEPDIR)/HPL_pdrpanllT.Po \ + pfact/$(DEPDIR)/HPL_pdrpanrlN.Po \ + pfact/$(DEPDIR)/HPL_pdrpanrlT.Po pgesv/$(DEPDIR)/HPL_equil.Po \ + pgesv/$(DEPDIR)/HPL_logsort.Po pgesv/$(DEPDIR)/HPL_pdgesv.Po \ + pgesv/$(DEPDIR)/HPL_pdgesv0.Po pgesv/$(DEPDIR)/HPL_pdgesvK1.Po \ + pgesv/$(DEPDIR)/HPL_pdgesvK2.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po \ + pgesv/$(DEPDIR)/HPL_pdtrsv.Po \ + 
pgesv/$(DEPDIR)/HPL_pdupdateNN.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateNT.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateTN.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateTT.Po pgesv/$(DEPDIR)/HPL_perm.Po \ + pgesv/$(DEPDIR)/HPL_pipid.Po pgesv/$(DEPDIR)/HPL_plindx0.Po \ + pgesv/$(DEPDIR)/HPL_plindx1.Po pgesv/$(DEPDIR)/HPL_plindx10.Po \ + pgesv/$(DEPDIR)/HPL_rollN.Po pgesv/$(DEPDIR)/HPL_rollT.Po \ + pgesv/$(DEPDIR)/HPL_spreadN.Po pgesv/$(DEPDIR)/HPL_spreadT.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libhpl_a_SOURCES) +DIST_SOURCES = $(libhpl_a_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ 
+host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/../include +lib_LIBRARIES = libhpl.a +libhpl_a_SOURCES = \ +auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \ +blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \ +blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \ +blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \ +comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \ +comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \ +comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \ +grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \ +grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \ +grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \ +panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \ +pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \ +pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \ +pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \ +pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \ +pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \ +pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \ +pauxil/HPL_infog2l.c 
pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \ +pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \ +pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \ +pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \ +pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \ +pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \ +pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \ +pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \ +pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \ +pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +Makefile: $(srcdir)/Makefile.in 
$(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLIBRARIES: $(lib_LIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; } + @$(POST_INSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + if test -f $$p; then \ + $(am__strip_dir) \ + echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \ + ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \ + else :; fi; \ + done + +uninstall-libLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(libdir)'; $(am__uninstall_files_from_dir) + +clean-libLIBRARIES: + -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES) +auxil/$(am__dirstamp): + @$(MKDIR_P) auxil + @: > auxil/$(am__dirstamp) +auxil/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) auxil/$(DEPDIR) + @: > auxil/$(DEPDIR)/$(am__dirstamp) 
+auxil/HPL_dlatcpy.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_fprintf.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_dlacpy.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_dlamch.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +blas/$(am__dirstamp): + @$(MKDIR_P) blas + @: > blas/$(am__dirstamp) +blas/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) blas/$(DEPDIR) + @: > blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dscal.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dtrsm.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dtrsv.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_idamax.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dgemv.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_daxpy.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dcopy.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dgemm.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dger.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +comm/$(am__dirstamp): + @$(MKDIR_P) comm + @: > comm/$(am__dirstamp) +comm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) comm/$(DEPDIR) + @: > comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_sdrv.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_send.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_recv.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_bcast.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_binit.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_bwait.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_blong.$(OBJEXT): comm/$(am__dirstamp) \ + 
comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_1ring.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_1rinM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_2rinM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_2ring.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_blonM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_packL.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +grid/$(am__dirstamp): + @$(MKDIR_P) grid + @: > grid/$(am__dirstamp) +grid/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) grid/$(DEPDIR) + @: > grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_reduce.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_sum.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_info.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_init.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_all_reduce.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_broadcast.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_exit.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_max.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_min.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_barrier.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +panel/$(am__dirstamp): + @$(MKDIR_P) panel + @: > panel/$(am__dirstamp) +panel/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) panel/$(DEPDIR) + @: > panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_disp.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_free.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_init.$(OBJEXT): panel/$(am__dirstamp) \ + 
panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_new.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +pauxil/$(am__dirstamp): + @$(MKDIR_P) pauxil + @: > pauxil/$(am__dirstamp) +pauxil/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pauxil/$(DEPDIR) + @: > pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pdlamch.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pdlange.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_indxg2p.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_numroc.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_numrocI.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp00N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp01N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp01T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp02N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp03N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp03T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp04N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp04T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp05N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp05T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp06N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp06T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_infog2l.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp10N.$(OBJEXT): 
pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pwarn.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pfact/$(am__dirstamp): + @$(MKDIR_P) pfact + @: > pfact/$(am__dirstamp) +pfact/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pfact/$(DEPDIR) + @: > pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanllN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanllT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpancrN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpancrT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanllN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanllT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdmxswp.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdfact.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocmax.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpancrT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpancrN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocswpN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocswpT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pabort.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pgesv/$(am__dirstamp): + @$(MKDIR_P) pgesv + @: > 
pgesv/$(am__dirstamp) +pgesv/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pgesv/$(DEPDIR) + @: > pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesv0.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesv.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesvK1.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesvK2.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateNN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateNT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateTN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateTT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_equil.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pipid.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx0.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx10.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx1.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_rollN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_rollT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_spreadN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_spreadT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp00N.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp00T.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp01N.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp01T.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) 
+pgesv/HPL_pdtrsv.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_logsort.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_perm.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) + +libhpl.a: $(libhpl_a_OBJECTS) $(libhpl_a_DEPENDENCIES) $(EXTRA_libhpl_a_DEPENDENCIES) + $(AM_V_at)-rm -f libhpl.a + $(AM_V_AR)$(libhpl_a_AR) libhpl.a $(libhpl_a_OBJECTS) $(libhpl_a_LIBADD) + $(AM_V_at)$(RANLIB) libhpl.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f auxil/*.$(OBJEXT) + -rm -f blas/*.$(OBJEXT) + -rm -f comm/*.$(OBJEXT) + -rm -f grid/*.$(OBJEXT) + -rm -f panel/*.$(OBJEXT) + -rm -f pauxil/*.$(OBJEXT) + -rm -f pfact/*.$(OBJEXT) + -rm -f pgesv/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlacpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlamch.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlatcpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_fprintf.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_daxpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dcopy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dger.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dscal.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@blas/$(DEPDIR)/HPL_idamax.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1rinM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1ring.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2rinM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2ring.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_binit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blonM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blong.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bwait.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_packL.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_recv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_sdrv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_send.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_all_reduce.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_barrier.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_broadcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_exit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@grid/$(DEPDIR)/HPL_max.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_min.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_reduce.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_sum.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_disp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_free.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_new.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp00N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp02N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp10N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_indxg2p.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_infog2l.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numroc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numrocI.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pabort.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlamch.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlange.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pwarn.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocmax.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdfact.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdmxswp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlN.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_equil.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_logsort.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv0.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK1.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdtrsv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_perm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pipid.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx0.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx1.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx10.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadT.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ 
`$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + 
sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f auxil/$(DEPDIR)/$(am__dirstamp) + -rm -f auxil/$(am__dirstamp) + -rm -f blas/$(DEPDIR)/$(am__dirstamp) + -rm -f blas/$(am__dirstamp) + -rm -f comm/$(DEPDIR)/$(am__dirstamp) + -rm -f comm/$(am__dirstamp) + -rm -f grid/$(DEPDIR)/$(am__dirstamp) + -rm -f grid/$(am__dirstamp) + -rm -f panel/$(DEPDIR)/$(am__dirstamp) + -rm -f panel/$(am__dirstamp) + -rm -f pauxil/$(DEPDIR)/$(am__dirstamp) + -rm -f pauxil/$(am__dirstamp) + -rm -f pfact/$(DEPDIR)/$(am__dirstamp) + -rm -f pfact/$(am__dirstamp) + -rm -f pgesv/$(DEPDIR)/$(am__dirstamp) + -rm -f pgesv/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po + -rm -f auxil/$(DEPDIR)/HPL_dlamch.Po + -rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po + -rm -f auxil/$(DEPDIR)/HPL_fprintf.Po + -rm -f blas/$(DEPDIR)/HPL_daxpy.Po + -rm -f blas/$(DEPDIR)/HPL_dcopy.Po + -rm -f blas/$(DEPDIR)/HPL_dgemm.Po + -rm -f blas/$(DEPDIR)/HPL_dgemv.Po + -rm -f blas/$(DEPDIR)/HPL_dger.Po + -rm -f blas/$(DEPDIR)/HPL_dscal.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsm.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsv.Po + -rm -f blas/$(DEPDIR)/HPL_idamax.Po + -rm -f comm/$(DEPDIR)/HPL_1rinM.Po + -rm -f comm/$(DEPDIR)/HPL_1ring.Po + -rm -f comm/$(DEPDIR)/HPL_2rinM.Po + -rm -f comm/$(DEPDIR)/HPL_2ring.Po + -rm -f comm/$(DEPDIR)/HPL_bcast.Po + -rm -f comm/$(DEPDIR)/HPL_binit.Po + -rm -f comm/$(DEPDIR)/HPL_blonM.Po + -rm -f comm/$(DEPDIR)/HPL_blong.Po + -rm -f comm/$(DEPDIR)/HPL_bwait.Po + -rm -f comm/$(DEPDIR)/HPL_packL.Po + -rm -f comm/$(DEPDIR)/HPL_recv.Po + -rm -f comm/$(DEPDIR)/HPL_sdrv.Po + -rm -f comm/$(DEPDIR)/HPL_send.Po + -rm -f grid/$(DEPDIR)/HPL_all_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_barrier.Po + -rm -f 
grid/$(DEPDIR)/HPL_broadcast.Po + -rm -f grid/$(DEPDIR)/HPL_grid_exit.Po + -rm -f grid/$(DEPDIR)/HPL_grid_info.Po + -rm -f grid/$(DEPDIR)/HPL_grid_init.Po + -rm -f grid/$(DEPDIR)/HPL_max.Po + -rm -f grid/$(DEPDIR)/HPL_min.Po + -rm -f grid/$(DEPDIR)/HPL_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_sum.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po + -rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po + -rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po + -rm -f pauxil/$(DEPDIR)/HPL_numroc.Po + -rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po + -rm -f pauxil/$(DEPDIR)/HPL_pabort.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po + -rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdfact.Po + -rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po + -rm -f 
pfact/$(DEPDIR)/HPL_pdrpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po + -rm -f pgesv/$(DEPDIR)/HPL_equil.Po + -rm -f pgesv/$(DEPDIR)/HPL_logsort.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po + -rm -f pgesv/$(DEPDIR)/HPL_perm.Po + -rm -f pgesv/$(DEPDIR)/HPL_pipid.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollN.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollT.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po + -rm -f auxil/$(DEPDIR)/HPL_dlamch.Po + -rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po + -rm -f auxil/$(DEPDIR)/HPL_fprintf.Po + -rm -f blas/$(DEPDIR)/HPL_daxpy.Po + -rm -f blas/$(DEPDIR)/HPL_dcopy.Po + -rm -f blas/$(DEPDIR)/HPL_dgemm.Po + -rm -f blas/$(DEPDIR)/HPL_dgemv.Po + -rm -f blas/$(DEPDIR)/HPL_dger.Po + -rm -f blas/$(DEPDIR)/HPL_dscal.Po 
+ -rm -f blas/$(DEPDIR)/HPL_dtrsm.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsv.Po + -rm -f blas/$(DEPDIR)/HPL_idamax.Po + -rm -f comm/$(DEPDIR)/HPL_1rinM.Po + -rm -f comm/$(DEPDIR)/HPL_1ring.Po + -rm -f comm/$(DEPDIR)/HPL_2rinM.Po + -rm -f comm/$(DEPDIR)/HPL_2ring.Po + -rm -f comm/$(DEPDIR)/HPL_bcast.Po + -rm -f comm/$(DEPDIR)/HPL_binit.Po + -rm -f comm/$(DEPDIR)/HPL_blonM.Po + -rm -f comm/$(DEPDIR)/HPL_blong.Po + -rm -f comm/$(DEPDIR)/HPL_bwait.Po + -rm -f comm/$(DEPDIR)/HPL_packL.Po + -rm -f comm/$(DEPDIR)/HPL_recv.Po + -rm -f comm/$(DEPDIR)/HPL_sdrv.Po + -rm -f comm/$(DEPDIR)/HPL_send.Po + -rm -f grid/$(DEPDIR)/HPL_all_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_barrier.Po + -rm -f grid/$(DEPDIR)/HPL_broadcast.Po + -rm -f grid/$(DEPDIR)/HPL_grid_exit.Po + -rm -f grid/$(DEPDIR)/HPL_grid_info.Po + -rm -f grid/$(DEPDIR)/HPL_grid_init.Po + -rm -f grid/$(DEPDIR)/HPL_max.Po + -rm -f grid/$(DEPDIR)/HPL_min.Po + -rm -f grid/$(DEPDIR)/HPL_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_sum.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po + -rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po + -rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po + -rm -f pauxil/$(DEPDIR)/HPL_numroc.Po + -rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po + -rm -f pauxil/$(DEPDIR)/HPL_pabort.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po + -rm -f 
pauxil/$(DEPDIR)/HPL_pdlange.Po + -rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdfact.Po + -rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po + -rm -f pgesv/$(DEPDIR)/HPL_equil.Po + -rm -f pgesv/$(DEPDIR)/HPL_logsort.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po + -rm -f pgesv/$(DEPDIR)/HPL_perm.Po + -rm -f pgesv/$(DEPDIR)/HPL_pipid.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollN.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollT.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: 
uninstall-libLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLIBRARIES cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic distclean-tags \ + distdir dvi dvi-am html html-am info info-am install \ + install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am \ + install-libLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-libLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_abort.c new file mode 100644 index 000000000..bf0c5e727 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_abort.c @@ -0,0 +1,129 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_abort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_abort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_abort displays an error message on stderr and halts execution.
+ * Over-long messages are truncated to the local buffer; exit status is 1. + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occurred. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsnprintf( cline, sizeof( cline ), FORM, argptr ); /* bounded: never write past cline */ + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR in function", SRNAME, cline ); + else + HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); + exit( 1 ); /* nonzero status: an abort must not be reported as success */ +/* + * End of HPL_abort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlacpy.c new file mode 100644 index 000000000..ec71180eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlacpy.c @@ -0,0 +1,343 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factors + * #ifndef HPL_LACPY_M_DEPTH + * #define HPL_LACPY_M_DEPTH 32 + * #define HPL_LACPY_LOG2_M_DEPTH 5 + * #endif + * #ifndef HPL_LACPY_N_DEPTH + * #define HPL_LACPY_N_DEPTH 4 + * #define HPL_LACPY_LOG2_N_DEPTH 2 + * #endif + */ +#ifndef HPL_LACPY_M_DEPTH +#define HPL_LACPY_M_DEPTH 4 +#define HPL_LACPY_LOG2_M_DEPTH 2 +#endif +#ifndef HPL_LACPY_N_DEPTH +#define HPL_LACPY_N_DEPTH 2 +#define HPL_LACPY_LOG2_N_DEPTH 1 +#endif + +#ifdef STDC_HEADERS +void HPL_dlacpy +( + const int M, + const int N, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dlacpy +( M, N, A, LDA, B, LDB ) + const int M; + const int N; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlacpy copies an array A into an array B. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the arrays A and + * B. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the arrays A + * and B. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N).
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * B (local output) double * + * On entry, B points to an array of dimension (LDB,N). On exit, + * B is overwritten with A. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of the array B. + * LDB must be at least MAX(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_LACPY_USE_COPY + const double * A0 = A; double * B0 = B; register int j; /* A0, B0: column cursors stepped by LDA, LDB in the HPL_dcopy loop */ +#else +#if ( HPL_LACPY_N_DEPTH == 1 ) + const double * A0 = A; + double * B0 = B; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + const double * A0 = A, * A1 = A + LDA; + double * B0 = B, * B1 = B + LDB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + const double * A0 = A, * A1 = A + LDA, + * A2 = A + (LDA << 1), * A3 = A + 3 * LDA; + double * B0 = B, * B1 = B + LDB, + * B2 = B + (LDB << 1), * B3 = B + 3 * LDB; +#endif + const int incA = ( (unsigned int)(LDA) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incB = ( (unsigned int)(LDB) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incA0 = (unsigned int)(LDA) - M, + incB0 = (unsigned int)(LDB) - M; + int mu, nu; + register int i, j; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + +#ifdef HPL_LACPY_USE_COPY + for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 ); +#else + mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) << + HPL_LACPY_LOG2_M_DEPTH ); + nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) << + HPL_LACPY_LOG2_N_DEPTH ); + + for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 0] = A0[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0]; +#endif + +#if ( HPL_LACPY_M_DEPTH > 1 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 1] = A0[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2]; + B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4]; + B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6]; + B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7]; +#endif + +#endif 
+#if ( HPL_LACPY_M_DEPTH > 8 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8]; + B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10]; + B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12]; + B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14]; + B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = 
A1[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16]; + B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18]; + B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20]; + B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22]; + B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24]; + B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26]; + B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28]; + B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30]; + B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31]; +#endif + +#endif + +#if ( HPL_LACPY_N_DEPTH == 1 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; + A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH; + A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH; +#endif + } + + for( i = mu; i < M; i++ ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + *B0 = *A0; B0++; A0++; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; + *B2 = *A2; 
B2++; A2++; *B3 = *A3; B3++; A3++; +#endif + } + +#if ( HPL_LACPY_N_DEPTH == 1 ) + A0 += incA; B0 += incB; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; + A2 += incA; B2 += incB; A3 += incA; B3 += incB; +#endif + } + + for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH, + B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH ) + { + B0[ 0] = A0[ 0]; +#if ( HPL_LACPY_M_DEPTH > 1 ) + B0[ 1] = A0[ 1]; +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#endif +#if ( HPL_LACPY_M_DEPTH > 8 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#endif + } + for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; } + } +#endif +/* + * End of HPL_dlacpy + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlamch.c new file mode 100644 index 000000000..c685f0d5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlamch.c @@ -0,0 +1,876 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static function prototypes + * --------------------------------------------------------------------- + */ +static void HPL_dlamc1 +STDC_ARGS( +( int *, int *, int *, int * ) ); +static void HPL_dlamc2 +STDC_ARGS( +( int *, int *, int *, double *, + int *, double *, int *, double * ) ); +static double HPL_dlamc3 +STDC_ARGS( +( const double, const double ) ); +static void HPL_dlamc4 +STDC_ARGS( +( int *, const double, const int ) ); +static void HPL_dlamc5 +STDC_ARGS( +( const int, const int, const int, const int, + int *, double * ) ); +static double HPL_dipow +STDC_ARGS( +( const double, const int ) ); + +#ifdef STDC_HEADERS +double HPL_dlamch +( + const HPL_T_MACH CMACH +) +#else +double HPL_dlamch +( CMACH ) + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum (sfmin) such + * that 1 / sfmin does not overflow, the base of the machine (base), the + * precision (prec), the number of (base) digits in the mantissa (t), + * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the + * minimum exponent before (gradual) underflow (emin), the underflow + * threshold (rmin) base**(emin-1), 
the largest exponent before overflow + * (emax), the overflow threshold (rmax) (base**emax)*(1-eps). + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamch.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * CMACH (local input) const HPL_T_MACH + * Specifies the value to be returned by HPL_dlamch + * = HPL_MACH_EPS, HPL_dlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_dlamch := sfmin + * = HPL_MACH_BASE, HPL_dlamch := base + * = HPL_MACH_PREC, HPL_dlamch := eps*base + * = HPL_MACH_MLEN, HPL_dlamch := t + * = HPL_MACH_RND, HPL_dlamch := rnd + * = HPL_MACH_EMIN, HPL_dlamch := emin + * = HPL_MACH_RMIN, HPL_dlamch := rmin + * = HPL_MACH_EMAX, HPL_dlamch := emax + * = HPL_MACH_RMAX, HPL_dlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double eps, sfmin, base, t, rnd, emin, rmin, emax, + rmax, prec; + double small; + static int first=1; + int beta=0, imax=0, imin=0, it=0, lrnd=0; +/* .. + * .. Executable Statements .. 
+ */ + if( first != 0 ) + { + first = 0; + HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); + base = (double)(beta); t = (double)(it); + if( lrnd != 0 ) + { rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; } + else + { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); } + prec = eps * base; emin = (double)(imin); emax = (double)(imax); + sfmin = rmin; small = HPL_rone / rmax; +/* + * Use SMALL plus a bit, to avoid the possibility of rounding causing + * overflow when computing 1/sfmin. + */ + if( small >= sfmin ) sfmin = small * ( HPL_rone + eps ); + } + + if( CMACH == HPL_MACH_EPS ) return( eps ); + if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); + if( CMACH == HPL_MACH_BASE ) return( base ); + if( CMACH == HPL_MACH_PREC ) return( prec ); + if( CMACH == HPL_MACH_MLEN ) return( t ); + if( CMACH == HPL_MACH_RND ) return( rnd ); + if( CMACH == HPL_MACH_EMIN ) return( emin ); + if( CMACH == HPL_MACH_RMIN ) return( rmin ); + if( CMACH == HPL_MACH_EMAX ) return( emax ); + if( CMACH == HPL_MACH_RMAX ) return( rmax ); + + return( eps ); +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static void HPL_dlamc1 +( + int * BETA, + int * T, + int * RND, + int * IEEE1 +) +#else +static void HPL_dlamc1 +( BETA, T, RND, IEEE1 ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * IEEE1, * RND, * T; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc1 determines the machine parameters given by BETA, T, RND, + * and IEEE1. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. 
B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * IEEE1 (local output) int * + * Specifies whether rounding appears to be done in the IEEE + * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b, c, f, one, qtr, savec, t1, t2; + static int first=1, lbeta, lieee1, lrnd, lt; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; one = HPL_rone; +/* + * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and + * RND. Throughout this routine we use the function HPL_dlamc3 to ensure + * that relevant values are stored and not held in registers, or are not + * affected by optimizers. + * + * Compute a = 2.0**m with the smallest positive integer m such that + * fl( a + 1.0 ) == a. + */ + a = HPL_rone; c = HPL_rone; + do + { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } + while( c == HPL_rone ); +/* + * Now compute b = 2.0**m with the smallest positive integer m such that + * fl( a + b ) > a. + */ + b = HPL_rone; c = HPL_dlamc3( a, b ); + while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); } +/* + * Now compute the base. a and c are neighbouring floating point num- + * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe- + * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to + * BETA and not (BETA-1). 
+ */ + qtr = one / 4.0; savec = c; + c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr); +/* + * Now determine whether rounding or chopping occurs, by adding a bit + * less than BETA/2 and a bit more than BETA/2 to a. + */ + b = (double)(lbeta); + f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a ); + if( c == a ) { lrnd = 1; } else { lrnd = 0; } + f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a ); + if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; +/* + * Try and decide whether rounding is done in the IEEE round to nea- + * rest style. b/2 is half a unit in the last place of the two numbers + * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa- + * vec is odd. Thus adding b/2 to a should not change a, but adding b/2 + * to savec should change savec. + */ + t1 = HPL_dlamc3( b / HPL_rtwo, a ); + t2 = HPL_dlamc3( b / HPL_rtwo, savec ); + if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; + else lieee1 = 0; +/* + * Now find the mantissa, T. It should be the integer part of log to the + * base BETA of a, however it is safer to determine T by powering. So we + * find T as the smallest positive integer for which fl( beta**t + 1.0 ) + * is equal to 1.0. + */ + lt = 0; a = HPL_rone; c = HPL_rone; + + do + { + lt++; a *= (double)(lbeta); + c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); + } while( c == HPL_rone ); + } + + *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; +} + +#ifdef STDC_HEADERS +static void HPL_dlamc2 +( + int * BETA, + int * T, + int * RND, + double * EPS, + int * EMIN, + double * RMIN, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * EMAX, * EMIN, * RND, * T; + double * EPS, * RMAX, * RMIN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc2 determines the machine parameters specified in its argu- + * ment list. 
+ * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself + * based on a function PARANOIA by W. Kahan of the University of Cali- + * fornia at Berkeley for the computation of the relative machine epsi- + * lon eps. + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * EPS (local output) double * + * The smallest positive number such that fl( 1.0 - EPS ) < 1.0, + * where fl denotes the computed value. + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow occurs. + * + * RMIN (local output) double * + * The smallest normalized number for the machine, given by + * BASE**( EMIN - 1 ), where BASE is the floating point value + * of BETA. + * + * EMAX (local output) int * + * The maximum exponent before overflow occurs. + * + * RMAX (local output) double * + * The largest positive number for the machine, given by + * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + * value of BETA. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double leps, lrmax, lrmin; + double a, b, c, half, one, rbase, sixth, small, + third, two, zero; + static int first=1, iwarn=0, lbeta=0, lemax, lemin, + lt=0; + int gnmin=0, gpmin=0, i, ieee, lieee1=0, + lrnd=0, ngnmin=0, ngpmin=0; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo; +/* + * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA, + * T, RND, EPS, EMIN and RMIN. 
+ * + * Throughout this routine we use the function HPL_dlamc3 to ensure that + * relevant values are stored and not held in registers, or are not af- + * fected by optimizers. + * + * HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1. + */ + HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 ); +/* + * Start to find eps. + */ + b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a; +/* + * Try some tricks to see whether or not this is the correct EPS. + */ + b = two / 3.0; + half = one / HPL_rtwo; + sixth = HPL_dlamc3( b, -half ); + third = HPL_dlamc3( sixth, sixth ); + b = HPL_dlamc3( third, -half ); + b = HPL_dlamc3( b, sixth ); + b = Mabs( b ); if( b < leps ) b = leps; + + leps = HPL_rone; + + while( ( leps > b ) && ( b > zero ) ) + { + leps = b; + c = HPL_dlamc3( half * leps, + HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) ); + c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c ); + c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c ); + } + if( a < leps ) leps = a; +/* + * Computation of EPS complete. + * + * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). Keep + * dividing a by BETA until (gradual) underflow occurs. This is detected + * when we cannot recover the previous a. 
+ */ + rbase = one / (double)(lbeta); small = one; + for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero ); + a = HPL_dlamc3( one, small ); + HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta ); + HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta ); + + ieee = 0; + + if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) ) + { + if( ngpmin == gpmin ) + { +/* + * Non twos-complement machines, no gradual underflow; e.g., VAX ) + */ + lemin = ngpmin; + } + else if( ( gpmin-ngpmin ) == 3 ) + { +/* + * Non twos-complement machines with gradual underflow; e.g., IEEE stan- + * dard followers + */ + lemin = ngpmin - 1 + lt; ieee = 1; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, gpmin ); + iwarn = 1; + } + } + else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) + { + if( Mabs( ngpmin-ngnmin ) == 1 ) + { +/* + * Twos-complement machines, no gradual underflow; e.g., CYBER 205 + */ + lemin = Mmax( ngpmin, ngnmin ); + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) + { + if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) + { +/* + * Twos-complement machines with gradual underflow; no known machine + */ + lemin = Mmax( ngpmin, ngnmin ) - 1 + lt; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); + lemin = Mmin( lemin, gnmin ); iwarn = 1; + } +/* + * Comment out this if block if EMIN is ok + */ + if( iwarn != 0 ) + { + first = 1; + HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n", +"WARNING. The value EMIN may be incorrect:- EMIN =", lemin, +"If, after inspection, the value EMIN looks acceptable, please comment ", +"out the if block as marked within the code of routine HPL_dlamc2, ", +"otherwise supply EMIN explicitly." 
); + } +/* + * Assume IEEE arithmetic if we found denormalised numbers above, or if + * arithmetic seems to round in the IEEE style, determined in routine + * HPL_dlamc1. A true IEEE machine should have both things true; how- + * ever, faulty machines may have one or the other. + */ + if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; + else ieee = 0; +/* + * Compute RMIN by successive division by BETA. We could compute RMIN + * as BASE**( EMIN - 1 ), but some machines underflow during this compu- + * tation. + */ + lrmin = HPL_rone; + for( i = 0; i < 1 - lemin; i++ ) + lrmin = HPL_dlamc3( lrmin*rbase, zero ); +/* + * Finally, call HPL_dlamc5 to compute emax and rmax. + */ + HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); + } + *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; + *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; +} + +#ifdef STDC_HEADERS +static double HPL_dlamc3( const double A, const double B ) +#else +static double HPL_dlamc3( A, B ) +/* + * .. Scalar Arguments .. + */ + const double A, B; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc3 is intended to force a and b to be stored prior to doing + * the addition of a and b, for use in situations where optimizers + * might hold one of these in a register. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc3.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * A, B (local input) double + * The values a and b. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( A + B ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc4 +( + int * EMIN, + const double START, + const int BASE +) +#else +static void HPL_dlamc4( EMIN, START, BASE ) +/* + * .. Scalar Arguments .. 
+ */ + int * EMIN; + const int BASE; + const double START; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc4 is a service function for HPL_dlamc2. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc4.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow, computed by + * setting A = START and dividing by BASE until the previous A + * can not be recovered. + * + * START (local input) double + * The starting point for determining EMIN. + * + * BASE (local input) int + * The base of the machine. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b1, b2, c1, c2, d1, d2, one, rbase, zero; + int i; +/* .. + * .. Executable Statements .. + */ + a = START; one = HPL_rone; rbase = one / (double)(BASE); + zero = HPL_rzero; + *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a; + + do + { + (*EMIN)--; a = b1; + b1 = HPL_dlamc3( a / BASE, zero ); + c1 = HPL_dlamc3( b1 * BASE, zero ); + d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; + b2 = HPL_dlamc3( a * rbase, zero ); + c2 = HPL_dlamc3( b2 / rbase, zero ); + d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; + } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc5 +( + const int BETA, + const int P, + const int EMIN, + const int IEEE, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + const int BETA, EMIN, IEEE, P; + int * EMAX; + double * RMAX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc5 attempts to compute RMAX, the largest machine floating- + * point number, without overflow. It assumes that EMAX + abs(EMIN) sum + * approximately to a power of 2. 
It will fail on machines where this + * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + * EMAX = 28718). It will also fail if the value supplied for EMIN is + * too large (i.e. too close to zero), probably with overflow. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc5.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * BETA (local input) int + * The base of floating-point arithmetic. + * + * P (local input) int + * The number of base BETA digits in the mantissa of a floating- + * point value. + * + * EMIN (local input) int + * The minimum exponent before (gradual) underflow. + * + * IEEE (local input) int + * A logical flag specifying whether or not the arithmetic sys- + * tem is thought to comply with the IEEE standard. + * + * EMAX (local output) int * + * The largest exponent before overflow. + * + * RMAX (local output) double * + * The largest machine floating-point number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double oldy=HPL_rzero, recbas, y, z; + int exbits=1, expsum, i, lexp=1, nbits, try, + uexp; +/* .. + * .. Executable Statements .. + */ +/* + * First compute lexp and uexp, two powers of 2 that bound abs(EMIN). + * We then assume that EMAX + abs( EMIN ) will sum approximately to the + * bound that is closest to abs( EMIN ). (EMAX is the exponent of the + * required number RMAX). + */ +l_10: + try = (int)( (unsigned int)(lexp) << 1 ); + if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; } + + if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; } +/* + * Now -lexp is less than or equal to EMIN, and -uexp is greater than or + * equal to EMIN. exbits is the number of bits needed to store the expo- + * nent. 
+ */ + if( ( uexp+EMIN ) > ( -lexp-EMIN ) ) + { expsum = (int)( (unsigned int)(lexp) << 1 ); } + else + { expsum = (int)( (unsigned int)(uexp) << 1 ); } +/* + * expsum is the exponent range, approximately equal to EMAX - EMIN + 1. + */ + *EMAX = expsum + EMIN - 1; +/* + * nbits is the total number of bits needed to store a floating-point + * number. + */ + nbits = 1 + exbits + P; + + if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) + { +/* + * Either there are an odd number of bits used to store a floating-point + * number, which is unlikely, or some bits are not used in the represen- + * tation of numbers, which is possible, (e.g. Cray machines) or the + * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), + * which is perhaps the most likely. We have to assume the last alterna- + * tive. If this is true, then we need to reduce EMAX by one because + * there must be some way of representing zero in an implicit-bit sys- + * tem. On machines like Cray we are reducing EMAX by one unnecessarily. + */ + (*EMAX)--; + } + + if( IEEE != 0 ) + { +/* + * Assume we are on an IEEE machine which reserves one exponent for in- + * finity and NaN. + */ + (*EMAX)--; + } +/* + * Now create RMAX, the largest machine number, which should be equal to + * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being + * careful that the result is less than 1.0. + */ + recbas = HPL_rone / (double)(BETA); + z = (double)(BETA) - HPL_rone; + y = HPL_rzero; + + for( i = 0; i < P; i++ ) + { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); } + + if( y >= HPL_rone ) y = oldy; +/* + * Now multiply by BETA**EMAX to get RMAX. + */ + for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero ); + + *RMAX = y; +/* + * End of HPL_dlamc5 + */ +} + +#ifdef STDC_HEADERS +static double HPL_dipow +( + const double X, + const int N +) +#else +static double HPL_dipow( X, N ) +/* + * .. Scalar Arguments .. 
+ */ + const int N; + const double X; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dipow computes the integer n-th power of a real scalar x. + * + * Arguments + * ========= + * + * X (local input) const double + * The real scalar x. + * + * N (local input) const int + * The integer power to raise x to. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r, y=HPL_rone; + int k, n; +/* .. + * .. Executable Statements .. + */ + if( X == HPL_rzero ) return( HPL_rzero ); + if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } + for( k = 0; k < n; k++ ) y *= r; + + return( y ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlange.c new file mode 100644 index 000000000..82f118b6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlange.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_dlange +( + const HPL_T_NORM NORM, + const int M, + const int N, + const double * A, + const int LDA +) +#else +double HPL_dlange +( NORM, M, N, A, LDA ) + const HPL_T_NORM NORM; + const int M; + const int N; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a matrix A: + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. + * + * Arguments + * ========= + * + * NORM (local input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N), that + * contains the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero ); + + if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ) + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - M; + } + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ). + */ + work = (double*)malloc( (size_t)(N) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( j = 0; j < N; j++ ) + { + s = HPL_rzero; + for( i = 0; i < M; i++ ) { s += Mabs( *A ); A++; } + work[j] = s; A += LDA - M; + } +/* + * Find maximum sum of columns for 1-norm + */ + v0 = work[HPL_idamax( N, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ) + */ + work = (double*)malloc( (size_t)(M) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( i = 0; i < M; i++ ) { work[i] = HPL_rzero; } + + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { work[i] += Mabs( *A ); A++; } + A += LDA - M; + } +/* + * Find maximum sum of rows for inf-norm + */ + v0 = work[HPL_idamax( M, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + + return( v0 ); +/* + * End of HPL_dlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlaprnt.c new file mode 100644 index 000000000..f29df3cd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/auxil/HPL_dlaprnt.c @@ -0,0 +1,130 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
#ifdef STDC_HEADERS
void HPL_dlaprnt
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        IA,
   const int                        JA,
   const int                        LDA,
   const char *                     CMATNM
)
#else
void HPL_dlaprnt
( M, N, A, IA, JA, LDA, CMATNM )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        IA;
   const int                        JA;
   const int                        LDA;
   const char *                     CMATNM;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaprnt writes every entry of the M-by-N matrix A to standard
 * error, one entry per line, in the form  NAME(row,col)=value.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         Number of rows of A to print.
 *
 * N       (local input)                 const int
 *         Number of columns of A to print.
 *
 * A       (local input)                 double *
 *         Array of dimension (LDA,N) holding the matrix.
 *
 * IA      (local input)                 const int
 *         Value added to the local row index in the printed label.
 *
 * JA      (local input)                 const int
 *         Value added to the local column index in the printed label.
 *
 * LDA     (local input)                 const int
 *         Leading dimension of A, at least max(1,M).
 *
 * CMATNM  (local input)                 const char *
 *         Name printed in front of every entry.
 *
 * ---------------------------------------------------------------------
 */
   double                     * entry;
   int                        row, col;

/*
 * Columns are the outer loop, so entries come out column by column.
 */
   for( col = 0; col < N; col++ )
   {
      for( row = 0; row < M; row++ )
      {
         entry = Mptr( A, row, col, LDA );
         HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM,
                      IA+row, JA+col, *entry );
      }
   }
/*
 * End of HPL_dlaprnt
 */
}
/*
 * Define default value for unrolling factors
 * #ifndef HPL_LATCPY_M_DEPTH
 * #define    HPL_LATCPY_M_DEPTH      32
 * #define    HPL_LATCPY_LOG2_M_DEPTH  5
 * #endif
 * #ifndef HPL_LATCPY_N_DEPTH
 * #define    HPL_LATCPY_N_DEPTH       4
 * #define    HPL_LATCPY_LOG2_N_DEPTH  2
 * #endif
 */
#ifndef HPL_LATCPY_M_DEPTH
#define    HPL_LATCPY_M_DEPTH        4
#define    HPL_LATCPY_LOG2_M_DEPTH   2
#endif
#ifndef HPL_LATCPY_N_DEPTH
#define    HPL_LATCPY_N_DEPTH        2
#define    HPL_LATCPY_LOG2_N_DEPTH   1
#endif

#ifndef HPL_LATCPY_USE_COPY
/*
 * HPL_LATCPY_ROW( k ) stores entry k of each B column currently being
 * filled (B0, and B1..B3 depending on HPL_LATCPY_N_DEPTH) and moves the
 * matching A pointers one column of A further. HPL_LATCPY_ROW1 is the
 * single-column variant used for the columns of B left over once N has
 * been consumed in groups of HPL_LATCPY_N_DEPTH.
 */
#define    HPL_LATCPY_ROW1( k_ )     B0[k_] = *A0; A0 += LDA;

#if   ( HPL_LATCPY_N_DEPTH == 1 )
#define    HPL_LATCPY_ROW( k_ )      B0[k_] = *A0; A0 += LDA;
#elif ( HPL_LATCPY_N_DEPTH == 2 )
#define    HPL_LATCPY_ROW( k_ )      B0[k_] = *A0; A0 += LDA; \
                                     B1[k_] = *A1; A1 += LDA;
#elif ( HPL_LATCPY_N_DEPTH == 4 )
#define    HPL_LATCPY_ROW( k_ )      B0[k_] = *A0; A0 += LDA; \
                                     B1[k_] = *A1; A1 += LDA; \
                                     B2[k_] = *A2; A2 += LDA; \
                                     B3[k_] = *A3; A3 += LDA;
#else
#error "HPL_LATCPY_N_DEPTH must be 1, 2 or 4"
#endif
/*
 * HPL_LATCPY_UNROLL( S ) expands the step macro S for the entries of one
 * unrolled group. The thresholds mirror the original hand-unrolled code,
 * which emitted 1, 2, 4, 8, 16 or 32 steps.
 */
#if   ( HPL_LATCPY_M_DEPTH > 16 )
#define    HPL_LATCPY_UNROLL( S_ ) \
   S_( 0) S_( 1) S_( 2) S_( 3) S_( 4) S_( 5) S_( 6) S_( 7) \
   S_( 8) S_( 9) S_(10) S_(11) S_(12) S_(13) S_(14) S_(15) \
   S_(16) S_(17) S_(18) S_(19) S_(20) S_(21) S_(22) S_(23) \
   S_(24) S_(25) S_(26) S_(27) S_(28) S_(29) S_(30) S_(31)
#elif ( HPL_LATCPY_M_DEPTH >  8 )
#define    HPL_LATCPY_UNROLL( S_ ) \
   S_( 0) S_( 1) S_( 2) S_( 3) S_( 4) S_( 5) S_( 6) S_( 7) \
   S_( 8) S_( 9) S_(10) S_(11) S_(12) S_(13) S_(14) S_(15)
#elif ( HPL_LATCPY_M_DEPTH >  4 )
#define    HPL_LATCPY_UNROLL( S_ ) \
   S_( 0) S_( 1) S_( 2) S_( 3) S_( 4) S_( 5) S_( 6) S_( 7)
#elif ( HPL_LATCPY_M_DEPTH >  2 )
#define    HPL_LATCPY_UNROLL( S_ )   S_( 0) S_( 1) S_( 2) S_( 3)
#elif ( HPL_LATCPY_M_DEPTH >  1 )
#define    HPL_LATCPY_UNROLL( S_ )   S_( 0) S_( 1)
#else
#define    HPL_LATCPY_UNROLL( S_ )   S_( 0)
#endif

#endif /* !HPL_LATCPY_USE_COPY */

#ifdef STDC_HEADERS
void HPL_dlatcpy
(
   const int                        M,
   const int                        N,
   const double *                   A,
   const int                        LDA,
   double *                         B,
   const int                        LDB
)
#else
void HPL_dlatcpy
( M, N, A, LDA, B, LDB )
   const int                        M;
   const int                        N;
   const double *                   A;
   const int                        LDA;
   double *                         B;
   const int                        LDB;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlatcpy copies the transpose of an array A into an array B, i.e.
 * B( i, j ) = A( j, i ) for 0 <= i < M and 0 <= j < N, both arrays being
 * stored column by column.
 *
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the number of  rows of the array B and
 *         the number of columns of A. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry,  N specifies the number of  rows of the array A and
 *         the number of columns of B. N must be at least zero.
 *
 * A       (local input)                 const double *
 *         On entry, A points to an array of dimension (LDA,M).
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,N).
 *
 * B       (local output)                double *
 *         On entry, B points to an array of dimension (LDB,N). On exit,
 *         B is overwritten with the transpose of A.
 *
 * LDB     (local input)                 const int
 *         On entry, LDB specifies the leading dimension of the array B.
 *         LDB must be at least MAX(1,M).
 *
 * Notes
 * =====
 *
 * Nothing is done when M <= 0 or N <= 0. Entries of B in rows M..LDB-1
 * are never written.
 *
 * Row and column positions are tracked with two cursors (Aj, Bj) that
 * are only ever advanced by 1 and by LDB. No M * LDA product is formed
 * in int arithmetic, so large panels cannot overflow an int offset.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef HPL_LATCPY_USE_COPY
   double                     * Bj = B;     /* start of column j of B   */
   int                        j;
#else
   const double               * Aj = A;     /* A( j, 0 ), next row of A */
   double                     * Bj = B;     /* B( 0, j ), next col of B */
   const double               * A0;
   double                     * B0;
#if ( HPL_LATCPY_N_DEPTH >= 2 )
   const double               * A1;
   double                     * B1;
#endif
#if ( HPL_LATCPY_N_DEPTH == 4 )
   const double               * A2, * A3;
   double                     * B2, * B3;
#endif
   int                        i, j, mu, nu;
#endif
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;

#ifdef HPL_LATCPY_USE_COPY
/*
 * Column j of B is row j of A, read with stride LDA.
 */
   for( j = 0; j < N; j++, Bj += LDB ) HPL_dcopy( M, A + j, LDA, Bj, 1 );
#else
/*
 * mu / nu: M and N rounded down to a multiple of the unrolling depths.
 */
   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
                                      HPL_LATCPY_LOG2_M_DEPTH );
   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
                                      HPL_LATCPY_LOG2_N_DEPTH );
/*
 * Fill HPL_LATCPY_N_DEPTH columns of B at a time.
 */
   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
   {
/*
 * Bind the working pointers to the next rows of A / columns of B and
 * move both cursors past them.
 */
      A0 = Aj++; B0 = Bj; Bj += LDB;
#if ( HPL_LATCPY_N_DEPTH >= 2 )
      A1 = Aj++; B1 = Bj; Bj += LDB;
#endif
#if ( HPL_LATCPY_N_DEPTH == 4 )
      A2 = Aj++; B2 = Bj; Bj += LDB;
      A3 = Aj++; B3 = Bj; Bj += LDB;
#endif

      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
      {
         HPL_LATCPY_UNROLL( HPL_LATCPY_ROW )

         B0 += HPL_LATCPY_M_DEPTH;
#if ( HPL_LATCPY_N_DEPTH >= 2 )
         B1 += HPL_LATCPY_M_DEPTH;
#endif
#if ( HPL_LATCPY_N_DEPTH == 4 )
         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
#endif
      }
/*
 * Entries left over once M has been consumed in unrolled groups.
 */
      for( i = mu; i < M; i++ )
      {
         HPL_LATCPY_ROW( 0 )

         B0++;
#if ( HPL_LATCPY_N_DEPTH >= 2 )
         B1++;
#endif
#if ( HPL_LATCPY_N_DEPTH == 4 )
         B2++; B3++;
#endif
      }
   }
/*
 * Columns of B left over once N has been consumed in groups: one at a
 * time.
 */
   for( j = nu; j < N; j++ )
   {
      A0 = Aj++; B0 = Bj; Bj += LDB;

      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
      {
         HPL_LATCPY_UNROLL( HPL_LATCPY_ROW1 )

         B0 += HPL_LATCPY_M_DEPTH;
      }

      for( i = mu; i < M; i++ ) { HPL_LATCPY_ROW1( 0 ) B0++; }
   }
#endif
/*
 * End of HPL_dlatcpy
 */
}

#ifndef HPL_LATCPY_USE_COPY
#undef     HPL_LATCPY_UNROLL
#undef     HPL_LATCPY_ROW
#undef     HPL_LATCPY_ROW1
#endif
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
#ifdef STDC_HEADERS
void HPL_fprintf
(
   FILE *                           STREAM,
   const char *                     FORM,
   ...
)
#else
void HPL_fprintf( va_alist )
va_dcl
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_fprintf is a wrapper around fprintf flushing the output stream.
 *
 *
 * Arguments
 * =========
 *
 * STREAM  (local input)                 FILE *
 *         On entry, STREAM specifies the output stream.
 *
 * FORM    (local input)                 const char *
 *         On entry, FORM specifies the format, i.e., how the subsequent
 *         arguments are converted for output.
 *
 *         (local input)                 ...
 *         On entry,  ...  is the list of arguments to be printed within
 *         the format string.
 *
 * Notes
 * =====
 *
 * The arguments are formatted straight into STREAM, so the length of
 * the formatted text is not limited by any local buffer.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   va_list                    argptr;
#ifndef STDC_HEADERS
   FILE                       * STREAM;
   char                       * FORM;
#endif
/* ..
 * .. Executable Statements ..
 */
#ifdef STDC_HEADERS
   va_start( argptr, FORM );
#else
   va_start( argptr );
   STREAM = va_arg( argptr, FILE * );
   FORM   = va_arg( argptr, char * );
#endif
/*
 * Format directly into the stream rather than through a fixed-size
 * intermediate buffer, which vsprintf could overrun on long output.
 */
   (void) vfprintf( STREAM, FORM, argptr );
   va_end( argptr );

   (void) fflush( STREAM );
/*
 * End of HPL_fprintf
 */
}
#ifdef STDC_HEADERS
void HPL_warn
(
   FILE *                           STREAM,
   int                              LINE,
   const char *                     SRNAME,
   const char *                     FORM,
   ...
)
#else
void HPL_warn( va_alist )
va_dcl
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_warn displays an error message.
 *
 *
 * Arguments
 * =========
 *
 * STREAM  (local input)                 FILE *
 *         On entry, STREAM specifies the output stream.
 *
 * LINE    (local input)                 int
 *         On entry,  LINE  specifies the line  number in the file where
 *         the  error  has  occurred.  When  LINE  is not a positive line
 *         number, it is ignored.
 *
 * SRNAME  (local input)                 const char *
 *         On entry, SRNAME  should  be the name of the routine  calling
 *         this error handler.
 *
 * FORM    (local input)                 const char *
 *         On entry, FORM specifies the format, i.e., how the subsequent
 *         arguments are converted for output.
 *
 *         (local input)                 ...
 *         On entry,  ...  is the list of arguments to be printed within
 *         the format string.
 *
 * Notes
 * =====
 *
 * The formatted message is held in a local buffer of 128 bytes; longer
 * messages are truncated to fit.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   va_list                    argptr;
   char                       cline[128];
#ifndef STDC_HEADERS
   FILE                       * STREAM;
   int                        LINE;
   char                       * FORM, * SRNAME;
#endif
/* ..
 * .. Executable Statements ..
 */
#ifdef STDC_HEADERS
   va_start( argptr, FORM );
#else
   va_start( argptr );
   STREAM = va_arg( argptr, FILE * );
   LINE   = va_arg( argptr, int );
   SRNAME = va_arg( argptr, char * );
   FORM   = va_arg( argptr, char * );
#endif
/*
 * Bounded formatting: vsnprintf never writes past the end of cline and
 * always terminates it, where vsprintf would overrun the buffer on a
 * long message.
 */
   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
   va_end( argptr );
/*
 * Display an error message
 */
   if( LINE <= 0 )
      HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function",
                   SRNAME, cline );
   else
      HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n",
                   "HPL ERROR on line", LINE, "of function", SRNAME, cline );
/*
 * End of HPL_warn
 */
}
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Make.inc provides the build settings used below (CC, CCFLAGS, CCNOOPT,
# ARCHIVER, ARFLAGS, RANLIB, TOUCH, HPLlib, INCdir).
# NOTE(review): it is checked in as a symlink to an absolute path in a
# developer's home directory ending in hpl-2.3/Make.intel64 -- confirm
# that the link is re-created for the local checkout before building.
#
include Make.inc
#
# ######################################################################
#
INCdep           = \
   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h
#
## Object files ########################################################
#
# HPL_au0obj is compiled with CCFLAGS; HPL_au1obj (HPL_dlamch) with
# CCNOOPT.
#
HPL_au0obj       = \
   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
   HPL_dlange.o
HPL_au1obj       = \
   HPL_dlamch.o
HPL_auxobj       = \
   $(HPL_au0obj) $(HPL_au1obj)
#
## Targets #############################################################
#
# all, lib and clean name actions, not files: declare them phony so that
# a file with one of these names cannot mask the target.
#
.PHONY           : all lib clean
#
all              : lib
#
lib              : lib.grd
#
# lib.grd is a stamp file recording that the objects were archived.
#
lib.grd          : $(HPL_auxobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# Each object is built from the source of the same name one directory up.
#
$(HPL_au0obj)    : %.o : ../%.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  $<
$(HPL_au1obj)    : %.o : ../%.c $(INCdep)
	$(CC) -o $@ -c $(CCNOOPT)  $<
#
# ######################################################################
#
clean            :
	$(RM) *.o *.grd
#
# ######################################################################
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_daxpy.c @@ -0,0 +1,175 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_daxpy + +#ifdef STDC_HEADERS +void HPL_daxpy +( + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_daxpy +( N, ALPHA, X, INCX, Y, INCY ) + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_daxpy scales the vector x by alpha and adds it to y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. N + * must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. 
+ * On exit, the entries of the incremented array Y are updated + * with the scaled entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_daxpy( N, ALPHA, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register const double alpha = ALPHA; + register double x0, x1, x2, x3, y0, y1, y2, y3; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; + + if( ( N > 0 ) && ( alpha != HPL_rzero ) ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + + *Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1; + Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3; + + X += incX4; + Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + y0 = (*Y); + + *Y = y0 + alpha * x0; + + X += INCX; + Y += INCY; + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_daxpy + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dcopy.c new file mode 100644 index 000000000..a8fe24109 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dcopy.c @@ -0,0 +1,168 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 
2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dcopy + +#ifdef STDC_HEADERS +void HPL_dcopy +( + const int N, + const double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_dcopy +( N, X, INCX, Y, INCY ) + const int N; + const double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dcopy copies the vector x into the vector y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. N + * must be at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * On exit, the entries of the incremented array Y are updated + * with the entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dcopy( N, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY, + incX5 = 5 * INCX, incY5 = 5 * INCY, + incX6 = 6 * INCX, incY6 = 6 * INCY, + incX7 = 7 * INCX, incY7 = 7 * INCY, + incX8 = 8 * INCX, incY8 = 8 * INCY; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + *Y = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5; + Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7; + + X += incX8; + Y += incY8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { + x0 = (*X); + *Y = x0; + + X += INCX; + Y += INCY; + } + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77dcopy( &F77N, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_dcopy + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemm.c new file mode 100644 index 000000000..b222e4717 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemm.c @@ -0,0 +1,521 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemmNN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iail, iblj, icij, j, jal, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) + { + t0 = ALPHA * B[iblj]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmNT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double 
t0; + int i, iail, ibj, ibjl, icij, j, jal, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) + { + t0 = ALPHA * B[ibjl]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iai, iail, iblj, icij, j, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) + { t0 += A[iail] * B[iblj]; } + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iali, ibj, ibjl, icij, j, jai, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iali = jai, ibjl = ibj; + l < K; l++, iali += 
1, ibjl += LDB ) t0 += A[iali] * B[ibjl]; + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemm0 +( + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + BETA, C, LDC ) + const enum HPL_TRANS TRANSA, TRANSB; + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) || + ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) && + ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } + return; + } + + if( TRANSB == HplNoTrans ) + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } + else + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dgemm +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +void HPL_dgemm +( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANSA; + const enum HPL_TRANS TRANSB; + const int M; + const int N; + const int K; + const double ALPHA; + const double * A; + const int LDA; + 
const double * B; + const int LDB; + const double BETA; + double * C; + const int LDC; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemm performs one of the matrix-matrix operations + * + * C := alpha * op( A ) * op( B ) + beta * C + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * Alpha and beta are scalars, and A, B and C are matrices, with op(A) + * an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANSA (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * TRANSB (local input) const enum HPL_TRANS + * On entry, TRANSB specifies the form of op(B) to be used in + * the matrix-matrix operation follows: + * TRANSB==HplNoTrans : op( B ) = B, + * TRANSB==HplTrans : op( B ) = B^T, + * TRANSB==HplConjTrans : op( B ) = B^T. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix + * op(A) and of the matrix C. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix + * op(B) and the number of columns of the matrix C. N must be + * at least zero. + * + * K (local input) const int + * On entry, K specifies the number of columns of the matrix + * op(A) and the number of rows of the matrix op(B). K must be + * be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrices A and B + * need not be set on input. 
+ * + * A (local input) const double * + * On entry, A is an array of dimension (LDA,ka), where ka is + * k when TRANSA==HplNoTrans, and is m otherwise. Before + * entry with TRANSA==HplNoTrans, the leading m by k part of + * the array A must contain the matrix A, otherwise the leading + * k by m part of the array A must contain the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the first dimension of A as declared + * in the calling (sub) program. When TRANSA==HplNoTrans then + * LDA must be at least max(1,m), otherwise LDA must be at least + * max(1,k). + * + * B (local input) const double * + * On entry, B is an array of dimension (LDB,kb), where kb is + * n when TRANSB==HplNoTrans, and is k otherwise. Before + * entry with TRANSB==HplNoTrans, the leading k by n part of + * the array B must contain the matrix B, otherwise the leading + * n by k part of the array B must contain the matrix B. + * + * LDB (local input) const int + * On entry, LDB specifies the first dimension of B as declared + * in the calling (sub) program. When TRANSB==HplNoTrans then + * LDB must be at least max(1,k), otherwise LDB must be at least + * max(1,n). + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When BETA is + * supplied as zero then the elements of the matrix C need + * not be set on input. + * + * C (local input/output) double * + * On entry, C is an array of dimension (LDC,n). Before entry, + * the leading m by n part of the array C must contain the + * matrix C, except when beta is zero, in which case C need not + * be set on entry. On exit, the array C is overwritten by the + * m by n matrix ( alpha*op( A )*op( B ) + beta*C ). + * + * LDC (local input) const int + * On entry, LDC specifies the first dimension of C as declared + * in the calling (sub) program. LDC must be at least + * max(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + printf("Order %d, TransA %d, TransB %d, M %d, N %d, K %d\n", ORDER, TRANSA, TRANSB, M, N, K); + cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, + C, LDC ); + } + else + { + HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, + C, LDC ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringStructPtr + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringCrayStyle + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, F77K = K, + F77lda = LDA, F77ldb = LDB, F77ldc = LDC; +#else +#define F77M M +#define F77N N +#define F77K K +#define F77lda LDA +#define F77ldb LDB +#define F77ldc LDC +#endif + char ctransa, ctransb; + + if( TRANSA == HplNoTrans ) ctransa = 'N'; + else if( TRANSA == HplTrans ) ctransa = 'T'; + else ctransa = 'C'; + + if( TRANSB == HplNoTrans ) ctransb = 'N'; + else if( TRANSB == HplTrans ) ctransb = 'T'; + else ctransb = 'C'; + + if( ORDER == HplColumnMajor ) + { +#ifdef StringSunStyle + F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, 
&alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif + } + else + { +#ifdef StringSunStyle + F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif + } +#endif +/* + * End of HPL_dgemm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemv.c new file mode 100644 index 000000000..6366c5a48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dgemv.c @@ -0,0 +1,326 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemv0 +( + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_TRANS TRANS; + const int INCX, INCY, LDA, M, N; + const double ALPHA, BETA; + const double * A, * X; + double * Y; +#endif +{ +/* + * .. Local Variables .. + */ + int i, iaij, ix, iy, j, jaj, jx, jy; + register double t0; +/* .. + * .. Executable Statements .. + */ + if( ( M == 0 ) || ( N == 0 ) || + ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; } + + if( TRANS == HplNoTrans ) + { + HPL_dscal( M, BETA, Y, INCY ); + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY ) + { Y[iy] += A[iaij] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = HPL_rzero; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { t0 += A[iaij] * X[ix]; } + if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0; + else Y[jy] = BETA * Y[jy] + ALPHA * t0; + } + } +} +#endif + +#ifdef STDC_HEADERS +void HPL_dgemv +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANS, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + const double * X, + const int INCX, + const double BETA, + double * Y, + const int INCY +) +#else +void HPL_dgemv +( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANS; + const int M; + const 
int N; + const double ALPHA; + const double * A; + const int LDA; + const double * X; + const int INCX; + const double BETA; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemv performs one of the matrix-vector operations + * + * y := alpha * op( A ) * x + beta * y, + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * where alpha and beta are scalars, x and y are vectors and A is an m + * by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the operation to be performed as + * follows: + * TRANS = HplNoTrans y := alpha*A *x + beta*y, + * TRANS = HplTrans y := alpha*A^T*x + beta*y. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then A and X need not be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. 
+ * INCX must not be zero. + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When ALPHA is + * supplied as zero then Y need not be set on input. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * Before entry with BETA non-zero, the incremented array Y must + * contain the vector y. On exit, Y is overwritten by the + * updated vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } + else + { + HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), + N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftran; +#endif +#ifdef StringStructPtr + F77_CHAR ftran; +#endif +#ifdef StringCrayStyle + F77_CHAR ftran; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + char ctran; + + if( ORDER == HplColumnMajor ) + { + ctran = ( TRANS == HplNoTrans ? 
'N' : 'T' ); + +#ifdef StringSunStyle + F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + else + { + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); +#ifdef StringSunStyle + F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + +#endif +/* + * End of HPL_dgemv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dger.c new file mode 100644 index 000000000..5ea702778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dger.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dger + +#ifdef STDC_HEADERS +void HPL_dger +( + const enum HPL_ORDER ORDER, + const int M, + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY, + double * A, + const int LDA +) +#else +void HPL_dger +( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) + const enum HPL_ORDER ORDER; + const int M; + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; + double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dger performs the rank 1 operation + * + * A := alpha * x * y^T + A, + * + * where alpha is a scalar, x is an m-element vector, y is an n-element + * vector and A is an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. 
When ALPHA is + * supplied as zero then X and Y need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * A (local input/output) double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. On exit, A is + * overwritten by the updated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ); +#endif +#ifdef HPL_CALL_VSIPL + register double t0; + int i, iaij, ix, iy, j, jaj, jx, jy; + + if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return; + + if( ORDER == HplColumnMajor ) + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = ALPHA * Y[jy]; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { A[iaij] += X[ix] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY ) + { A[iaij] += Y[iy] * t0; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + + if( ORDER == HplColumnMajor ) + { F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); } + else + { F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); } +#endif +/* + * End of HPL_dger + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dscal.c new file mode 100644 index 000000000..7e041991f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dscal.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dscal + +#ifdef STDC_HEADERS +void HPL_dscal +( + const int N, + const double ALPHA, + double * X, + const int INCX +) +#else +void HPL_dscal +( N, ALPHA, X, INCX ) + const int N; + const double ALPHA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dscal scales the vector x by alpha. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are scaled + * by the scalar alpha. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dscal( N, ALPHA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + register const double alpha = ALPHA; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( ( N > 0 ) && ( alpha != HPL_rone ) ) + { + if( alpha == HPL_rzero ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = (double *)X + nu * INCX; + + do + { + (*X) = HPL_rzero; X[incX4] = HPL_rzero; + X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero; + X[incX2] = HPL_rzero; X[incX6] = HPL_rzero; + X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; } + } + else + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + x0 *= alpha; x4 *= alpha; x1 *= alpha; x5 *= alpha; + x2 *= alpha; x6 *= alpha; x3 *= alpha; x7 *= alpha; + + (*X) = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5; + X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7; + + X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + + F77dscal( &F77N, &alpha, X, &F77incx ); +#endif +/* + * End of HPL_dscal + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dswap.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dswap.c new file mode 100644 index 000000000..eb1b8e08d --- 
/dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dswap.c @@ -0,0 +1,157 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dswap + +#ifdef STDC_HEADERS +void HPL_dswap +( + const int N, + double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_dswap +( N, X, INCX, Y, INCY ) + const int N; + double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dswap swaps the vectors x and y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. N + * must be at least zero. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are updated + * with the entries of the incremented array Y. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * On exit, the entries of the incremented array Y are updated + * with the entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dswap( N, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, y0, y1, y2, y3; + double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; + + if( N > 0 ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + *Y = x0; *X = y0; Y[INCY ] = x1; X[INCX ] = y1; + Y[incY2] = x2; X[incX2] = y2; Y[incY3] = x3; X[incX3] = y3; + X += incX4; Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); y0 = (*Y); *Y = x0; *X = y0; X += INCX; Y += INCY; } + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77dswap( &F77N, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_dswap + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsm.c new file mode 100644 index 000000000..a336a7d29 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsm.c @@ -0,0 +1,977 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, 
jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void 
HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaki, ibij, ibkj, j, jai, jbj, k; + register double t0; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNN +( + const int M, + const int N, + 
const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + 
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void 
HPL_dtrsmRUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = 
(N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsm0 +( + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } + return; + } + + if( SIDE == HplLeft ) + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } + else + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { 
HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsm +( + const enum HPL_ORDER ORDER, + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dtrsm +( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_ORDER ORDER; + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int M; + const int N; + const double ALPHA; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsm solves one of the matrix equations + * + * op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + * + * where alpha is a scalar, X and B are m by n matrices, A is a unit, or + * non-unit, upper or lower triangular matrix and op(A) is one of + * + * op( A ) = A or op( A ) = A^T. + * + * The matrix X is overwritten on B. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. 
+ * + * SIDE (local input) const enum HPL_SIDE + * On entry, SIDE specifies whether op(A) appears on the left + * or right of X as follows: + * SIDE==HplLeft op( A ) * X = alpha * B, + * SIDE==HplRight X * op( A ) = alpha * B. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the form of op(A) to be used in + * the matrix-matrix operation as follows: + * TRANS==HplNoTrans : op( A ) = A, + * TRANS==HplTrans : op( A ) = A^T, + * TRANS==HplConjTrans : op( A ) = A^T. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix B. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix B. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrix B need not + * be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * k, where k is m when SIDE==HplLeft and is n + * otherwise. Before entry with UPLO==HplUpper, the leading + * k by k upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced.
When UPLO==HplLower on entry, + * the leading k by k lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * are not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. + * + * B (local input/output) double * + * On entry, B points to an array of size equal to or greater + * than LDB * n. Before entry, the leading m by n part of the + * array B must contain the matrix B, except when alpha is zero, + * in which case B need not be set on entry. On exit, the array + * B is overwritten by the m by n solution matrix. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of B as + * declared in the calling (sub) program. LDB must be at + * least MAX(1,m). + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); + } + else + { + HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), + ( UPLO == HplLower ?
HplUpper : HplLower ), + TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef StringSunStyle +#if defined( HPL_USE_F77_INTEGER_DEF ) + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77ldb = LDB; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77ldb LDB +#endif + char cside, cuplo, ctran, cdiag; + + if( TRANS == HplNoTrans ) ctran = 'N'; + else if( TRANS == HplTrans ) ctran = 'T'; + else ctran = 'C'; + cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); + + if( ORDER == HplColumnMajor ) + { + cside = ( SIDE == HplRight ? 'R' : 'L' ); + cuplo = ( UPLO == HplLower ? 
'L' : 'U' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } + else + { + cside = ( SIDE == HplRight ? 'L' : 'R' ); + cuplo = ( UPLO == HplLower ? 
'U' : 'L' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } +#endif +/* + * End of HPL_dtrsm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsv.c new file mode 100644 index 000000000..99e84f073 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_dtrsv.c @@ -0,0 +1,520 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + X[jx] /= A[jaj]; t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + t0 /= A[jaj]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int 
INCX +) +#else +static void HPL_dtrsvLTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + X[jx] /= A[j+jaj]; t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij 
+= 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + t0 /= A[iaij]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsv0 +( + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + if( N == 0 ) return; + + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUTU( N, A, LDA, X, INCX ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLTU( N, A, LDA, X, INCX ); } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsv +( + const enum HPL_ORDER ORDER, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +void HPL_dtrsv +( ORDER, UPLO, TRANS, DIAG, 
N, A, LDA, X, INCX ) + const enum HPL_ORDER ORDER; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int N; + const double * A; + const int LDA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsv solves one of the systems of equations + * + * A * x = b, or A^T * x = b, + * + * where b and x are n-element vectors and A is an n by n non-unit, or + * unit, upper or lower triangular matrix. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the equations to be solved as + * follows: + * TRANS==HplNoTrans A * x = b, + * TRANS==HplTrans A^T * x = b. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * N (local input) const int + * On entry, N specifies the order of the matrix A. N must be at + * least zero. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. 
Before entry with UPLO==HplUpper, the leading + * n by n upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. When UPLO==HplLower on entry, + * the leading n by n lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * are not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,n). + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * Before entry, the incremented array X must contain the n + * element right-hand side vector b. On exit, X is overwritten + * with the solution vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); + } + else + { + HPL_dtrsv0( ( UPLO == HplUpper ? HplLower : HplUpper ), + ( TRANS == HplNoTrans ?
HplTrans : HplNoTrans ), + DIAG, N, A, LDA, X, INCX ); + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fuplo, ftran, fdiag; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77lda = LDA, F77incx = INCX; +#else +#define F77N N +#define F77lda LDA +#define F77incx INCX +#endif + char cuplo, ctran, cdiag; + + if( ORDER == HplColumnMajor ) + { + cuplo = ( UPLO == HplUpper ? 'U' : 'L' ); + ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); + } + else + { + cuplo = ( UPLO == HplUpper ? 'L' : 'U' ); + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); + } + cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' ); + +#ifdef StringSunStyle + F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx, + IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + fuplo = HPL_C2F_CHAR( cuplo ); + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructVal + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructPtr + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif + +#endif +/* + * End of HPL_dtrsv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_idamax.c new file mode 100644 index 000000000..5ceabdf25 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/HPL_idamax.c @@ 
-0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_idamax + +#ifdef STDC_HEADERS +int HPL_idamax +( + const int N, + const double * X, + const int INCX +) +#else +int HPL_idamax +( N, X, INCX ) + const int N; + const double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_idamax returns the index in an n-vector x of the first element + * having maximum absolute value. + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + return( (int)(cblas_idamax( N, X, INCX )) ); +#endif +#ifdef HPL_CALL_VSIPL + register double absxi, smax = HPL_rzero, x0, x1, x2, x3, + x4, x5, x6, x7; + const double * StX; + register int imax = 0, i = 0, j; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + + X += incX8; + + } while( X != StX ); + } + + for( j = N - nu; j != 0; j-- ) + { + x0 = (*X); + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + X += INCX; + } + } + return( imax ); +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + int imax = 0; + + if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1; + return( imax ); +#endif +/* + * End of HPL_idamax + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Make.inc 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Make.inc @@ -0,0 +1 @@ +../../../Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Makefile new file mode 100644 index 000000000..ed9f3d0e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/blas/intel64/Makefile @@ -0,0 +1,98 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4.
The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h +# +## Object files ######################################################## +# +HPL_blaobj = \ + HPL_dcopy.o HPL_daxpy.o HPL_dscal.o \ + HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o \ + HPL_dger.o HPL_dgemm.o HPL_dtrsm.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_blaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dcopy.o : ../HPL_dcopy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dcopy.c +HPL_daxpy.o : ../HPL_daxpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_daxpy.c +HPL_dscal.o : ../HPL_dscal.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dscal.c +HPL_idamax.o : ../HPL_idamax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_idamax.c +HPL_dgemv.o : ../HPL_dgemv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemv.c +HPL_dtrsv.o : ../HPL_dtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsv.c +HPL_dger.o : ../HPL_dger.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dger.c +HPL_dgemm.o : ../HPL_dgemm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemm.c +HPL_dtrsm.o : ../HPL_dtrsm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsm.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1rinM.c new file mode 100644 index 000000000..dd03b79b1 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1rinM.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, then send message to its two + * next neighbors. Otherwise, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, or + * just after the root process, then forward it to the next. Otherwise, + * inform the caller that the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next, + size ), msgid, comm ); + } + } + else + { + prev = MModSub1( rank, size ); + if( ( size > 2 ) && + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( prev != root ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1ring.c new file mode 100644 index 000000000..dd5eb2d12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_1ring.c @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, prev, rank, root, + size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, then + * forward it to the next. Otherwise, inform the caller that the panel + * has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank, + size ), msgid, comm ); + } + else + { + prev = MModSub1( rank, size ); + + ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid, + comm, &PANEL->status[0] ); + next = MModAdd1( rank, size ); + if( ( ierr == MPI_SUCCESS ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, + msgid, comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2rinM.c new file mode 100644 index 000000000..56581ea0d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2rinM.c @@ -0,0 +1,236 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its two right neighbors and mid-pro- + * cess. If I am not the root process, probe for message. If the message + * is there, then receive it. If I am not the last process of both rings + * then forward it to the next. Otherwise, inform the caller that the + * panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + if( MModAdd1( next, size ) != roo2 ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, + MModAdd1( next, size ), msgid, comm ); + } + + if( ierr == MPI_SUCCESS ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + } + else + { + prev = MModSub1( rank, size ); + if( ( prev == root ) || ( rank == roo2 ) || + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && ( prev != root ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2ring.c new file mode 100644 index 000000000..f0e6e2647 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_2ring.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, rank, + roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its right neighbor and mid-process. + * If I am not the root process, probe for message. If the message is + * there, then receive it, and if I am not the last process of both + * rings, then forward it to the next. Otherwise, inform the caller that + * the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + else + { + partner = MModSub1( rank, size ); + if( ( partner == root ) || ( rank == roo2 ) ) partner = root; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bcast.c new file mode 100644 index 000000000..100161152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bcast.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bcast +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast +( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bcast broadcasts the current panel. Successful completion is + * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to + * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was + * not completed, in which case this function should be called again. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * IFLAG (output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * occured. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break; + case HPL_1RING : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break; + case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break; + case HPL_2RING : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break; + case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break; + case HPL_BLONG : ierr = HPL_bcast_blong( PANEL, IFLAG ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_binit.c new file mode 100644 index 000000000..3daf72b7d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_binit.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_binit +( + HPL_T_panel * PANEL +) +#else +int HPL_binit +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_binit initializes a row broadcast. Successful completion is + * indicated by the returned error code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_binit_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_binit_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_binit_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_binit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blonM.c new file mode 100644 index 000000000..5fa221937 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blonM.c @@ -0,0 +1,445 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S1 PANEL->buffers[I_SEND] +#define _M_COUNT_S1 PANEL->counts[I_SEND] +#define _M_TYPE_S1 PANEL->dtypes[I_SEND] + +#define _M_BUFF_S2 PANEL->buffers[I_SEND] +#define _M_COUNT_S2 PANEL->counts[I_SEND] +#define _M_TYPE_S2 PANEL->dtypes[I_SEND] + +#define _M_BUFF_R1 PANEL->buffers[I_RECV] +#define _M_COUNT_R1 PANEL->counts[I_RECV] +#define _M_TYPE_R1 PANEL->dtypes[I_RECV] + +#define _M_BUFF_R2 PANEL->buffers[I_RECV] +#define _M_COUNT_R2 PANEL->counts[I_RECV] +#define _M_TYPE_R2 PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S1 (void *)(PANEL->L2) +#define _M_COUNT_S1 PANEL->len +#define _M_TYPE_S1 MPI_DOUBLE + +#define _M_BUFF_S2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S2 lbuf +#define _M_TYPE_S2 MPI_DOUBLE + +#define _M_BUFF_R1 (void *)(PANEL->L2) +#define _M_COUNT_R1 PANEL->len +#define _M_TYPE_R1 MPI_DOUBLE + +#define _M_BUFF_R2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R2 lbuf +#define _M_TYPE_R2 MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blonM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int 
HPL_bcast_blonM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf, + ibufR, ibufS, dummy=0, indx, ip2=1, k, l, + lbuf, lbufR, lbufS, mask=1, msgid, mydist, + mydist2, next, npm1, npm2, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process sends to its right neighbor, then spread + * the panel on the other npcol - 2 processes. If I am not the root + * process, probe for message received. If the message is there, then + * receive it. If I am just after the root process, return. Otherwise, + * keep spreading on those npcol - 2 processes. Otherwise, inform the + * caller that the panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + root = PANEL->pcol; msgid = PANEL->msgid; + prev = MModSub1( rank, size ); + + if( rank == root ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, 0, PANEL->len, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1, + MModAdd1( rank, size ), msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else if( prev == root ) + { +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ * + * ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] ); + */ + if( ierr == MPI_SUCCESS ) + { /* if panel is here, proceed */ + if( go != 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + ierr = HPL_packL( PANEL, 0, PANEL->len, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1, + root, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } + } +/* + * if I am just after the root, exit now. The message receive completed + * successfully, this guy is done. If there are only 2 processes in each + * row of processes, we are done as well. + */ + if( ( prev == root ) || ( size == 2 ) ) + { + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + return( *IFLAG ); + } +/* + * Otherwise, proceed with broadcast - Spread the panel across process + * columns + */ + npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len; + + k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + if( rank == root ) mydist2 = ( mydist = 0 ); + else mydist2 = ( mydist = MModSub( rank, root, size ) - 1 ); + + indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 ); + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < npm1 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); + if( MModSub1( prev, size ) == root ) prev = root; + next = MModAdd1( rank, size ); + if( rank == root ) next = MModAdd1( next, size ); + + for( k = 0; k < npm2; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = 
MModAdd( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the 
message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blong.c new file mode 100644 index 000000000..e57f11bcc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_blong.c @@ -0,0 +1,363 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S PANEL->buffers[I_SEND] +#define _M_COUNT_S PANEL->counts[I_SEND] +#define _M_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_BUFF_R PANEL->buffers[I_RECV] +#define _M_COUNT_R PANEL->counts[I_RECV] +#define _M_TYPE_R PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S lbuf +#define _M_TYPE_S MPI_DOUBLE + +#define _M_BUFF_R (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R lbuf +#define _M_TYPE_R MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE + +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blong +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_blong( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, dummy=0, ierr=MPI_SUCCESS, + ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf, + lbufR, lbufS, mask, msgid, mydist, mydist2, + next, npm1, partner, prev, rank, root, size; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, test for message receive completion. If + * the message is there, then receive it, and keep spreading in a + * blocking fashion this time. Otherwise, inform the caller that the + * panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + mask = PANEL->grid->col_mask; ip2 = PANEL->grid->col_ip2m1; + root = PANEL->pcol; msgid = PANEL->msgid; + COUNT = PANEL->len; npm1 = size - 1; + mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2; + count = COUNT / size; count = Mmax( count, 1 ); +/* + * Spread the panel across process columns + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); +/* + * This probing mechanism causes problems when lookhead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. 
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < size ) + { + partner = MModAdd( root, partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Send message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); next = MModAdd1( rank, size ); + + for( k = 0; k < npm1; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = MModAdd( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? 
COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. 
+ * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bwait.c new file mode 100644 index 000000000..a2e0f4df8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_bwait.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bwait +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bwait waits for the row broadcast of the current panel to + * terminate. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements ..
+ */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_bwait_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_bwait_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_bwait_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bwait + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_copyL.c new file mode 100644 index 000000000..04f765a6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_copyL.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_copyL +( + HPL_T_panel * PANEL +) +#else +void HPL_copyL +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_copyL copies the panel of columns, the L1 replicated submatrix, + * the pivot array and the info scalar into a contiguous workspace for + * later broadcast. + * + * The copy of this panel into a contiguous buffer can be enforced by + * specifying -DHPL_COPY_L in the architecture specific Makefile. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int jb, lda; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + jb = PANEL->jb; lda = PANEL->lda; + + if( PANEL->grid->myrow == PANEL->prow ) + { + HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + else + { + HPL_dlacpy( PANEL->mp, jb, Mptr( PANEL->A, 0, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + } +/* + * End of HPL_copyL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_packL.c new file mode 100644 index 000000000..8a70ef83d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_packL.c @@ -0,0 +1,245 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_packL +( + HPL_T_panel * PANEL, + const int INDEX, + const int LEN, + const int IBUF +) +#else +int HPL_packL +( PANEL, INDEX, LEN, IBUF ) + HPL_T_panel * PANEL; + const int INDEX; + const int LEN; + const int IBUF; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_packL forms the MPI data type for the panel to be broadcast. + * Successful completion is indicated by the returned error code + * MPI_SUCCESS. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * INDEX (input) const int + * On entry, INDEX points to the first entry of the packed + * buffer being broadcast. + * + * LEN (input) const int + * On entry, LEN is the length of the packed buffer. + * + * IBUF (input) const int + * On entry, IBUF specifies the panel buffer/count/type entries + * that should be initialized. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ +#ifndef HPL_COPY_L + MPI_Datatype * type = NULL; + void * * * bufs = NULL; + double * A; + int * blen = NULL; + MPI_Aint * disp = NULL; + int curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1, + jb, jbm, jbp1, lda, len, m, m1, nbufs; +#else + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_COPY_L +/* + * Panel + L1 + DPIV have been copied into a contiguous buffer - Create + * and commit a contiguous data type + */ + PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX); + PANEL->counts [IBUF] = 1; + + ierr = MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); +#else +/* + * Panel is not contiguous (because of LDA and also L1 + DPIV) - Create + * and commit a struct data type + */ + jbp1 = ( jb = PANEL->jb ) + 1; +/* + * Temporaries to create the type struct. 
+ */ + bufs = (void * * *)malloc( jbp1 * sizeof( void * * ) ); + blen = (int *)malloc( jbp1 * sizeof( int ) ); + disp = (MPI_Aint *)malloc( jbp1 * sizeof( MPI_Aint ) ); + type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) ); + + if( ( bufs != NULL ) && ( blen != NULL ) && + ( disp != NULL ) && ( type != NULL ) ) + { + m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow ); + if( curr != 0 ) m -= jb; + + len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m; + + if( ( m > 0 ) && ( ibuf < jbm ) ) + { +/* + * Retrieve proper pointers depending on process row and column + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + lda = PANEL->lda; + if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); } + else { A = Mptr( PANEL->A, 0, -jb, lda ); } + } + else { lda = PANEL->ldl2; A = PANEL->L2; } +/* + * Pack the first (partial) column of L + */ + m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m ); + m1 = Mmin( len, m1 ); + + bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; +/* + * Pack the remaining columns of L + */ + while( ( len > 0 ) && ( j1 < jb ) ) + { + m1 = Mmin( len, m ); + + bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; + } + } +/* + * Pack L1, DPIV, DINFO + */ + if( len > 0 ) + { /* L1, DPIV, DINFO */ + bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = len; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + nbufs++; + } + + for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0; + + PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1; +/* + * construct the struct type + */ + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_create_struct( nbufs, blen, disp, type, + &PANEL->dtypes[IBUF] ); +/* + * release temporaries + */ + if( bufs ) free( bufs ); + if( blen ) free( blen ); + if( disp ) free( disp ); + if( type ) free( type ); +/* + * commit the type + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); + } + else + { +/* + * Memory allocation failed -> abort + */ + HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" ); + return( MPI_SUCCESS ); /* never executed (hopefully ...) */ + } +#endif +#else + /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug + somewhere, so, just in case and until I find it ... */ + return( MPI_SUCCESS ); +#endif +/* + * End of HPL_packL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_recv.c new file mode 100644 index 000000000..ff426891c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_recv.c @@ -0,0 +1,142 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_recv +( + double * RBUF, + int RCOUNT, + int SRC, + int RTAG, + MPI_Comm COMM +) +#else +int HPL_recv +( RBUF, RCOUNT, SRC, RTAG, COMM ) + double * RBUF; + int RCOUNT; + int SRC; + int RTAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_recv is a simple wrapper around MPI_Recv.
Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * SRC (local input) int + * On entry, SRC specifies the rank of the sending process in + * the communication space defined by COMM. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG, + COMM, &status ); +#endif + return( ( ierr == MPI_SUCCESS ?
HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_recv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_sdrv.c new file mode 100644 index 000000000..0b2363563 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_sdrv.c @@ -0,0 +1,239 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_sdrv +( + double * SBUF, + int SCOUNT, + int STAG, + double * RBUF, + int RCOUNT, + int RTAG, + int PARTNER, + MPI_Comm COMM +) +#else +int HPL_sdrv +( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM ) + double * SBUF; + int SCOUNT; + int STAG; + double * RBUF; + int RCOUNT; + int RTAG; + int PARTNER; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is + * to allow for some experimentation and tuning of this simple function. + * Messages of length less than or equal to zero are not sent nor + * received. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for the + * sending communication operation.
+ * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for the + * receiving communication operation. + * + * PARTNER (local input) int + * On entry, PARTNER specifies the rank of the collaborative + * process in the communication space defined by COMM. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type[2]; +#endif + MPI_Request request; + MPI_Status status; + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT > 0 ) + { + if( SCOUNT > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE +/* + * Post asynchronous receive + */ + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER, + RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, + STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else +/* + * Post asynchronous receive + */ + ierr = MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) +
ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, + PARTNER, STAG, COMM ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#endif + } + else + { +/* + * Blocking receive + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, + COMM, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &status ); +#endif + } + } + else if( SCOUNT > 0 ) + { +/* + * Blocking send + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, + COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, + STAG, COMM ); +#endif + } + else { ierr = MPI_SUCCESS; } + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_sdrv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_send.c new file mode 100644 index 000000000..9e9868594 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/HPL_send.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_send +( + double * SBUF, + int SCOUNT, + int DEST, + int STAG, + MPI_Comm COMM +) +#else +int HPL_send +( SBUF, SCOUNT, DEST, STAG, COMM ) + double * SBUF; + int SCOUNT; + int DEST; + int STAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_send is a simple wrapper around MPI_Send. Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * DEST (local input) int + * On entry, DEST specifies the rank of the receiving process in + * the communication space defined by COMM.
+ * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( SCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM ); +#endif + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_send + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Makefile new file mode 100644 index 000000000..529fe9aea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/comm/intel64/Makefile @@ -0,0 +1,111 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_comobj = \ + HPL_1ring.o HPL_1rinM.o HPL_2ring.o \ + HPL_2rinM.o HPL_blong.o HPL_blonM.o \ + HPL_packL.o HPL_copyL.o HPL_binit.o \ + HPL_bcast.o HPL_bwait.o HPL_send.o \ + HPL_recv.o HPL_sdrv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_comobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_1ring.o : ../HPL_1ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1ring.c +HPL_1rinM.o : ../HPL_1rinM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1rinM.c +HPL_2ring.o : ../HPL_2ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_2ring.c +HPL_2rinM.o : ../HPL_2rinM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_2rinM.c +HPL_blong.o : ../HPL_blong.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blong.c +HPL_blonM.o : ../HPL_blonM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blonM.c +HPL_packL.o : ../HPL_packL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_packL.c +HPL_copyL.o : ../HPL_copyL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_copyL.c +HPL_binit.o : ../HPL_binit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_binit.c +HPL_bcast.o : ../HPL_bcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bcast.c +HPL_bwait.o : ../HPL_bwait.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bwait.c +HPL_send.o : ../HPL_send.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_send.c +HPL_recv.o : ../HPL_recv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_recv.c +HPL_sdrv.o : ../HPL_sdrv.c $(INCdep) + 
$(CC) -o $@ -c $(CCFLAGS) ../HPL_sdrv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/Makefile new file mode 100644 index 000000000..6596a047b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/Makefile @@ -0,0 +1,138 @@ +# /* +# * -- High Performance Computing Linpack Benchmark (HPL) +# * Modifications Copyright (C) 2023 Intel Corporation​ +# * +# * -- Copyright notice and Licensing terms: +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions +# * are met: +# * +# * 1. Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * +# * 2. Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions, and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * +# * 3. All advertising materials mentioning features or use of this +# * software must display the following acknowledgement: +# * This product includes software developed at the University of +# * Tennessee, Knoxville, Innovative Computing Laboratory. +# * +# * 4. The name of the University, the name of the Laboratory, or the +# * names of its contributors may not be used to endorse or promote +# * products derived from this software without specific written +# * permission. 
+# * +# * -- Disclaimer: +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# * --------------------------------------------------------------------- +# * +# * SPDX-License-Identifier: BSD-4-Clause +# */ + +# /* +# * -- High Performance Computing Linpack Benchmark (HPL) +# * HPL - 2.3 - December 2, 2018 +# * Antoine P. Petitet +# * University of Tennessee, Knoxville +# * Innovative Computing Laboratory +# * (C) Copyright 2000-2008 All Rights Reserved +# * +# * -- Copyright notice and Licensing terms: +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions +# * are met: +# * +# * 1. Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * +# * 2. Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions, and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * +# * 3. 
All advertising materials mentioning features or use of this +# * software must display the following acknowledgement: +# * This product includes software developed at the University of +# * Tennessee, Knoxville, Innovative Computing Laboratory. +# * +# * 4. The name of the University, the name of the Laboratory, or the +# * names of its contributors may not be used to endorse or promote +# * products derived from this software without specific written +# * permission. +# * +# * -- Disclaimer: +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# * --------------------------------------------------------------------- +# */ + + +all: libdgemm.so.1.0.1 + +OBJS = dpcpp_dgemm.o + +.PRECIOUS: $(OBJS) + +all : libdgemm.so.1.0.1 + +dpcpp_dgemm.o : dpcpp_dgemm.cpp + +#DEFINES = -DMPI +#DEFINES += -DUSE_FERMI_DGEMM +#DEFINES += -DVERBOSE_PRINT +#DEFINES += -DACML +#DEFINES += -DGOTO + +%.o: %.cpp +ifeq ($(USE_NVIDIA_BACKEND),ON) + clang++ -O2 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -c -fPIC $(DEFINES) $*.cpp -o $*.o -DUSE_CUBLAS +else ifeq ($(USE_AMD_BACKEND),ON) + clang++ -O2 -fsycl -fsycl-targets=amd_gpu_gfx90a -c -fPIC $(DEFINES) $*.cpp -o $*.o -DUSE_HIPBLAS -D__HIP_PLATFORM_AMD__ +else + icpx -fsycl -O2 -c -fPIC $(DEFINES) $*.cpp -o $*.o +endif + +libdgemm.so.1.0.1: $(OBJS) +ifeq ($(USE_NVIDIA_BACKEND),ON) + clang++ -O3 -std=c++17 -shared -Wl,-soname,libdgemm.so.1 -o libdgemm.so.1.0.1 $(OBJS) -lcudart -lsycl -lcuda -lcublas -fsycl -fsycl-targets=nvptx64-nvidia-cuda -L$(MKL_LIB) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -lstdc++ + ln -sf libdgemm.so.1.0.1 libdgemm.so.1.0 + ln -sf libdgemm.so.1.0 libdgemm.so.1 + ln -sf libdgemm.so.1 libdgemm.so +else ifeq ($(USE_AMD_BACKEND),ON) + clang++ -O3 -std=c++17 -shared -Wl,-soname,libdgemm.so.1 -o libdgemm.so.1.0.1 $(OBJS) -lsycl -lhipblas -fsycl -fsycl-targets=amd_gpu_gfx90a + ln -sf libdgemm.so.1.0.1 libdgemm.so.1.0 + ln -sf libdgemm.so.1.0 libdgemm.so.1 + ln -sf libdgemm.so.1 libdgemm.so +else + icpx -fsycl -fsycl-device-code-split=per_kernel -Wl, -lsycl -lOpenCL -lpthread -lm -ldl -fPIC -O2 -shared -Wl,-soname,libdgemm.so.1 -o libdgemm.so.1.0.1 $(OBJS) #-I$(SYCL_INCLUDE) + ln -sf libdgemm.so.1.0.1 libdgemm.so.1.0 + ln -sf libdgemm.so.1.0 libdgemm.so.1 + ln -sf libdgemm.so.1 libdgemm.so +endif + +clean: + rm -f $(OBJS) $(CUBINS) libdgemm.so.1.0.1 libdgemm.so.1.0 libdgemm.so.1 libdgemm.so diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.cpp 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.cpp new file mode 100644 index 000000000..a5b800ce4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.cpp @@ -0,0 +1,461 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ + + + +#define NUMBER_OF_STREAMS 4 +#define CHUNK_SIZE 512 +#define NN 64 +#define NM 128 +//#define DPCPP_DEBUG +//#define DEVICE_DEBUG +//#define MPI + +#ifdef MPI +#include +#endif + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include "oneapi/mkl/blas.hpp" +#include "dpcpp_dgemm.h" + + +#ifdef USE_CUBLAS +#include +#include +#include +#include "mkl.h" +#include +#elif USE_HIPBLAS +#include "hipblas.h" +#else +#include "oneapi/mkl/blas.hpp" +#endif + +extern "C" { + void dpcpp_dgemm + ( const int ORDER, + const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA, const double *A, const int LDA, + const double *B, const int LDB, const double BETA, + double *C, const int LDC); + + void dpcpp_dtrsm( + int HPL_ORDER, + int HPL_SIDE, + int HPL_UPLO, + int HPL_TRANS, + int HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int); +} + +void 
DeviceManager::display_device_properties(sycl::device const &dev) +{ + std::cout << "\tSYCL device : " << dev.get_info() << std::endl; + std::cout << "\tDriver version : " << dev.get_info() << std::endl; + std::cout << "\tPlatform : " << dev.get_platform().get_info()<< std::endl; + std::cout << "\tVendor : " << dev.get_info() << std::endl; + std::cout << "\tMax compute units : " << dev.get_info() << std::endl; +} + +#ifdef USE_CUBLAS +#define CHECK_ERROR(FUNC) checkCudaErrorMsg(FUNC, " " #FUNC) + +void inline checkCudaErrorMsg(cublasStatus_t status, const char *msg) { + if (status != CUBLAS_STATUS_SUCCESS) { + std::cout << "ERROR CUBLAS:" << msg << " - " << status << std::endl; + exit(EXIT_FAILURE); + } +} + +void inline checkCudaErrorMsg(cudaError status, const char *msg) { + if (status != cudaSuccess) { + std::cout << "ERROR CUDA: " << msg << " - " << status << std::endl; + exit(EXIT_FAILURE); + } +} + +void inline checkCudaErrorMsg(CUresult status, const char *msg) { + if (status != CUDA_SUCCESS) { + std::cout << "ERROR CUDA: " << msg << " - " << status << std::endl; + exit(EXIT_FAILURE); + } +} + +#endif + +// helper functions to determine buffer dimension +template constexpr T inner_dimension(oneapi::mkl::transpose trans, T m, T n) + { return (trans == oneapi::mkl::transpose::nontrans) ? m : n; } +template constexpr T outer_dimension(oneapi::mkl::transpose trans, T m, T n) + { return (trans == oneapi::mkl::transpose::nontrans) ? 
n : m; } +template constexpr T matrix_size(oneapi::mkl::transpose trans, T m, T n, T ldm) + { return outer_dimension(trans, m, n) * ldm; + //return outer_dimension(trans, m, n); + } + +// TODO: hardcoded values for enums, +inline oneapi::mkl::transpose to_mkl_trans(int hpltrans){ + if(hpltrans==111) return oneapi::mkl::transpose::nontrans; + if(hpltrans==112) return oneapi::mkl::transpose::trans; + if(hpltrans==113) return oneapi::mkl::transpose::conjtrans; + return oneapi::mkl::transpose::trans; +} + +inline oneapi::mkl::uplo to_mkl_uplo(int hpluplo){ + if(hpluplo==121) return oneapi::mkl::uplo::upper; + if(hpluplo==122) return oneapi::mkl::uplo::lower; + return oneapi::mkl::uplo::upper; +} + +inline oneapi::mkl::diag to_mkl_diag(int hpldiag){ + if(hpldiag==131) return oneapi::mkl::diag::nonunit; + if(hpldiag==132) return oneapi::mkl::diag::unit; + return oneapi::mkl::diag::nonunit; +} + +inline oneapi::mkl::side to_mkl_side(int hplside){ + if(hplside==141) return oneapi::mkl::side::left; + if(hplside==142) return oneapi::mkl::side::right; + return oneapi::mkl::side::left; +} +void dpcpp_dgemm +( const int ORDER, const int TRANSA, const int TRANSB, + const int M, const int N, const int K, + const double ALPHA,const double *A, const int LDA, + const double *B, const int LDB, + const double BETA, double *C, const int LDC) +{ + + + +if ((M==0)||(K==0)||(N==0)) + return; + + + +#ifdef DPCPP_DEBUG + using namespace std; + cout <<"Calling DPC++ dgemm ========="<()); + cublasSetStream(handle, ih.get_native_queue()); + + CHECK_ERROR(cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &ALPHA, A_buffer, LDA, B_buffer, LDB, &BETA, C_buffer, LDC)); + cudaDeviceSynchronize (); + }); + }).wait_and_throw(); + #elif USE_HIPBLAS + hipblasHandle_t handle; + hipblasCreate(&handle); + + + mQueue.submit([&](sycl::handler &h){ + + h.host_task([=](sycl::interop_handle ih) { + hipCtxSetCurrent(ih.get_native_context()); + hipblasSetStream(handle, ih.get_native_queue()); + + 
hipblasDgemm(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, M, N, K, &ALPHA, A_buffer, LDA, B_buffer, LDB, &BETA, C_buffer, LDC); + }); + }).wait_and_throw(); + #else + oneapi::mkl::blas::gemm(mQueue, transA, transB, M, N, K, ALPHA, A_buffer, LDA, B_buffer, LDB, BETA, C_buffer, LDC); + mQueue.wait(); + #endif + mQueue.memcpy(C, C_buffer, c_size_total * sizeof(double)).wait(); + free(A_buffer, mQueue); + free(B_buffer, mQueue); + free(C_buffer, mQueue); +} + +void dpcpp_dtrsm + +( const int ORDER, const int SIDE, + const int UPLO, const int TRANS, + const int DIAG, const int M, const int N, + const double ALPHA, const double* A, const int LDA, double* B, + const int LDB) +{ + + if ((M==0)||(N==0)){ + return; + } + + + +#ifdef DPCPP_DEBUG + using namespace std; + cout <<"Calling DPC++ dtrsm ========="<()); + cublasSetStream(handle, ih.get_native_queue()); + + CHECK_ERROR(cublasDtrsm(handle,CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_LOWER,CUBLAS_OP_N,CUBLAS_DIAG_UNIT,M,N,&ALPHA,A_buffer,LDA,B_buffer,LDB)); + cudaDeviceSynchronize(); + }); + }).wait_and_throw(); + #elif USE_HIPBLAS + hipblasHandle_t handle; + hipblasCreate(&handle); + + + mQueue.submit([&](sycl::handler &h){ + h.host_task([=](sycl::interop_handle ih) { + hipCtxSetCurrent(ih.get_native_context()); + hipblasSetStream(handle, ih.get_native_queue()); + + hipblasDtrsm(handle,HIPBLAS_SIDE_LEFT,HIPBLAS_FILL_MODE_LOWER,HIPBLAS_OP_N,HIPBLAS_DIAG_UNIT,M,N,&ALPHA,A_buffer,LDA,B_buffer,LDB); + }); + }).wait_and_throw(); + + #else + + oneapi::mkl::blas::trsm(mQueue, side, uplo, trans, diag, M, N, ALPHA, A_buffer, LDA, B_buffer, LDB); + mQueue.wait(); + + + #endif + + mQueue.memcpy(B, B_buffer, N * LDB * sizeof(double)).wait(); + + free(A_buffer, mQueue); + free(B_buffer, mQueue); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.h b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.h new file mode 100644 index 000000000..e5de8ce8d --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/dpcpp/dpcpp_dgemm.h @@ -0,0 +1,157 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ + + +#define NUMBER_OF_STREAMS 2 + +#include +#include +#include + +class DeviceManager; +static DeviceManager *instance[2]; + +class DeviceManager{ + cl::sycl::device *m_pDevice; + cl::sycl::queue queues[NUMBER_OF_STREAMS]; + + DeviceManager(){ + try{ + m_pDevice = new cl::sycl::device(cl::sycl::default_selector()); + }catch(...){ + std::cout << "ERROR: failed to create sycl device.\n"; + } + + auto exception_handler = [] (cl::sycl::exception_list exceptions) { + for (std::exception_ptr const& e : exceptions) { + try { + std::rethrow_exception(e); + } catch(cl::sycl::exception const& e) { + std::cout << "Caught asynchronous SYCL exception during GEMM:\n" + << e.what() << std::endl; + } + } + }; + + + + queues[0] = cl::sycl::queue(*m_pDevice, exception_handler); + //DeviceManager::display_device_properties(*m_pDevice); + //std::cout << "Done\n"; + + } + + + ~DeviceManager() { std::cout << "Destructor Singleton" << std::endl; } + + DeviceManager(const DeviceManager&) = delete; + DeviceManager& operator=(const DeviceManager&) = delete; + + + public: + + static DeviceManager* 
getInstance(int mpi_id){ + if(!instance[mpi_id]){ + + std::cout << "Creating device for " << mpi_id << "\n"; + instance[mpi_id] = new DeviceManager(); + + } + return instance[mpi_id]; + } + + cl::sycl::device &getDevice(){ return *m_pDevice;} + cl::sycl::queue *getQueues(){ return queues;} + + static void display_device_properties(cl::sycl::device const &dev); + static void destroyAllInstances() {delete instance[0]; delete instance[1];} +}; diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_all_reduce.c new file mode 100644 index 000000000..776f48504 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_all_reduce.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_all_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + MPI_Comm COMM +) +#else +int HPL_all_reduce +( BUFFER, COUNT, DTYPE, OP, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_all_reduce performs a global reduce operation across all + * processes of a group leaving the results on all processes. + * + * Arguments + * ========= + * + * BUFFER (local input/global output) void * + * On entry, BUFFER points to the buffer to be combined. On + * exit, this array contains the combined data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. 
+ * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr; +/* .. + * .. Executable Statements .. + */ + hplerr = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM ); + if( hplerr != MPI_SUCCESS ) return( hplerr ); + return( HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) ); +/* + * End of HPL_all_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_barrier.c new file mode 100644 index 000000000..9a5d9b10a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_barrier.c @@ -0,0 +1,90 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_barrier +( + MPI_Comm COMM +) +#else +int HPL_barrier +( COMM ) + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_barrier blocks the caller until all process members have call it. + * The call returns at any process only after all group members have + * entered the call. + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int i=0; +/* .. + * .. Executable Statements .. + */ + return( HPL_broadcast( (void*)(&i), 1, HPL_INT, 0, COMM ) ); +/* + * End of HPL_barrier + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_broadcast.c new file mode 100644 index 000000000..42d962864 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_broadcast.c @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_broadcast +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_broadcast +( BUFFER, COUNT, DTYPE, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_broadcast broadcasts a message from the process with rank ROOT to + * all processes in the group. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be broadcast. On + * exit, this array contains the broadcast data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the source process. 
+ * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; + MPI_Status status; +/* .. + * .. Executable Statements .. + */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr ); + mpierr = MPI_Comm_rank( COMM, &rank ); + + kk = size - 1; + while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist = MModSub( rank, ROOT, size ); + + do + { + mask ^= ip2; + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + ip2 >>= 1; + } while( ip2 ); + + return( hplerr ); +/* + * End of HPL_broadcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_exit.c new file mode 100644 index 000000000..f0d00b065 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_exit.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_exit +( + HPL_T_grid * GRID +) +#else +int HPL_grid_exit +( GRID ) + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_exit marks the process grid object for deallocation. The + * returned error code MPI_SUCCESS indicates successful completion. + * Other error codes are (MPI) implementation dependent. + * + * Arguments + * ========= + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid to be released. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr = MPI_SUCCESS, mpierr; +/* .. + * .. Executable Statements .. 
+ */ + if( GRID->all_comm != MPI_COMM_NULL ) + { + mpierr = MPI_Comm_free( &(GRID->row_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->col_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->all_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + + GRID->order = HPL_COLUMN_MAJOR; + + GRID->iam = GRID->myrow = GRID->mycol = -1; + GRID->nprow = GRID->npcol = GRID->nprocs = -1; + + GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1; + GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1; + + return( hplerr ); +/* + * End of HPL_grid_exit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_info.c new file mode 100644 index 000000000..95c5a7315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_info.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_info +( + const HPL_T_grid * GRID, + int * NPROW, + int * NPCOL, + int * MYROW, + int * MYCOL +) +#else +int HPL_grid_info +( GRID, NPROW, NPCOL, MYROW, MYCOL ) + const HPL_T_grid * GRID; + int * NPROW; + int * NPCOL; + int * MYROW; + int * MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_info returns the grid shape and the coordinates in the grid + * of the calling process. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. 
+ * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NPROW (global output) int * + * On exit, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global output) int * + * On exit, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. + * + * MYROW (global output) int * + * On exit, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (global output) int * + * On exit, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + *NPROW = GRID->nprow; *NPCOL = GRID->npcol; + *MYROW = GRID->myrow; *MYCOL = GRID->mycol; + return( MPI_SUCCESS ); +/* + * End of HPL_grid_info + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_init.c new file mode 100644 index 000000000..52111ac52 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_grid_init.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_init +( + MPI_Comm COMM, + const HPL_T_ORDER ORDER, + const int NPROW, + const int NPCOL, + HPL_T_grid * GRID +) +#else +int HPL_grid_init +( COMM, ORDER, NPROW, NPCOL, GRID ) + MPI_Comm COMM; + const HPL_T_ORDER ORDER; + const int NPROW; + const int NPCOL; + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_init creates a NPROW x NPCOL process grid using column- or + * row-major ordering from an initial collection of processes identified + * by an MPI communicator. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. The coordinates of processes that are not part of the + * grid are set to values outside of [0..NPROW) x [0..NPCOL). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * On entry, COMM is the MPI communicator identifying the + * initial collection of processes out of which the grid is + * formed. + * + * ORDER (global input) const HPL_T_ORDER + * On entry, ORDER specifies how the processes should be ordered + * in the grid as follows: + * ORDER = HPL_ROW_MAJOR row-major ordering; + * ORDER = HPL_COLUMN_MAJOR column-major ordering; + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid to be created. NPROW must be at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid to be created. NPCOL must be at least one. + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information to be initialized. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int hdim, hplerr=MPI_SUCCESS, ierr, ip2, k, + mask, mycol, myrow, nprocs, rank, size; +/* .. + * .. Executable Statements .. + */ + MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size ); +/* + * Abort if illegal process grid + */ + nprocs = NPROW * NPCOL; + if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) ) + { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); } +/* + * Row- or column-major ordering of the processes + */ + if( ORDER == HPL_ROW_MAJOR ) + { + GRID->order = HPL_ROW_MAJOR; + myrow = rank / NPCOL; mycol = rank - myrow * NPCOL; + } + else + { + GRID->order = HPL_COLUMN_MAJOR; + mycol = rank / NPROW; myrow = rank - mycol * NPROW; + } + GRID->iam = rank; GRID->myrow = myrow; GRID->mycol = mycol; + GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs; +/* + * row_ip2 : largest power of two <= nprow; + * row_hdim : row_ip2 procs hypercube dim; + * row_ip2m1 : largest power of two <= nprow-1; + * row_mask : row_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPROW; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->row_ip2 = ip2; GRID->row_hdim = hdim; + + mask = ip2 = 1; k = NPROW - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->row_ip2m1 = ip2; GRID->row_mask = mask; +/* + * col_ip2 : largest power of two <= npcol; + * col_hdim : col_ip2 procs hypercube dim; + * col_ip2m1 : largest power of two <= npcol-1; + * col_mask : col_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPCOL; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->col_ip2 = ip2; GRID->col_hdim = hdim; + + mask = ip2 = 1; k = NPCOL - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->col_ip2m1 = ip2; GRID->col_mask = mask; +/* + * All communicator, leave if I am not part of this grid. Creation of the + * row- and column communicators. + */ + ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 
0 : MPI_UNDEFINED ), + rank, &(GRID->all_comm) ); + if( GRID->all_comm == MPI_COMM_NULL ) return( ierr ); + + ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + return( hplerr ); +/* + * End of HPL_grid_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_max.c new file mode 100644 index 000000000..002aabe01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_max.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_max +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_max +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_max combines (max) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. 
+ * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } +/* + * End of HPL_max + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_min.c new file mode 100644 index 000000000..a99e5e58a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_min.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_min +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_min +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_min combines (min) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. 
+ * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } +/* + * End of HPL_min + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_pnum.c new file mode 100644 index 000000000..c80885b9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_pnum.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pnum +( + const HPL_T_grid * GRID, + const int MYROW, + const int MYCOL +) +#else +int HPL_pnum +( GRID, MYROW, MYCOL ) + const HPL_T_grid * GRID; + const int MYROW; + const int MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pnum determines the rank of a process as a function of its + * coordinates in the grid. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. 
+ * + * MYROW (local input) const int + * On entry, MYROW specifies the row coordinate of the process + * whose rank is to be determined. MYROW must be greater than or + * equal to zero and less than NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies the column coordinate of the + * process whose rank is to be determined. MYCOL must be greater + * than or equal to zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + if( GRID->order == HPL_ROW_MAJOR ) + return( MYROW * GRID->npcol + MYCOL ); + else + return( MYCOL * GRID->nprow + MYROW ); +/* + * End of HPL_pnum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_reduce.c new file mode 100644 index 000000000..417c21163 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_reduce.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_reduce +( BUFFER, COUNT, DTYPE, OP, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_reduce performs a global reduce operation across all processes of + * a group. 
Note that the input buffer is used as workarray and in all + * processes but the accumulating process corrupting the original data. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be reduced. On + * exit, and in process of rank ROOT this array contains the + * reduced data. This buffer is also used as workspace during + * the operation in the other processes of the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the accumulating process. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; + void * buffer = NULL; + int hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; +/* .. + * .. Executable Statements .. 
+ */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); + if( size == 1 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_rank( COMM, &rank ); + i = size - 1; while( i > 1 ) { i >>= 1; d++; } + + if( DTYPE == HPL_INT ) + buffer = (void *)( (int *) malloc( (size_t)(COUNT) * + sizeof( int ) ) ); + else + buffer = (void *)( (double *)malloc( (size_t)(COUNT) * + sizeof( double ) ) ); + + if( !( buffer ) ) + { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); } + + if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 ) + { + do + { + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + MModAdd( ROOT, ip2, size ), tag, COMM, + &status ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + OP( COUNT, buffer, BUFFER, DTYPE ); + ip2 <<= 1; d--; + } while( d ); + } + else + { + do + { + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + OP( COUNT, buffer, BUFFER, DTYPE ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + mask ^= ip2; ip2 <<= 1; d--; + } while( d ); + } + if( buffer ) free( buffer ); + + return( hplerr ); +/* + * End of HPL_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_sum.c new file mode 100644 index 000000000..34cf87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/HPL_sum.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_sum +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_sum +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sum combines (sum) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. 
+ */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } +/* + * End of HPL_sum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Makefile new file mode 100644 index 000000000..51549d817 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/grid/intel64/Makefile @@ -0,0 +1,103 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#  ######################################################################
#
# Architecture settings.  The variables used below that are not defined
# in this file (CCFLAGS, ARCHIVER, RANLIB, TOUCH, INCdir, HPLlib, ...)
# are expected to come from Make.inc -- NOTE(review): Make.inc is added
# as a symbolic link; make sure it resolves in your checkout.
#
include Make.inc
#
# ######################################################################
#
# Headers every grid object is rebuilt for.
#
INCdep           = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h
#
## Object files ########################################################
#
HPL_griobj       = \
   HPL_grid_init.o        HPL_pnum.o             HPL_grid_info.o        \
   HPL_grid_exit.o        HPL_broadcast.o        HPL_reduce.o           \
   HPL_all_reduce.o       HPL_barrier.o          HPL_min.o              \
   HPL_max.o              HPL_sum.o
#
## Targets #############################################################
#
# all, lib and clean are commands, not files: declare them phony so that
# a stray file with one of these names cannot mask the target.  lib.grd
# is a real stamp file and must stay non-phony.
#
.PHONY           : all lib clean
#
all              : lib
#
lib              : lib.grd
#
# Archive the grid objects into the HPL library, then touch the stamp
# so the archive step is skipped while no object has changed.
#
lib.grd          : $(HPL_griobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# One static pattern rule for all objects: X.o is compiled from ../X.c
# and depends on the common headers.  A missing source is reported as
# an error instead of falling through to an implicit rule.
#
$(HPL_griobj)    : %.o : ../%.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) $<
#
# ######################################################################
#
clean            :
	$(RM) *.o *.grd
#
# ######################################################################
a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_disp.c new file mode 100644 index 000000000..757dad242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_disp.c @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_disp +( + HPL_T_panel * * PANEL +) +#else +int HPL_pdpanel_disp +( PANEL ) + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_disp deallocates the panel structure and resources and + * stores the error code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to be deallocated. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int mpierr; +/* .. + * .. Executable Statements .. 
+ */ +/* + * Deallocate the panel resources and panel structure + */ + mpierr = HPL_pdpanel_free( *PANEL ); + if( *PANEL ) free( *PANEL ); + *PANEL = NULL; + + return( mpierr ); +/* + * End of HPL_pdpanel_disp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_free.c new file mode 100644 index 000000000..38b5b0d97 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_free.c @@ -0,0 +1,104 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_free +( + HPL_T_panel * PANEL +) +#else +int HPL_pdpanel_free +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_free deallocates the panel resources and stores the error + * code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the panel data structure from + * which the resources should be deallocated. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->pmat->info == 0 ) PANEL->pmat->info = *(PANEL->DINFO); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE ); + (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE ); + if( PANEL->grid->nprow > 1 ) + (void) vsip_blockrelease_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Destroy blocks + */ + vsip_blockdestroy_d( PANEL->L1block ); + vsip_blockdestroy_d( PANEL->L2block ); + if( PANEL->grid->nprow > 1 ) + vsip_blockdestroy_d( PANEL->Ublock ); +#endif + + if( PANEL->WORK ) free( PANEL->WORK ); + if( PANEL->IWORK ) free( PANEL->IWORK ); + + return( MPI_SUCCESS ); +/* + * End of HPL_pdpanel_free + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_init.c new file mode 100644 index 000000000..9e35c7fb4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_init.c @@ -0,0 +1,348 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... 
*/ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +void HPL_pdpanel_init +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * PANEL +) +#else +void HPL_pdpanel_init +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_init initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies is the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + size_t dalign; + int icurcol, icurrow, ii, itmp1, jj, lwork, + ml2, mp, mycol, myrow, nb, npcol, nprow, + nq, nu; +/* .. + * .. Executable Statements .. + */ + PANEL->grid = GRID; /* ptr to the process grid */ + PANEL->algo = ALGO; /* ptr to the algo parameters */ + PANEL->pmat = A; /* ptr to the local array info */ + + myrow = GRID->myrow; mycol = GRID->mycol; + nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; + + HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, + nprow, npcol, &ii, &jj, &icurrow, &icurcol ); + mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow ); + nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol ); + /* ptr to trailing part of A */ + PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld ); +/* + * Workspace pointers are initialized to NULL. 
+ */ + PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; + PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; + PANEL->IWORK = NULL; +/* + * Local lengths, indexes process coordinates + */ + PANEL->nb = nb; /* distribution blocking factor */ + PANEL->jb = JB; /* panel width */ + PANEL->m = M; /* global # of rows of trailing part of A */ + PANEL->n = N; /* global # of cols of trailing part of A */ + PANEL->ia = IA; /* global row index of trailing part of A */ + PANEL->ja = JA; /* global col index of trailing part of A */ + PANEL->mp = mp; /* local # of rows of trailing part of A */ + PANEL->nq = nq; /* local # of cols of trailing part of A */ + PANEL->ii = ii; /* local row index of trailing part of A */ + PANEL->jj = jj; /* local col index of trailing part of A */ + PANEL->lda = A->ld; /* local leading dim of array A */ + PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */ + PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */ + PANEL->msgid = TAG; /* message id to be used for panel bcast */ +/* + * Initialize ldl2 and len to temporary dummy values and Update tag for + * next panel + */ + PANEL->ldl2 = 0; /* local leading dim of array L2 */ + PANEL->len = 0; /* length of the buffer to broadcast */ +/* + * Figure out the exact amount of workspace needed by the factorization + * and the update - Allocate that space - Finish the panel data structu- + * re initialization. + * + * L1: JB x JB in all processes + * DPIV: JB in all processes + * DINFO: 1 in all processes + * + * We make sure that those three arrays are contiguous in memory for the + * later panel broadcast. We also choose to put this amount of space + * right after L2 (when it exist) so that one can receive a contiguous + * buffer. 
+ */ + dalign = ALGO->align * sizeof( double ); + + if( npcol == 1 ) /* P x 1 process grid */ + { /* space for L1, DPIV, DINFO */ + lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 ); + if( nprow > 1 ) /* space for U */ + { nu = nq - JB; lwork += JB * Mmax( 0, nu ); } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Always re-use A in + * the only process column + */ + PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); + } + else + { /* space for L2, L1, DPIV */ + ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 ); + PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 ); +#ifdef HPL_COPY_L + lwork = ALGO->align + PANEL->len; +#else + lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); +#endif + if( nprow > 1 ) /* space for U */ + { + nu = ( mycol == icurcol ? nq - JB : nq ); + lwork += JB * Mmax( 0, nu ); + } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Re-use A in the cur- + * rent process column when HPL_COPY_L is not defined. + */ +#ifdef HPL_COPY_L + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; +#else + if( mycol == icurcol ) + { + PANEL->L2 = PANEL->A + ( myrow == icurrow ? 
JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + } + else + { + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; + } +#endif + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); + } +#ifdef HPL_CALL_VSIPL + PANEL->Ablock = A->block; +/* + * Create blocks and bind them to the data pointers + */ + PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), + (vsip_length)(JB*JB), VSIP_MEM_NONE ); + PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), + (vsip_length)(PANEL->ldl2*JB), + VSIP_MEM_NONE ); + if( nprow > 1 ) + { + nu = ( mycol == icurcol ? nq - JB : nq ); + PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), + (vsip_length)(JB * Mmax( 0, nu )), + VSIP_MEM_NONE ); + } + else { PANEL->Ublock = A->block; } +#endif +/* + * If nprow is 1, we just allocate an array of JB integers for the swap. + * When nprow > 1, we allocate the space for the index arrays immediate- + * ly. The exact size of this array depends on the swapping routine that + * will be used, so we allocate the maximum: + * + * IWORK[0] is of size at most 1 + + * IPL is of size at most 1 + + * IPID is of size at most 4 * JB + + * + * For HPL_pdlaswp00: + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * llen is of size at most NPROW + + * llen_sv is of size at most NPROW. + * + * For HPL_pdlaswp01: + * ipA is of size ar most 1 + + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * iplen is of size at most NPROW + 1 + + * ipmap is of size at most NPROW + + * ipmapm1 is of size at most NPROW + + * permU is of size at most JB + + * iwork is of size at most MAX( 2*JB, NPROW+1 ). + * + * that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1)) + * = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ). 
+ * + * We use the first entry of this work array to indicate whether the + * local index arrays have already been computed, and if yes, by + * which function: + * IWORK[0] = -1: no index arrays have been computed so far; + * IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays; + * IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays; + * This allows to save some redundant and useless computations. + */ + if( nprow == 1 ) { lwork = JB; } + else + { + itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); + lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; + } + + PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + + if( PANEL->IWORK == NULL ) + { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } + /* Initialize the first entry of the workarray */ + *(PANEL->IWORK) = -1; +/* + * End of HPL_pdpanel_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_new.c new file mode 100644 index 000000000..1dbd8a18f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/HPL_pdpanel_new.c @@ -0,0 +1,152 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanel_new +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * * PANEL +) +#else +void HPL_pdpanel_new +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_new creates and initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id. 
+ * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to create and initialize. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * p = NULL; +/* .. + * .. Executable Statements .. + */ +/* + * Allocate the panel structure - Check for enough memory + */ + if( !( p = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" ); + } + + HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, p ); + *PANEL = p; +/* + * End of HPL_pdpanel_new + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Makefile new file mode 100644 index 000000000..804749cc2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/panel/intel64/Makefile @@ -0,0 +1,90 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_panobj = \ + HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o \ + HPL_pdpanel_free.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_panobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_new.c +HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_init.c +HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_disp.c +HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_free.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp00N.c new file mode 100644 index 000000000..7ad5a1a99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp00N.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP00N_DEPTH +#define HPL_LASWP00N_DEPTH 32 +#define HPL_LASWP00N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp00N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp00N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp00N performs a series of local row interchanges on a matrix + * A. One row interchange is initiated for rows 0 through M-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A to be + * interchanged. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. + * N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N) to which + * the row interchanges will be applied. On exit, the permuted + * matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). 
+ * + * IPIV (local input) const int * + * On entry, IPIV is an array of size M that contains the + * pivoting information. For k in [0..M), IPIV[k]=IROFF + l + * implies that local rows k and l are to be interchanged. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register double r; + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP00N_LOG2_DEPTH ); + int ip, nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH ) + << HPL_LASWP00N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#if ( HPL_LASWP00N_DEPTH > 1 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 2 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 4 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 8 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 16 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + 
r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA ) + { r = *a0; *a0 = *a1; *a1 = r; } + } + } + } +/* + * End of HPL_dlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01N.c new file mode 100644 index 000000000..786d1eff4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01N.c @@ -0,0 +1,209 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01N_DEPTH +#define HPL_LASWP01N_DEPTH 32 +#define HPL_LASWP01N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01N copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destinations of those rows are specified by LINDXAU. A + * non-negative value of LINDXAU indicates that the array destination is U, + * and A otherwise. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). The rows + * of A specified by LINDXA are copied within this array U at + * the positions indicated by non-negative values of LINDXAU. 
+ * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A or + * copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U where the rows of A should be + * copied to. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A non-negative + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U at the position LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved to the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP01N_LOG2_DEPTH ); + int lda1, nu, nr; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) << + HPL_LASWP01N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } + + *a1 = *a0; a1 += lda1; a0 += LDA; +#if ( HPL_LASWP01N_DEPTH > 1 ) + *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 2 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 4 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 8 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 16 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } + for( j = 0; j < nr; j++, a1 += lda1, a0 
+= LDA ) { *a1 = *a0; } + } + } +/* + * End of HPL_dlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01T.c new file mode 100644 index 000000000..429cfb6f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp01T.c @@ -0,0 +1,252 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01T_DEPTH +#define HPL_LASWP01T_DEPTH 32 +#define HPL_LASWP01T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01T copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destination of those rows are specified by LINDXAU. A + * positive value of LINDXAU indicates that the array destination is U, + * and A otherwise. Rows of A are stored as columns in U. 
+ * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,M). The rows + * of A specified by LINDXA are copied within this array U at + * the positions indicated by non-negative values of LINDXAU. The + * rows of A are stored as columns in U. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A or + * copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local column indexes of U where the rows of A should be + * copied to. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A non-negative + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U as column LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved to the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH ); + int nu, nr; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) << + HPL_LASWP01T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + a1[ 0] = *a0; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + a1[ 1] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA; + a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA; + a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA; + a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA; + a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA; + a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA; + a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA; + a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA; + a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA; + a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA; + a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA; + a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA; +#endif + } + else + { + a1 = A - (size_t)(LINDXAU[i]); + + *a1 = *a0; a1 += LDA; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; 
+ *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; } + } + else + { + a1 = A - (size_t)(LINDXAU[i]); + for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; } + } + } + } +/* + * End of HPL_dlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp02N.c new file mode 100644 index 000000000..45c2f5f1f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp02N.c @@ -0,0 +1,205 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP02N_DEPTH +#define HPL_LASWP02N_DEPTH 32 +#define HPL_LASWP02N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp02N +( + const int M, + const int N, + const double * A, + const int LDA, + double * W0, + double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp02N +( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M; + const int N; + const double * A; + const int LDA; + double * W0; + double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp02N packs scattered rows of an array A into workspace W. + * The row offsets in A are specified by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * copied into W. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * copied into W. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be copied into W. 
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * W0 (local input/output) double * + * On exit, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local output) double * + * On entry, W is an array of size (LDW,M). On exit, W contains + * the rows LINDXA[i] for i in [0..M) of A stored contiguously + * in W(:,i). + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied into W. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U that should be copied into A and + * replaced by the rows of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * A0 = A, * a0; + double * w0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP02N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + for( i = 0; i < M; i++ ) + *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]); + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) << + HPL_LASWP02N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW); + + w0[ 0] = *a0; a0 += LDA; +#if ( HPL_LASWP02N_DEPTH > 1 ) + w0[ 1] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 2 ) + w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 4 ) + w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA; + w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 8 ) + w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA; + w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA; + w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA; + w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 16 ) + w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA; + w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA; + w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA; + w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA; + w0[24] = *a0; a0 += LDA; w0[25] = *a0; a0 += LDA; + w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA; + w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA; + w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; } + } + } +/* + * End of HPL_dlaswp02N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03N.c new file mode 100644 index 000000000..760732a8d --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03N.c @@ -0,0 +1,194 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP03N_DEPTH +#define HPL_LASWP03N_DEPTH 32 +#define HPL_LASWP03N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp03N +( + const int M, + const int N, + double * U, + const int LDU, + const double * W0, + const double * W, + const int LDW +) +#else +void HPL_dlaswp03N +( M, N, U, LDU, W0, W, LDW ) + const int M; + const int N; + double * U; + const int LDU; + const double * W0; + const double * W; + const int LDW; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp03N copies columns of W into rows of an array U. The + * destination in U of these columns contained in W is stored within W0. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of W stored + * contiguously that should be copied into U. M must be at least + * zero. + * + * N (local input) const int + * On entry, N specifies the length of columns of W stored + * contiguously that should be copied into U. N must be at least + * zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). Columns + * of W are copied as rows within this array U at the positions + * specified in W0. 
+ * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M), that contains data + * to be copied into U. For i in [0..M), entries W(:,i) should + * be copied into the row or column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * u0; + const int incU = (int)( (unsigned int)(LDU) << + HPL_LASWP03N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) << + HPL_LASWP03N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *u0 = w0[ 0]; u0 += LDU; +#if ( HPL_LASWP03N_DEPTH > 1 ) + *u0 = w0[ 1]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 2 ) + *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 4 ) + *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU; + *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 8 ) + *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU; + *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU; + *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU; + *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 16 ) + *u0 = w0[16]; 
u0 += LDU; *u0 = w0[17]; u0 += LDU; + *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU; + *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU; + *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU; + *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU; + *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU; + *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU; + *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; } + } + } +/* + * End of HPL_dlaswp03N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03T.c new file mode 100644 index 000000000..fece692ce --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp03T.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP03T_DEPTH +#define HPL_LASWP03T_DEPTH 32 +#define HPL_LASWP03T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp03T +( + const int M, + const int N, + double * U, + const int LDU, + const double * W0, + const double * W, + const int LDW +) +#else +void HPL_dlaswp03T +( M, N, U, LDU, W0, W, LDW ) + const int M; + const int N; + double * U; + const int LDU; + const double * W0; + const double * W; + const int LDW; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp03T copies columns of W into an array U. The destination + * in U of these columns contained in W is stored within W0. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of W stored + * contiguously that should be copied into U. M must be at least + * zero. + * + * N (local input) const int + * On entry, N specifies the length of columns of W stored + * contiguously that should be copied into U. N must be at least + * zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,M). Columns + * of W are copied within the array U at the positions specified + * in W0. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M), that contains data + * to be copied into U. For i in [0..M), entries W(:,i) should + * be copied into the row or column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. 
+ * LDW must be at least MAX(1,N+1). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * u0; + const int incU = ( 1 << HPL_LASWP03T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) << + HPL_LASWP03T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP03T_DEPTH, U += incU, w += HPL_LASWP03T_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); + w0 = w + (size_t)(i) * (size_t)(LDW); + + u0[ 0] = w0[ 0]; +#if ( HPL_LASWP03T_DEPTH > 1 ) + u0[ 1] = w0[ 1]; +#endif +#if ( HPL_LASWP03T_DEPTH > 2 ) + u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3]; +#endif +#if ( HPL_LASWP03T_DEPTH > 4 ) + u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7]; +#endif +#if ( HPL_LASWP03T_DEPTH > 8 ) + u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11]; + u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15]; +#endif +#if ( HPL_LASWP03T_DEPTH > 16 ) + u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19]; + u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23]; + u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27]; + u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31]; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; } + } + } +/* + * End of HPL_dlaswp03T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04N.c new file mode 100644 index 000000000..4f9c490a5 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04N.c @@ -0,0 +1,285 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP04N_DEPTH +#define HPL_LASWP04N_DEPTH 32 +#define HPL_LASWP04N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp04N +( + const int M0, + const int M1, + const int N, + double * U, + const int LDU, + double * A, + const int LDA, + const double * W0, + const double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp04N +( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M0; + const int M1; + const int N; + double * U; + const int LDU; + double * A; + const int LDA; + const double * W0; + const double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U + * with columns of W. In addition M1 - M0 columns of W are copied into + * rows of U. + * + * Arguments + * ========= + * + * M0 (local input) const int + * On entry, M0 specifies the number of rows of U that should be + * copied into A and replaced by columns of W. M0 must be at + * least zero. + * + * M1 (local input) const int + * On entry, M1 specifies the number of columns of W that should + * be copied into rows of U. M1 must be at least zero. 
+ * + * N (local input) const int + * On entry, N specifies the length of the rows of U that should + * be copied into A. N must be at least zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows that are to be copied into A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M1). + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M0). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M1-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M1), that contains + * data to be copied into U. For i in [M0..M1), the entries + * W(:,i) are copied into the row W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M0 containing the + * local row indexes of A into which rows of U are copied. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M0 that contains + * the local row indexes of U that should be copied into A and + * replaced by the columns of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables ..
+ */ + const double * w = W, * w0; + double * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP04N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP04N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) << + HPL_LASWP04N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH, A += incA, U += incU, + w += HPL_LASWP04N_DEPTH ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U + (size_t)(LINDXAU[i]); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *a0 = *u0; *u0 = w0[ 0]; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP04N_DEPTH > 1 ) + *a0 = *u0; *u0 = w0[ 1]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 2 ) + *a0 = *u0; *u0 = w0[ 2]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 3]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 4 ) + *a0 = *u0; *u0 = w0[ 4]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 5]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 6]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 7]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 8 ) + *a0 = *u0; *u0 = w0[ 8]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 9]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[10]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[11]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[12]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[13]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[14]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[15]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 16 ) + *a0 = *u0; *u0 = w0[16]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[17]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[18]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[19]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[20]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[21]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[22]; a0 += 
LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[23]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[24]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[25]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[26]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[27]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[28]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[29]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[30]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[31]; a0 += LDA; u0 += LDU; +#endif + } + + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *u0 = w0[ 0]; u0 += LDU; +#if ( HPL_LASWP04N_DEPTH > 1 ) + *u0 = w0[ 1]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 2 ) + *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 4 ) + *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU; + *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 8 ) + *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU; + *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU; + *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU; + *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 16 ) + *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU; + *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU; + *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU; + *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU; + *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU; + *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU; + *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU; + *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U + (size_t)(LINDXAU[i]); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { *a0 = *u0; *u0 = w0[j]; } + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))); + w0 = w + (size_t)(i) * 
(size_t)(LDW); + for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; } + } + } +/* + * End of HPL_dlaswp04N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04T.c new file mode 100644 index 000000000..9cbb4c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp04T.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP04T_DEPTH +#define HPL_LASWP04T_DEPTH 32 +#define HPL_LASWP04T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp04T +( + const int M0, + const int M1, + const int N, + double * U, + const int LDU, + double * A, + const int LDA, + const double * W0, + const double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp04T +( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M0; + const int M1; + const int N; + double * U; + const int LDU; + double * A; + const int LDA; + const double * W0; + const double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those + * columns of U with columns of W. In addition M1 - M0 columns of W are + * copied into U. 
+ * + * Arguments + * ========= + * + * M0 (local input) const int + * On entry, M0 specifies the number of columns of U that should + * be copied into A and replaced by columns of W. M0 must be at + * least zero. + * + * M1 (local input) const int + * On entry, M1 specifies the number of columns of W that will + * be copied into U. M1 must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that + * will be copied into rows of A. N must be at least zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M0). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M1-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M0+M1), that contains + * data to be copied into U. For i in [M0..M1), the entries + * W(:,i) are copied into the column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M0 containing the + * local row indexes of A into which columns of U are copied.
+ * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M0 that contains + * the local column indexes of U that should be copied into A + * and replaced by the columns of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP04T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP04T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. (element offsets into A, U, W and W0 are formed in size_t so that index-times-leading-dimension products cannot overflow int) + */ + if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) << + HPL_LASWP04T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU, + w += HPL_LASWP04T_DEPTH ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); w0 = w + (size_t)(i) * (size_t)(LDW); + + *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA; +#if ( HPL_LASWP04T_DEPTH > 1 ) + *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 2 ) + *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA; + *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 4 ) + *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA; + *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA; + *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA; + *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 8 ) + *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA; + *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA; + *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA; + *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA; + *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA; + *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA; + *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA; + *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 16 ) + *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA; + *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA; + *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA; + *a0 = u0[19];
u0[19] = w0[19]; a0 += LDA; + *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA; + *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA; + *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA; + *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA; + *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA; + *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA; + *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA; + *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA; + *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA; + *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA; + *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA; + *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA; +#endif + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); w0 = w + (size_t)(i) * (size_t)(LDW); + + u0[ 0] = w0[ 0]; +#if ( HPL_LASWP04T_DEPTH > 1 ) + u0[ 1] = w0[ 1]; +#endif +#if ( HPL_LASWP04T_DEPTH > 2 ) + u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3]; +#endif +#if ( HPL_LASWP04T_DEPTH > 4 ) + u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7]; +#endif +#if ( HPL_LASWP04T_DEPTH > 8 ) + u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11]; + u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15]; +#endif +#if ( HPL_LASWP04T_DEPTH > 16 ) + u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19]; + u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23]; + u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27]; + u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31]; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; } + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; } + } + } +/* + * End of HPL_dlaswp04T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05N.c new
file mode 100644 index 000000000..3edcf91a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05N.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor (if HPL_LASWP05N_DEPTH is predefined, HPL_LASWP05N_LOG2_DEPTH must be predefined too and equal log2 of it; the unrolled code handles depths 1, 2, 4, 8, 16 and 32) + */ +#ifndef HPL_LASWP05N_DEPTH +#define HPL_LASWP05N_DEPTH 32 +#define HPL_LASWP05N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05N +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05N copies rows of U of global offset LINDXAU into rows of + * A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of U that should be + * copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of U that should + * be copied into A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A.
+ * LDA must be at least MAX(1,M). + * + * U (local input) const double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows that are to be copied into A. U is + * only read by this routine. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied from U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U that should be copied in A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. (incA, incU: LDA and LDU times 2^HPL_LASWP05N_LOG2_DEPTH; nu: N rounded down to a multiple of 2^HPL_LASWP05N_LOG2_DEPTH; nr: N - nu) + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP05N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) << + HPL_LASWP05N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + + *a0 = *u0; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP05N_DEPTH > 1 ) + *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 2 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 4 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 8 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 16 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; } + } + } +/* + * End of HPL_dlaswp05N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05T.c 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05T.c new file mode 100644 index 000000000..0adaa102d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp05T.c @@ -0,0 +1,196 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor (if HPL_LASWP05T_DEPTH is predefined, HPL_LASWP05T_LOG2_DEPTH must be predefined too and equal log2 of it; the unrolled code handles depths 1, 2, 4, 8, 16 and 32) + */ +#ifndef HPL_LASWP05T_DEPTH +#define HPL_LASWP05T_DEPTH 32 +#define HPL_LASWP05T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05T +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05T copies columns of U of global offset LINDXAU into rows + * of A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that will + * be copied into rows of A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU.
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input) const double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. U is only read by this routine. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied from U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local column indexes of U that should be copied in A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. (incA: LDA times 2^HPL_LASWP05T_LOG2_DEPTH; incU: 2^HPL_LASWP05T_LOG2_DEPTH; nu: N rounded down to a multiple of 2^HPL_LASWP05T_LOG2_DEPTH; nr: N - nu) + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) << + HPL_LASWP05T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + *a0 = u0[ 0]; a0 += LDA; +#if ( HPL_LASWP05T_DEPTH > 1 ) + *a0 = u0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 2 ) + *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 4 ) + *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA; + *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 8 ) + *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA; + *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA; + *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA; + *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 16 ) + *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA; + *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA; + *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA; + *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA; + *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA; + *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA; + *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA; + *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; } + } + } +/* + * End of HPL_dlaswp05T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06N.c new file mode 100644 index 000000000..a74bae75c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06N.c @@ -0,0 +1,206 @@ +/* + * 
-- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor (if HPL_LASWP06N_DEPTH is predefined, HPL_LASWP06N_LOG2_DEPTH must be predefined too and equal log2 of it; the unrolled code handles depths 1, 2, 4, 8, 16 and 32) + */ +#ifndef HPL_LASWP06N_DEPTH +#define HPL_LASWP06N_DEPTH 32 +#define HPL_LASWP06N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06N +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06N swaps rows of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with rows of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with rows of U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U (and those rows of U hold the former rows of A). + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M).
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows of U that are to be swapped with rows + * of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP06N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) << + HPL_LASWP06N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP06N_DEPTH > 1 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 2 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 4 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 8 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = 
r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 16 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { r = *a0; *a0 = *u0; *u0 = r; } + } + } +/* + * End of HPL_dlaswp06N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06T.c new file mode 100644 index 000000000..fb53c2a31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp06T.c @@ -0,0 +1,207 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor (if HPL_LASWP06T_DEPTH is predefined, HPL_LASWP06T_LOG2_DEPTH must be predefined too and equal log2 of it; the unrolled code handles depths 1, 2, 4, 8, 16 and 32) + */ +#ifndef HPL_LASWP06T_DEPTH +#define HPL_LASWP06T_DEPTH 32 +#define HPL_LASWP06T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06T +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06T swaps columns of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with columns of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with columns of U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U (and those columns of U hold the former rows of A). + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M).
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns of U that are to be swapped with + * rows of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) << + HPL_LASWP06T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + + r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA; +#if ( HPL_LASWP06T_DEPTH > 1 ) + r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 2 ) + r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA; + r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 4 ) + r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA; + r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA; + r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA; + r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 8 ) + r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA; + r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA; + r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA; + r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA; + r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA; + r = *a0; *a0 = u0[13]; u0[13] = r; a0 += 
LDA; + r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA; + r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 16 ) + r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA; + r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA; + r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA; + r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA; + r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA; + r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA; + r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA; + r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA; + r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA; + r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA; + r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA; + r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA; + r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA; + r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA; + r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA; + r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) + { r = *a0; *a0 = u0[j]; u0[j] = r; } + } + } +/* + * End of HPL_dlaswp06T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp10N.c new file mode 100644 index 000000000..7dbf934f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_dlaswp10N.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP10N_DEPTH +#define HPL_LASWP10N_DEPTH 32 +#define HPL_LASWP10N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp10N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp10N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp10N performs a sequence of local column interchanges on a + * matrix A. One column interchange is initiated for columns 0 through + * N-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A. M + * must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. N + * must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). This + * array contains the columns onto which the interchanges should + * be applied. On exit, A contains the permuted matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * IPIV (local input) const int * + * On entry, column j of A is interchanged with column IPIV[j]. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * a0, * a1; + const int incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH ); + int jp, mr, mu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH ) + << HPL_LASWP10N_LOG2_DEPTH ) ); + + for( j = 0; j < N; j++ ) + { + if( j != ( jp = IPIV[j] ) ) + { + a0 = A + j * LDA; a1 = A + jp * LDA; + + for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA ) + { + r = *a0; *a0 = *a1; *a1 = r; +#if ( HPL_LASWP10N_DEPTH > 1 ) + r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 2 ) + r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r; + r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 4 ) + r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r; + r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r; + r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r; + r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 8 ) + r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r; + r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r; + r = a0[10]; a0[10] = a1[10]; a1[10] = r; + r = a0[11]; a0[11] = a1[11]; a1[11] = r; + r = a0[12]; a0[12] = a1[12]; a1[12] = r; + r = a0[13]; a0[13] = a1[13]; a1[13] = r; + r = a0[14]; a0[14] = a1[14]; a1[14] = r; + r = a0[15]; a0[15] = a1[15]; a1[15] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 16 ) + r = a0[16]; a0[16] = a1[16]; a1[16] = r; + r = a0[17]; a0[17] = a1[17]; a1[17] = r; + r = a0[18]; a0[18] = a1[18]; a1[18] = r; + r = a0[19]; a0[19] = a1[19]; a1[19] = r; + r = a0[20]; a0[20] = a1[20]; a1[20] = r; + r = a0[21]; a0[21] = a1[21]; a1[21] = r; + r = a0[22]; a0[22] = a1[22]; a1[22] = r; + r = a0[23]; a0[23] = a1[23]; a1[23] = r; + r = a0[24]; a0[24] = a1[24]; a1[24] = r; + r = a0[25]; a0[25] = a1[25]; a1[25] = r; + r = a0[26]; a0[26] = a1[26]; a1[26] = r; + r = a0[27]; a0[27] = a1[27]; a1[27] = r; + r = a0[28]; a0[28] = a1[28]; a1[28] = r; + r = a0[29]; a0[29] = a1[29]; a1[29] = r; + r = a0[30]; a0[30] = a1[30]; a1[30] = r; + r = a0[31]; a0[31] = a1[31]; a1[31] = r; +#endif + } + + for( i = 0; i < mr; i++ ) + { r = a0[i]; a0[i] = a1[i]; a1[i] = r; } + } + } +/* + * End of 
HPL_dlaswp10N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2l.c new file mode 100644 index 000000000..e1b5bbfac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2l.c @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2l +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2l +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2l computes the local index of a matrix entry pointed to by + * the global index IG. This local returned index is the same in all + * processes. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated, in which case this routine returns IG in all + * processes. Otherwise, the value of SRCPROC is ignored. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( IG ); +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + return( NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? 
IG - INB : IG ) ); +/* + * End of HPL_indxg2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2lp.c new file mode 100644 index 000000000..74662f9d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2lp.c @@ -0,0 +1,176 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_indxg2lp +( + int * IL, + int * PROC, + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +void HPL_indxg2lp +( IL, PROC, IG, INB, NB, SRCPROC, NPROCS ) + int * IL; + int * PROC; + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2lp computes the local index of a matrix entry pointed to by + * the global index IG as well as the process coordinate which possesses + * this entry. The local returned index is the same in all processes. + * + * Arguments + * ========= + * + * IL (output) int * + * On exit, IL specifies the local index corresponding to IG. IL + * is at least zero. + * + * PROC (output) int * + * On exit, PROC is the coordinate of the process owning the + * entry specified by the global index IG. PROC is at least zero + * and less than NPROCS. + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A.
NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated: IL = IG and PROC = SRCPROC are returned. Otherwise, + * SRCPROC is the coordinate of the process owning the first block. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + *IL = IG; + *PROC = SRCPROC; + } + else + { +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC and take + * the NPROCS modulo (definition of the block-cyclic data distribution).
+ */ + *PROC = SRCPROC + 1 + i; + *PROC = MPosMod( *PROC, NPROCS ); +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + *IL = NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ); + } +/* + * End of HPL_indxg2lp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2p.c new file mode 100644 index 000000000..d0e75f516 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxg2p.c @@ -0,0 +1,128 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2p +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2p +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2p computes the process coordinate which possesses the entry + * of a matrix specified by a global index IG. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int proc; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( SRCPROC ); +/* + * Otherwise, IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC + * and take the NPROCS modulo (definition of the block-cyclic data dis- + * tribution).
+ */ + proc = SRCPROC + 1 + ( IG - INB ) / NB; + return( MPosMod( proc, NPROCS ) ); +/* + * End of HPL_indxg2p + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxl2g.c new file mode 100644 index 000000000..7f139425a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_indxl2g.c @@ -0,0 +1,164 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxl2g +( + const int IL, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxl2g +( IL, INB, NB, PROC, SRCPROC, NPROCS ) + const int IL; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxl2g computes the global index of a matrix entry pointed to + * by the local index IL of the process indicated by PROC. + * + * Arguments + * ========= + * + * IL (input) const int + * On entry, IL specifies the local index of the matrix entry. + * IL must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. 
+ * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local array row or column is to be determined. PROC must be + * at least zero and strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( IL ); + } + else if( PROC == SRCPROC ) + { +/* + * If I am SRCPROC, my first block is of size INB + */ + if( IL < INB ) +/* + * If IL belongs to the first block, the local and global indexes are + * equal. + */ + return ( IL ); +/* + * The number of entire blocks before the one IL belongs to is + * ( IL - INB ) / NB + 1. In the other NPROCS-1 processes, there are + * thus NB*( ( IL-INB )/NB + 1 ) entries, that are globally before the + * global entry corresponding to IL. + */ + return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL ); + } + else if( PROC < SRCPROC ) + { +/* + * Otherwise, the process of coordinate MOD(SRCPROC+1, NPROCS) owns the + * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro- + * cesses between this process and PROC not included when going from + * left to right on the process line with possible wrap around. These + * IPROC processes have one more NB block than the other processes, who + * own IL / NB blocks of size NB. 
+ */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB ); + } + else + { +/* + * Same reasoning as above with IPROC = PROC - SRCPROC - 1. + */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1 )+IL+INB ); + } +/* + * End of HPL_indxl2g + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_infog2l.c new file mode 100644 index 000000000..2580f2ad4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_infog2l.c @@ -0,0 +1,382 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_infog2l +( + int I, + int J, + const int IMB, + const int MB, + const int INB, + const int NB, + const int RSRC, + const int CSRC, + const int MYROW, + const int MYCOL, + const int NPROW, + const int NPCOL, + int * II, + int * JJ, + int * PROW, + int * PCOL +) +#else +void HPL_infog2l +( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL ) + int I; + int J; + const int IMB; + const int MB; + const int INB; + const int NB; + const int RSRC; + const int CSRC; + const int MYROW; + const int MYCOL; + const int NPROW; + const int NPCOL; + int * II; + int * JJ; + int * PROW; + int * PCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_infog2l computes the starting local index II, JJ corresponding to + * the submatrix starting globally at the entry pointed by I, J. This + * routine returns the coordinates in the grid of the process owning the + * matrix entry of global indexes I, J, namely PROW and PCOL.
+ * + * Arguments + * ========= + * + * I (global input) int + * On entry, I specifies the global row index of the matrix + * entry. I must be at least zero. + * + * J (global input) int + * On entry, J specifies the global column index of the matrix + * entry. J must be at least zero. + * + * IMB (global input) const int + * On entry, IMB specifies the size of the first row block of + * the global matrix. IMB must be at least one. + * + * MB (global input) const int + * On entry, MB specifies the blocking factor used to partition + * and distribute the rows of the matrix A. MB must be larger + * than one. + * + * INB (global input) const int + * On entry, INB specifies the size of the first column block of + * the global matrix. INB must be at least one. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the columns of the matrix A. NB must be larger + * than one. + * + * RSRC (global input) const int + * On entry, RSRC specifies the row coordinate of the process + * that possesses the first block of rows. RSRC must be at least zero and + * strictly less than NPROW, or -1 when the rows are not distributed. + * + * CSRC (global input) const int + * On entry, CSRC specifies the column coordinate of the process + * that possesses the first block of columns. CSRC must be at least zero and + * strictly less than NPCOL, or -1 when the columns are not distributed. + * + * MYROW (local input) const int + * On entry, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one.
+ * + * II (local output) int * + * On exit, II specifies the local starting row index of the + * submatrix. On exit, II is at least 0. + * + * JJ (local output) int * + * On exit, JJ specifies the local starting column index of the + * submatrix. On exit, JJ is at least 0. + * + * PROW (global output) int * + * On exit, PROW is the row coordinate of the process owning the + * entry specified by the global index I. PROW is at least zero + * and less than NPROW (PROW is left equal to RSRC, hence -1, when RSRC is -1). + * + * PCOL (global output) int * + * On exit, PCOL is the column coordinate of the process owning + * the entry specified by the global index J. PCOL is at least + * zero and less than NPCOL (PCOL is left equal to CSRC, hence -1, when CSRC is -1). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc; +/* .. + * .. Executable Statements .. + */ + imb = IMB; + *PROW = RSRC; + + if( ( *PROW == -1 ) || ( NPROW == 1 ) ) + { +/* + * The data is not distributed, or there is just one process row in the + * grid. + */ + *II = I; + } + else if( I < imb ) + { +/* + * I refers to an entry in the first block of rows + */ + *II = ( MYROW == *PROW ? I : 0 ); + } + else + { + mb = MB; + rsrc = *PROW; +/* + * The discussion goes as follows: compute my distance from the source + * process so that within this process coordinate system, the source + * process is the process such that mydist = 0, or equivalently + * MYROW == rsrc. + * + * Find out the global coordinate of the block I belongs to (nblocks), + * as well as the minimum local number of blocks that every process has. + * + * when mydist < nblocks-ilocblk*NPROW, I own ilocblk + 1 full blocks, + * when mydist > nblocks-ilocblk*NPROW, I own ilocblk full blocks, + * when mydist = nblocks-ilocblk*NPROW, I own ilocblk full blocks + * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
+ */ + if( MYROW == rsrc ) + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I - imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only + * three possible cases: + * + * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I do not own + * I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this + * case cannot happen when ilocblk is zero, since nblocks is at + * least one. + * + * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in + * which case I and II can respectively be written as IMB + + * (nblocks-1)*MB + IL and IMB + (ilocblk-1) * MB + IL. That is + * II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen + * when ilocblk is zero, since nblocks is at least one. + * + * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns + * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. + * Note that when ilocblk is zero, II is just IMB. + */ + if( nblocks < NPROW ) + { + *II = imb; + } + else + { + ilocblk = nblocks / NPROW; + if( ilocblk * NPROW >= nblocks ) + { + *II = ( ( MYROW == *PROW ) ? + I + ( ilocblk - nblocks ) * mb : + imb + ( ilocblk - 1 ) * mb ); + } + else + { + *II = imb + ilocblk * mb; + } + } + } + else + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. Note that the statement below also reduces the local copy of I by IMB; the II formulas in this branch use that reduced I. + */ + nblocks = ( I -= imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; +/* + * When mydist < nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks + * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I, I own ilocblk + * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk + * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB. + */ + if( nblocks < NPROW ) + { + mydist -= nblocks; + *II = ( ( mydist < 0 ) ? mb : + ( ( MYROW == *PROW ) ? + I + ( 1 - nblocks ) * mb : 0 ) ); + } + else + { + ilocblk = nblocks / NPROW; + mydist -= nblocks - ilocblk * NPROW; + *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : + ( ( MYROW == *PROW ) ? + ( ilocblk - nblocks + 1 ) * mb + I : + ilocblk * mb ) ); + } + } + } +/* + * Same computation for the columns, using J, INB, NB, CSRC, MYCOL, NPCOL to produce JJ and PCOL + */ + inb = INB; + *PCOL = CSRC; + + if( ( *PCOL == -1 ) || ( NPCOL == 1 ) ) + { + *JJ = J; + } + else if( J < inb ) + { + *JJ = ( MYCOL == *PCOL ? J : 0 ); + } + else + { + nb = NB; + csrc = *PCOL; + + if( MYCOL == csrc ) + { + nblocks = ( J - inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( nblocks < NPCOL ) + { + *JJ = inb; + } + else + { + ilocblk = nblocks / NPCOL; + if( ilocblk * NPCOL >= nblocks ) + { + *JJ = ( ( MYCOL == *PCOL ) ? + J + ( ilocblk - nblocks ) * nb : + inb + ( ilocblk - 1 ) * nb ); + } + else + { + *JJ = inb + ilocblk * nb; + } + } + } + else + { + nblocks = ( J -= inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; + + if( nblocks < NPCOL ) + { + mydist -= nblocks; + *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? + J + ( 1 - nblocks )*nb : 0 ) ); + } + else + { + ilocblk = nblocks / NPCOL; + mydist -= nblocks - ilocblk * NPCOL; + *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : + ( ( MYCOL == *PCOL ) ?
+ ( ilocblk - nblocks + 1 ) * nb + J : + ilocblk * nb ) ); + } + } + } +/* + * End of HPL_infog2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numroc.c new file mode 100644 index 000000000..39cd736d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numroc.c @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numroc +( + const int N, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numroc +( N, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numroc returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index 0. It simply forwards its arguments to HPL_numrocI with I = 0. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one.
+ * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) ); +/* + * End of HPL_numroc + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numrocI.c new file mode 100644 index 000000000..70f3497de --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_numrocI.c @@ -0,0 +1,243 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numrocI +( + const int N, + const int I, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numrocI +( N, I, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int I; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numrocI returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index I.
+ * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * I (input) const int + * On entry, I specifies the global index of the matrix entry. + * I must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS, or -1 when the data is not distributed (N is then returned). + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, inb, mydist, nblocks, srcproc; +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( N ); +/* + * Compute coordinate of process owning I and corresponding INB + */ + srcproc = SRCPROC; + + if( ( inb = INB - I ) <= 0 ) + { +/* + * I is not in the first block, find out which process has it and update + * the size of first block + */ + srcproc += ( nblocks = (-inb) / NB + 1 ); + srcproc -= ( srcproc / NPROCS ) * NPROCS; + inb += nblocks * NB; + } +/* + * Now everything is just like N, I=0, INB, NB, srcproc, NPROCS.
The + * discussion goes as follows: compute my distance from the source pro- + * cess so that within this process coordinate system, the source pro- + * cess is the process such that mydist = 0, or PROC == srcproc. + * + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. Then remark that + * + * when mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks, + * when mydist > nblocks - ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks - ilocblk*NPROCS, either the last block is not + * full and I own it, or the last block is full and I am the first pro- + * cess owning only ilocblk full blocks. + */ + if( PROC == srcproc ) + { +/* + * I am the source process, i.e. I own I (mydist=0). When N <= INB, the + * answer is simply N. + */ + if( N <= inb ) return( N ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only + * two possible cases: + * + * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di- + * vides the global number of full blocks, then the source process + * srcproc owns one more block than the other processes; and N can + * be rewritten as N = INB + (nblocks-1) * NB + LNB with LNB >= 0 + * size of the last block. Similarly, the local value Np correspon- + * ding to N can be written as Np = INB + (ilocblk-1) * NB + LNB = + * N + ( ilocblk-1 - (nblocks-1) )*NB. Note that this case cannot + * happen when ilocblk is zero, since nblocks is at least one. + * + * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only + * owns full blocks, and therefore Np = INB + ilocblk * NB. Note + * that when ilocblk is zero, Np is just INB. + */ + if( nblocks < NPROCS ) return( inb ); + + ilocblk = nblocks / NPROCS; + return( ( nblocks - ilocblk * NPROCS ) ?
inb + ilocblk * NB : + N + ( ilocblk - nblocks ) * NB ); + } + else + { +/* + * I am not the source process. When N <= INB, the answer is simply 0. + */ + if( N <= inb ) return( 0 ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS; +/* + * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks + * of size NB since I am not the source process, + * + * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks + * of size NB since I am not the source process, + * + * when mydist = nblocks - ilocblk*NPROCS, + * either the last block is not full and I own it, in which case + * N = INB + (nblocks - 1)*NB + LNB with LNB the size of the last + * block such that NB > LNB > 0; the local value Np corresponding to + * N is given by Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB; + * or the last block is full and I am the first process owning only + * ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and + * Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB. + */ + if( nblocks < NPROCS ) + return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 : + N - inb + NB * ( 1 - nblocks ) ) ); + + ilocblk = nblocks / NPROCS; + mydist -= nblocks - ilocblk * NPROCS; + return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : + ( ( mydist > 0 ) ?
ilocblk * NB : + N - inb + NB * ( ilocblk - nblocks + 1 ) ) ); + } +/* + * End of HPL_numrocI + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pabort.c new file mode 100644 index 000000000..268975fc1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pabort.c @@ -0,0 +1,137 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pabort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pabort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pabort displays an error message on stderr and halts execution. + * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( stderr, + "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); + + MPI_Abort( MPI_COMM_WORLD, -1 ); + exit( -1 ); +/* + * End of HPL_pabort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlamch.c new file mode 100644 index 000000000..73cf649da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlamch.c @@ -0,0 +1,143 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlamch +( + MPI_Comm COMM, + const HPL_T_MACH CMACH +) +#else +double HPL_pdlamch +( COMM, CMACH ) + MPI_Comm COMM; + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum(sfmin) such that + * 1/sfmin does not overflow, the base of the machine (base), the precision + * (prec), the number of (base) digits in the mantissa (t), whether + * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum + * exponent before (gradual) underflow (emin), the underflow threshold + * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the + * overflow threshold (rmax) - (base**emax)*(1-eps). The value computed locally by HPL_dlamch is combined over COMM with HPL_all_reduce: HPL_max for eps, sfmin, emin and rmin, HPL_min for emax and rmax; any other CMACH value is returned as computed locally. + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * CMACH (global input) const HPL_T_MACH + * Specifies the value to be returned by HPL_pdlamch + * = HPL_MACH_EPS, HPL_pdlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + * = HPL_MACH_BASE, HPL_pdlamch := base + * = HPL_MACH_PREC, HPL_pdlamch := eps*base + * = HPL_MACH_MLEN, HPL_pdlamch := t + * = HPL_MACH_RND, HPL_pdlamch := rnd + * = HPL_MACH_EMIN, HPL_pdlamch := emin + * = HPL_MACH_RMIN, HPL_pdlamch := rmin + * = HPL_MACH_EMAX, HPL_pdlamch := emax + * = HPL_MACH_RMAX, HPL_pdlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold.
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double param; +/* .. + * .. Executable Statements .. + */ + param = HPL_dlamch( CMACH ); + + switch( CMACH ) + { + case HPL_MACH_EPS : + case HPL_MACH_SFMIN : + case HPL_MACH_EMIN : + case HPL_MACH_RMIN : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_max, COMM ); + break; + case HPL_MACH_EMAX : + case HPL_MACH_RMAX : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_min, COMM ); + break; + default : + break; + } + + return( param ); +/* + * End of HPL_pdlamch + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlange.c new file mode 100644 index 000000000..40bdcc36b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlange.c @@ -0,0 +1,242 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlange +( + const HPL_T_grid * GRID, + const HPL_T_NORM NORM, + const int M, + const int N, + const int NB, + const double * A, + const int LDA +) +#else +double HPL_pdlange +( GRID, NORM, M, N, NB, A, LDA ) + const HPL_T_grid * GRID; + const HPL_T_NORM NORM; + const int M; + const int N; + const int NB; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a distributed matrix A: + * + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NORM (global input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,LocQ(N)), + * that contains the local pieces of the distributed matrix A. 
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + MPI_Comm Acomm, Ccomm, Rcomm; + int ii, jj, mp, mycol, myrow, npcol, nprow, + nq; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Ccomm = GRID->col_comm; + Acomm = GRID->all_comm; + + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( Mmin( M, N ) == 0 ) { return( v0 ); } + else if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ) + */ + if( ( nq > 0 ) && ( mp > 0 ) ) + { + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - mp; + } + } + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Acomm ); + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ). 
+ */ + if( nq > 0 ) + { + work = (double*)malloc( (size_t)(nq) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( jj = 0; jj < nq; jj++ ) + { + s = HPL_rzero; + for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; } + work[jj] = s; A += LDA - mp; + } +/* + * Find sum of global matrix columns, store on row 0 of process grid + */ + (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum, + 0, Ccomm ); +/* + * Find maximum sum of columns for 1-norm + */ + if( myrow == 0 ) + { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in row 0, store result in process (0,0) + */ + if( myrow == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, + Rcomm ); + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ) + */ + if( mp > 0 ) + { + work = (double*)malloc( (size_t)(mp) * sizeof( double ) ); + if( work == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } + + for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; } + + for( jj = 0; jj < nq; jj++ ) + { + for( ii = 0; ii < mp; ii++ ) + { work[ii] += Mabs( *A ); A++; } + A += LDA - mp; + } +/* + * Find sum of global matrix rows, store on column 0 of process grid + */ + (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum, + 0, Rcomm ); +/* + * Find maximum sum of rows for inf-norm + */ + if( mycol == 0 ) + { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); } + if( work ) free( work ); + } +/* + * Find max in column 0, store result in process (0,0) + */ + if( mycol == 0 ) + (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, + 0, Ccomm ); + } +/* + * Broadcast answer to every process in the grid + */ + (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm ); + + return( v0 ); +/* + * End of HPL_pdlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlaprnt.c 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlaprnt.c new file mode 100644 index 000000000..24fc47540 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pdlaprnt.c @@ -0,0 +1,238 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + + + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaprnt +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int IAROW, + const int IACOL, + const char * CMATNM +) +#else +void HPL_pdlaprnt +( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int IAROW; + const int IACOL; + const char * CMATNM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaprnt prints to standard error a distributed matrix A. The + * local pieces of A are sent to the process of coordinates (0,0) in + * the grid and then printed. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the coefficient + * matrix A. M must be at least zero. 
+ * + * N (global input) const int + * On entry, N specifies the number of columns of the + * coefficient matrix A. N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * A (local input) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). + * This array contains the coefficient matrix to be printed. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * IAROW (global input) const int + * On entry, IAROW specifies the row process coordinate owning + * the first row of A. IAROW must be larger than or equal to + * zero and less than NPROW. + * + * IACOL (global input) const int + * On entry, IACOL specifies the column process coordinate + * owning the first column of A. IACOL must be larger than or + * equal to zero and less than NPCOL. + * + * CMATNM (global input) const char * + * On entry, CMATNM is the name of the matrix to be printed. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Acomm; + double * buf = NULL; + int h, i, ib, icurcol=IACOL, icurrow=IAROW, + ii=0, j, jb, jj=0, mycol, myrow, npcol, + nprow, src; +/* .. + * .. Executable Statements .. 
+ */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Acomm = GRID->all_comm; + if( ( myrow == 0 ) && ( mycol == 0 ) ) + buf = (double*)malloc( (size_t)(NB) * sizeof( double ) ); + + for( j = 0; j < N; j += NB ) + { + jb = N-j; jb = Mmin( jb, NB ); + for( h = 0; h < jb; h++ ) + { + (void) HPL_barrier( Acomm ); + + for( i = 0; i < M; i += NB ) + { + ib = M-i; ib = Mmin( ib, NB ); + if( ( icurrow == 0 ) && ( icurcol == 0 ) ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1, + j+h+1, LDA, CMATNM ); + } + else + { + if( ( myrow == icurrow ) && ( mycol == icurcol ) ) + { + (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0, + 9000+(j+h)*M+i, Acomm ); + } + else if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + src = HPL_pnum( GRID, icurrow, icurcol ); + (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i, + Acomm ); + if (buf != NULL) + HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM ); + } + } + if( myrow == icurrow ) ii += ib; + icurrow = MModAdd1( icurrow, nprow ); + (void) HPL_barrier( Acomm ); + } + ii = 0; icurrow = IAROW; + } + if( mycol == icurcol ) jj += jb; + icurcol = MModAdd1( icurcol, npcol ); + (void) HPL_barrier( Acomm ); + } + if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf ); +/* + * End of HPL_pdlaprnt + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pwarn.c new file mode 100644 index 000000000..a9f666f89 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/HPL_pwarn.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pwarn +( + FILE * STREAM, + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pwarn( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pwarn displays an error message. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + FILE * STREAM; + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. 
+ */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); +/* + * End of HPL_pwarn + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Makefile new file mode 100644 index 000000000..ea93cd150 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pauxil/intel64/Makefile @@ -0,0 +1,137 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Build rules for the HPL "pauxil" objects. Tool and path variables
# used below (INCdir, CCFLAGS, ARCHIVER, ARFLAGS, RANLIB, TOUCH, HPLlib,
# ...) are not defined in this file; they are expected to come from
# Make.inc.
#
include Make.inc
#
# ######################################################################
#
# all, lib and clean are commands, not files: never let a stray file of
# the same name mask them.
#
.PHONY : all lib clean
#
# Headers every object in this directory depends on.
#
INCdep           = \
   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_pmisc.h  $(INCdir)/hpl_grid.h  $(INCdir)/hpl_pauxil.h
#
## Object files ########################################################
#
HPL_pauobj       = \
   HPL_indxg2l.o          HPL_indxg2lp.o         HPL_indxg2p.o          \
   HPL_indxl2g.o          HPL_infog2l.o          HPL_numroc.o           \
   HPL_numrocI.o          HPL_dlaswp00N.o        HPL_dlaswp10N.o        \
   HPL_dlaswp01N.o        HPL_dlaswp01T.o        HPL_dlaswp02N.o        \
   HPL_dlaswp03N.o        HPL_dlaswp03T.o        HPL_dlaswp04N.o        \
   HPL_dlaswp04T.o        HPL_dlaswp05N.o        HPL_dlaswp05T.o        \
   HPL_dlaswp06N.o        HPL_dlaswp06T.o        HPL_pwarn.o            \
   HPL_pabort.o           HPL_pdlaprnt.o         HPL_pdlamch.o          \
   HPL_pdlange.o
#
## Targets #############################################################
#
all     : lib
#
lib     : lib.grd
#
# lib.grd is a stamp file: it records that the objects listed above have
# been added to $(HPLlib), so the archive step is not repeated when
# nothing changed.
#
lib.grd : $(HPL_pauobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) $@
#
# ######################################################################
#
# Every object is compiled from the source of the same name one
# directory up. The static pattern rule is restricted to $(HPL_pauobj),
# so a missing source is reported as an error instead of falling through
# to an implicit rule.
#
$(HPL_pauobj) : %.o : ../%.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) $<
#
# ######################################################################
#
clean   :
	$(RM) *.o *.grd
#
# ######################################################################
Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dlocmax +( + HPL_T_panel * PANEL, + const int N, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocmax +( PANEL, N, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int N; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocmax finds the maximum entry in the current column and packs + * the useful information in WORK[0:3]. On exit, WORK[0] contains the + * local maximum absolute value scalar, WORK[1] is the corresponding + * local row index, WORK[2] is the corresponding global row index, and + * WORK[3] is the coordinate of the process owning this max. When N is + * less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set + * to the total number of process rows. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of the column + * of A on which we operate. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. 
+ * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 4. On exit, + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A; + int kk, igindx, ilindx, myrow, nb, nprow; +/* .. + * .. Executable Statements .. + */ + if( N > 0 ) + { + A = Mptr( PANEL->A, II, JJ, PANEL->lda ); + myrow = PANEL->grid->myrow; + nprow = PANEL->grid->nprow; + nb = PANEL->nb; + kk = PANEL->ii + II + ( ilindx = HPL_idamax( N, A, 1 ) ); + Mindxl2g( igindx, kk, nb, nb, myrow, 0, nprow ); +/* + * WORK[0] := local maximum absolute value scalar, + * WORK[1] := corresponding local row index, + * WORK[2] := corresponding global row index, + * WORK[3] := coordinate of process owning this max. + */ + WORK[0] = A[ilindx]; WORK[1] = (double)(ilindx); + WORK[2] = (double)(igindx); WORK[3] = (double)(myrow); + } + else + { +/* + * If I do not have any row of A, then set the coordinate of the process + * (WORK[3]) owning this "ghost" row, such that it will never be used, + * even if there are only zeros in the current column of A. 
+ */ + WORK[0] = WORK[1] = WORK[2] = HPL_rzero; + WORK[3] = (double)(PANEL->grid->nprow); + } +/* + * End of HPL_dlocmax + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpN.c new file mode 100644 index 000000000..a3919500a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpN.c @@ -0,0 +1,436 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpN +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpN +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpN performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel.
+ * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; /* Wmx = WORK+4 (max row), Wr0 = Wmx+n0 (current row) */ + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; /* nu = n0 rounded down to a multiple of 2^HPL_LOCSWP_LOG2_DEPTH (unrolled part), nr = remainder */ +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, JJ, 0, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to be swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A.
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda; +
*L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda ) + { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current row of A into L1. + */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A.
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = *A1 = Wmx[ 0]; L += n0; A1 += lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = *A1 = Wmx[ 1]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = *A1 = Wmx[ 2]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 3]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = *A1 = Wmx[ 4]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 5]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 6]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 7]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = *A1 = Wmx[ 8]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 9]; L += n0; A1 += lda; + *L = *A1 = Wmx[10]; L += n0; A1 += lda; + *L = *A1 = Wmx[11]; L += n0; A1 += lda; + *L = *A1 = Wmx[12]; L += n0; A1 += lda; + *L = *A1 = Wmx[13]; L += n0; A1 += lda; + *L = *A1 = Wmx[14]; L += n0; A1 += lda; + *L = *A1 = Wmx[15]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = *A1 = Wmx[16]; L += n0; A1 += lda; + *L = *A1 = Wmx[17]; L += n0; A1 += lda; + *L = *A1 = Wmx[18]; L += n0; A1 += lda; + *L = *A1 = Wmx[19]; L += n0; A1 += lda; + *L = *A1 = Wmx[20]; L += n0; A1 += lda; + *L = *A1 = Wmx[21]; L += n0; A1 += lda; + *L = *A1 = Wmx[22]; L += n0; A1 += lda; + *L = *A1 = Wmx[23]; L += n0; A1 += lda; + *L = *A1 = Wmx[24]; L += n0; A1 += lda; + *L = *A1 = Wmx[25]; L += n0; A1 += lda; + *L = *A1 = Wmx[26]; L += n0; A1 += lda; + *L = *A1 = Wmx[27]; L += n0; A1 += lda; + *L = *A1 = Wmx[28]; L += n0; A1 += lda; + *L = *A1 = Wmx[29]; L += n0; A1 += lda; + *L = *A1 = Wmx[30]; L += n0; A1 += lda; + *L = *A1 = Wmx[31]; L += n0; A1 += lda; +#endif + } + + for( i = 0; i < nr; i++, L += n0, A1 += lda ) + { *L = *A1 = Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1.
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0.
+ */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular.
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wr0[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wr0[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0; + *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0; + *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0; + *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0; + *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0; + *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0; + *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0; + *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0; + *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0; + *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0; + *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0; + *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0; +#endif + } + + for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; } +/* + * set INFO. + */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpT.c new file mode 100644 index 000000000..89b86e35a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_dlocswpT.c @@ -0,0 +1,406 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpT +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpT +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpT performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements ..
+ */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; /* Wmx = WORK+4 (max row), Wr0 = Wmx+n0 (current row) */ + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; /* nu = n0 rounded down to a multiple of 2^HPL_LOCSWP_LOG2_DEPTH (unrolled part), nr = remainder */ +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, 0, JJ, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to be swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A. + */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH, + L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda; + L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda; + L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda; + L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda; + L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda; + L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda; + L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda; + L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda; + L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda; + L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda; + L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda; + L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda; + L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda; + L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda; + L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda; + L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda; + L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda; + L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda; + L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda; + L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda; + L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda; + L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda; + L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda; + L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda; + L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda; + L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda; + L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda, A2 += lda ) + { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current row of A into L1.
+ */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; + L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[12]=Wmx[12]; + L[ 9]=Wmx[ 9]; L[13]=Wmx[13]; + L[10]=Wmx[10]; L[14]=Wmx[14]; + L[11]=Wmx[11]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[20]=Wmx[20]; + L[17]=Wmx[17]; L[21]=Wmx[21]; + L[18]=Wmx[18]; L[22]=Wmx[22]; + L[19]=Wmx[19]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[28]=Wmx[28]; + L[25]=Wmx[25]; L[29]=Wmx[29]; + L[26]=Wmx[26]; L[30]=Wmx[30]; + L[27]=Wmx[27]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A.
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; A1+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda; + L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda; + L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda; + L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda; + L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda; + L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda; + L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda; + L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda; + L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda; + L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda; + L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda; + L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1.
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11]; + L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19]; + L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27]; + L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. + */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28];
A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. + */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wr0[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wr0[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13]; + L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21]; + L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23]; + L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29]; + L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; } +/* + * Set INFO. + */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdfact.c new file mode 100644 index 000000000..1d99c6e14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdfact.c @@ -0,0 +1,141 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdfact +( + HPL_T_panel * PANEL +) +#else +void HPL_pdfact +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdfact recursively factorizes a 1-dimensional panel of columns. + * The RPFACT function pointer specifies the recursive algorithm to be + * used, either Crout, Left- or Right looking. NBMIN allows to vary the + * recursive stopping criterion in terms of the number of columns in the + * panel, and NDIV allows to specify the number of subpanels each panel + * should be divided into. Usually a value of 2 will be chosen. Finally + * PFACT is a function pointer specifying the non-recursive algorithm + * to be used on at most NBMIN columns. One can also choose here between + * Crout, Left- or Right looking. Empirical tests seem to indicate that + * values of 4 or 8 for NBMIN give the best results. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual.
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + void * vptr = NULL; + int align, jb; +/* .. + * .. Executable Statements ..
+ */ + jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb; /* n and ja are advanced by jb before the early return below, so this runs on every call */ + + if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return; /* nothing more to do unless mycol equals PANEL->pcol and jb > 0 */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif + align = PANEL->algo->align; + vptr = (void *)malloc( ( (size_t)(align) + + (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) * + sizeof(double) ); /* workspace of align + 2*(4+2*jb) doubles */ + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); } +/* + * Factor the panel - Update the panel pointers + */ + PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr, + ((size_t)(align) * sizeof(double) ) ) ); + if( vptr ) free( vptr ); + + PANEL->A = Mptr( PANEL->A, 0, jb, PANEL->lda ); + PANEL->nq -= jb; PANEL->jj += jb; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif +/* + * End of HPL_pdfact + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdmxswp.c new file mode 100644 index 000000000..b14452197 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdmxswp.c @@ -0,0 +1,311 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmxswp +( + HPL_T_panel * PANEL, + const int M, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_pdmxswp +( PANEL, M, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int M; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmxswp swaps and broadcasts the absolute value max row using + * bi-directional exchange. The buffer is partially set by HPL_dlocmax. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by + * + * log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + * + * where lat and bdwth are the latency and bandwidth of the network for + * double precision real elements. Communication only occurs in one + * process column. Mono-directional links will cause the communication + * cost to double. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of the matrix + * column on which this function operates. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * It is assumed that HPL_dlocmax was called prior to this + * routine to initialize the first four entries of this array. + * On exit, the N0 length max row is stored in WORK[4:4+N0-1]; + * Note that this is also the JJth row (or column) of L1. The + * remaining part is used as a temporary array. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax, tmp1; + double * A0, * Wmx, * Wwork; + HPL_T_grid * grid; + MPI_Comm comm; + unsigned int hdim, ip2, ip2_, ipow, k, mask; + int Np2, cnt_, cnt0, i, icurrow, lda, mydist, + mydis_, myrow, n0, nprow, partner, rcnt, + root, scnt, size_; +/* .. 
+ * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif + grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow; +/* + * ip2 : the largest power of two less than or equal to nprow; + * hdim : dimension of the hypercube made of those ip2 processes; + * Np2 : logical flag, non-zero when nprow is NOT a power of 2; + */ + comm = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2); + hdim = (unsigned int)(grid->row_hdim); n0 = PANEL->jb; + icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 ); + mydist = MModSub( myrow, icurrow, nprow ); +/* + * Set up pointers in workspace: WORK and Wwork point to the beginning + * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row + * owning the local (before combine) and global (after combine) absolute + * value max. A0 points to the copy of the current row of the matrix. + */ + cnt0 = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0; + Wwork = WORK + cnt0; +/* + * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is (int)(WORK[1]) (row + * with max in current column). If I am the current process row, pack in + * addition the current row of A in A0[0:N0-1]. If I do not own any row + * of A, then zero out Wmx[0:N0-1]. + */ + if( M > 0 ) + { + lda = PANEL->lda; + HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda, + Wmx, 1 ); + if( myrow == icurrow ) + { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); } + } + else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; } +/* + * Combine the results (bi-directional exchange): the process coordina- + * tes are relative to icurrow, this allows to reduce the communication + * volume when nprow is not a power of 2. + * + * When nprow is not a power of 2: proc[i-ip2] receives local data from + * proc[i] for all i in [ip2..nprow). In addition, proc[0] (icurrow) + * sends to proc[ip2] the current row of A for later broadcast in procs + * [ip2..nprow).
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + if( mydist == (int)(ip2) ) + (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + if( mydist == 0 ) + (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); } + } + } + + if( mydist < (int)(ip2) ) + { +/* + * power of 2 part of the processes collection: processes [0..ip2) are + * combining (binary exchange); proc[0] has two rows to send, but one to + * receive. At every step k in [0..hdim) of the algorithm, a process + * pair exchanging 2 rows is such that myrow >> k+1 is 0. Among those + * processes the ones that are sending one more row than what they are + * receiving are such that myrow >> k is equal to 0. + */ + k = 0; ipow = 1; + + while( k < hdim ) + { + if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 ) + { + if( ( (unsigned int)(mydist) >> k ) == 0 ) + { scnt = cnt0; rcnt = cnt_; } + else + { scnt = cnt_; rcnt = cnt0; } + } + else { scnt = rcnt = cnt_; } + + partner = (int)( (unsigned int)(mydist) ^ ipow ); + (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt, + MSGID_BEGIN_PFACT, MModAdd( partner, icurrow, + nprow ), comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { + HPL_dcopy( ( rcnt == cnt0 ? 
cnt0 : cnt_ ), Wwork, 1, + WORK, 1 ); + } + else if( rcnt == cnt0 ) + { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); } + + ipow <<= 1; k++; + } + } + else if( size_ > 1 ) + { +/* + * proc[ip2] broadcast current row of A to procs [ip2+1..nprow). + */ + k = (unsigned int)(size_) - 1; ip2_ = mask = 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else if( partner < size_ ) + { + (void) HPL_send( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } + ip2_ >>= 1; + } while( ip2_ > 0 ); + } +/* + * If nprow is not a power of 2, for all i in [ip2..nprow), proc[i-ip2] + * sends the pivot row to proc[i] along with the first four entries of + * the WORK array. 
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } +/* + * Save the global pivot index in pivot array + */ + (PANEL->DPIV)[JJ] = WORK[2]; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif +/* + * End of HPL_pdmxswp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrN.c new file mode 100644 index 000000000..4ea170b73 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrN.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in no-transpose form (i.e. just like the input + * matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual.
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0).
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj, jj+1, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, Nm1 ); + Xv1 = vsip_msubview_d( Xv0, jj, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Xv0, jj, jj+1, 1, Nm1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone, + Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj, + ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 ); +#endif +
if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update the + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk+1, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + vsip_mdestroy_d( Yv1 ); + vsip_mdestroy_d( Xv1 ); + vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF, + jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ), + 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of
HPL_pdpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrT.c new file mode 100644 index 000000000..50ed300aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpancrT.c @@ -0,0 +1,267 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj+1, jj, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, jj+1, ICOFF, Nm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj, kk, 1 ); + Yv1 = vsip_msubview_d( Xv0, jj+1, jj, Nm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone, + Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF, + jj, n0 ), 1, HPL_rone, L1ptr, 1 ); +#endif + if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update the + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each
current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk+1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF, + n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllN.c new file mode 100644 index 000000000..fa471198d --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllN.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); +/* curr is 1 when this process row equals PANEL->prow, 0 otherwise; it selects ii, iip1 and Mm1 below */ + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* Unit lower triangular solve (HPL_dtrsv) on the kk = jj+1-ICOFF entries addressed by L1ptr */ + L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, 1 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, 1, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry (the scaling is skipped when WORK[0] equals HPL_rzero) + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllT.c new file mode 100644 index 000000000..a6e1b67bd --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanllT.c @@ -0,0 +1,244 @@ +/* + *
-- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); +/* curr is 1 when this process row equals PANEL->prow, 0 otherwise; it selects ii, iip1 and Mm1 below */ + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* Unit upper triangular transposed solve (HPL_dtrsv) on the kk = jj+1-ICOFF entries addressed by L1ptr with stride n0 */ + L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry (the scaling is skipped when WORK[0] equals HPL_rzero) + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlN.c new file mode 100644 index 000000000..0a3b9a542 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlN.c @@ -0,0 +1,250 @@ +/*
+ * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); +/* curr is 1 when this process row equals PANEL->prow, 0 otherwise; it selects ii, iip1 and Mm1 below */ + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); +#ifdef HPL_CALL_VSIPL + if( Nm1 > 1 ) + { +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); + } +#else + if( Nm1 > 1 ) + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda ); +#endif +/* + * Same thing as above but with worse data access on y (A += x * y^T) + * (kept for reference only: L1 and n0 are not declared in this routine) + * if( Nm1 > 1 ) + * HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + * Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ), + * lda ); + */ + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } +/* Move on to the next column of the panel */ + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry (the scaling is skipped when WORK[0] equals HPL_rzero) + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlN + */ +} 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlT.c
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlT.c new file mode 100644 index 000000000..68c1afc02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdpanrlT.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows bringing the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt, * L1; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M, + n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); +/* curr is 1 when this process row equals PANEL->prow, 0 otherwise; it selects ii, iip1 and Mm1 below */ + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation.
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + + if( Nm1 > 1 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ), + lda ); +#endif + } + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } +/* Move on to the next column of the panel */ + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry (the scaling is skipped when WORK[0] equals HPL_rzero) + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrN.c new file mode 100644 index 000000000..348d7ebe6 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrN.c @@ -0,0 +1,282 @@ +/* + * -- High
Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrN recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm indeed makes it possible to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* N <= nbmin: hand the whole panel to PANEL->algo->pffun and stop recursing */ +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN.
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* nonzero iff this process row is PANEL->prow */ + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrN( PANEL, m, jb, ioff, WORK ); /* recurse on the current jb-column block */ + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + Av2 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n, + jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0, + Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go to the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrT.c
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrT.c new file mode 100644 index 000000000..a1ecfac2c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpancrT.c @@ -0,0 +1,282 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrT recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm indeed makes it possible to almost + * achieve Level 3 BLAS performance in the panel factorization.
On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* N <= nbmin: hand the whole panel to PANEL->algo->pffun and stop recursing */ +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN.
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* nonzero iff this process row is PANEL->prow */ + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_TRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrT( PANEL, m, jb, ioff, WORK ); /* recurse on the current jb-column block */ + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj ); + Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_NTRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb, + jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go to the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllN.c
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllN.c new file mode 100644 index 000000000..4dbc13b44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllN recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm indeed makes it possible to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* N <= nbmin: hand the whole panel to PANEL->algo->pffun and stop recursing */ +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN.
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); /* nonzero iff this process row is PANEL->prow */ + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, + jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ), + n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews: Av1 is m-by-jj, Lv1 is jj-by-jb, so the updated view Av2 must be m-by-jb (same extents as the HPL_dgemm call in the #else branch) + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d(
vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllN( PANEL, m, jb, ioff, WORK ); /* recurse on the current jb-column block */ +/* + * Copy back upper part of A in current process row - Go to the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllT.c new file mode 100644 index 000000000..887caeb87 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanllT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllT recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr, + jj, 0, n0 ), n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Av2 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllT( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb 
); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlN.c new file mode 100644 index 000000000..22f105cf4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlN recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) 
vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlT.c new file mode 100644 index 000000000..a77301b9b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/HPL_pdrpanrlT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlT recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews - Av2 spans the n trailing columns + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go to the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0,
ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Makefile new file mode 100644 index 000000000..bf4634d31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pfact/intel64/Makefile @@ -0,0 +1,118 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h +# +## Object files ######################################################## +# +HPL_pfaobj = \ + HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o \ + HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o \ + HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o \ + HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o \ + HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o \ + HPL_pdrpanrlT.o HPL_pdfact.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pfaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocmax.c +HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpN.c +HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpT.c +HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmxswp.c +HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrN.c +HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrT.c +HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllN.c +HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllT.c +HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlN.c +HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlT.c +HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) 
../HPL_pdrpanllN.c +HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllT.c +HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrN.c +HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrT.c +HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlN.c +HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlT.c +HPL_pdfact.o : ../HPL_pdfact.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdfact.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_equil.c new file mode 100644 index 000000000..b917a6525 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_equil.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_equil +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_TRANS TRANS, + const int N, + double * U, + const int LDU, + int * IPLEN, + const int * IPMAP, + const int * IPMAPM1, + int * IWORK +) +#else +void HPL_equil +( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_TRANS TRANS; + const int N; + double * U; + const int LDU; + int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_equil equilibrates the local pieces of U, so that on exit to + * this function, pieces of U contained in every process row are of the + * same size. This phase makes the rolling phase optimal. In addition, + * this function probes for the column panel L and forwards it when + * possible. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be equilibrated) information. + * + * TRANS (global input) const enum HPL_TRANS + * On entry, TRANS specifies whether U is stored in transposed + * or non-transposed form. + * + * N (local input) const int + * On entry, N specifies the number of rows or columns of U. N + * must be at least 0. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. 
+ * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]) when U is stored in + * non-transposed form, and MAX(1,N) otherwise. + * + * IPLEN (global input) int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension NPROW+1. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, ip, ipU, ipcur, iprow, iptgt, lastrow, + left, npm1, nprow, ll, llU, llcur, lltgt, + right, slen, smax, smin; +/* .. + * .. Executable Statements .. + */ + if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; +/* + * If the current distribution of the pieces of U is already optimal for + * the rolling phase, then return immediately. The optimal distribution + * is such that ip processes have smax items and the remaining processes + * only have smin items. Another way to check this is to verify that all + * differences IPLEN[i+1] - IPLEN[i] are either smin or smax.
+ */ + smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow; + ip = slen - nprow * ( smin = slen / nprow ); + + iprow = 0; + do + { + ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++; + } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) ); + + if( iprow == nprow ) return; +/* + * Now, we are sure the distribution of the pieces of U is not optimal + * with respect to the rolling phase, thus perform equilibration. Go + * through the list of processes: Processes that have rows that do not + * belong to them with respect to the optimal mapping spread them in a + * logarithmic fashion. To simplify a little bit the implementation, and + * mainly the packing, a source process row spreads its data to its left + * first, and then to its right. + */ + IWORK[nprow] = slen; + + for( iprow = 0; iprow < nprow; iprow++ ) + { + llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] ); + if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax; } + else { lltgt = smin; iptgt = iprow * smin + ip; } + + left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU ); +/* + * If I have something to spread to either the left or the right + */ + if( ( llU > 0 ) && ( left || right ) ) + { /* Figure out how much every other process should have */ + + ipcur = ipU; llcur = llU; + + for( i = 0; i < nprow; i++ ) + { + if( i < ip ) { lltgt = smax; iptgt = i * smax; } + else { lltgt = smin; iptgt = i * smin + ip; } + lastrow = iptgt + lltgt - 1; + + if( ( lastrow >= ipcur ) && ( llcur > 0 ) ) + { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; } + else { ll = 0; } + + IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur; + } +/* + * Equilibration phase + */ + if( TRANS == HplNoTrans ) + { + if( left ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + else + { + if( left ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft, N, 
U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + } + } +/* + * Finally update IPLEN with the indexes corresponding to the new dis- + * tribution of U - IPLEN[nprow] remained unchanged. + */ + for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip ); +/* + * End of HPL_equil + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_logsort.c new file mode 100644 index 000000000..0715159bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_logsort.c @@ -0,0 +1,185 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_logsort +( + const int NPROCS, + const int ICURROC, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_logsort +( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 ) + const int NPROCS; + const int ICURROC; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_logsort computes an array IPMAP and its inverse IPMAPM1 that + * contain the logarithmic sorted processes id with repect to the local + * number of rows of U that they own. This is necessary to ensure that + * the logarithmic spreading of U is optimal in terms of number of steps + * and communication volume as well. In other words, the larget pieces + * of U will be sent a minimal number of times. 
+ * + * Arguments + * ========= + * + * NPROCS (global input) const int + * On entry, NPROCS specifies the number of process rows in the + * process grid. NPROCS is at least one. + * + * ICURROC (global input) const int + * On entry, ICURROC is the source process row. + * + * IPLEN (global input/output) int * + * On entry, IPLEN is an array of dimension NPROCS+1, such that + * IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, + * that process i-1 has. On exit, IPLEN[i] is the number of + * rows of U in the processes before process IPMAP[i] after the + * sort, with the convention that IPLEN[NPROCS] is the total + * number of rows of the panel. In other words, IPLEN[i+1] - + * IPLEN[i] is the number of rows of A that should be moved to + * the process IPMAP[i]. IPLEN is such that the number of rows + * of the source process row is IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROCS. On exit, + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myroc] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROCS. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROCS) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dist, i, ip, iplen_i, iplen_j, itmp, j, k; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the logarithmic distance between process j and process 0, as + * well as the maximum logarithmic distance. IPMAPM1 is workarray here. 
+ */ + for( j = 0, dist = 0; j < NPROCS; j++ ) + { + IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0; + do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); + IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp; + } +/* + * Shift IPLEN[1..NPROCS] of ICURROC places, so that IPLEN[1] is now + * what used to be IPLEN[ICURROC+1]. Initialize IPMAP, so that IPMAP[0] + * is ICURROC. + */ + for( j = 0; j < ICURROC; j++ ) + { + for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i]; + IPLEN[NPROCS] = itmp; + } +/* + * logarithmic sort + */ + for( k = 1; k <= dist; k++ ) + { + for( j = 1; j < NPROCS; j++ ) + { + if( IPMAPM1[j] == k ) + { + for( i = 2; i < NPROCS; i++ ) + { + if( k < IPMAPM1[i] ) + { + iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1]; + + if( iplen_j < iplen_i ) + { + IPLEN[j+1] = iplen_i; IPLEN[i+1] = iplen_j; + itmp = IPMAP[j]; IPMAP[j] = IPMAP[i]; + IPMAP[i] = itmp; + } + } + } + } + } + } +/* + * Compute IPLEN and IPMAPM1 (the inverse of IPMAP) + */ + IPLEN[0] = 0; + + for( i = 0; i < NPROCS; i++ ) + { + IPMAPM1[ IPMAP[i] ] = i; + IPLEN[i+1] += IPLEN[i]; + } +/* + * End of HPL_logsort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv.c new file mode 100644 index 000000000..ced74269e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with or without look-ahead. The lower triangular factor is left + * unpivoted and the pivots are not returned. The right hand side is the + * N+1 column of the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( A->n <= 0 ) return; + + A->info = 0; + + if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) ) + { + HPL_pdgesv0( GRID, ALGO, A ); + } + else + { + HPL_pdgesvK2( GRID, ALGO, A ); + } +/* + * Solve upper triangular system + */ + if( A->info == 0 ) HPL_pdtrsv( GRID, A ); +/* + * End of HPL_pdgesv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv0.c new file mode 100644 index 000000000..d79b6fa55 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesv0.c @@ -0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv0 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv0 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv0 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * without look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, j, jb, n, nb, tag=MSGID_BEGIN_FACT, + test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( N = A->n ) <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + + HPL_pdupdate = ALGO->upfun; nb = A->nb; +/* + * Allocate a panel list of length 1 - Allocate panel[0] resources + */ + panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); } + + HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag, + &panel[0] ); +/* + * Loop over the columns of A + */ + for( j = 0; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Release panel resources - re-initialize panel data structure + */ + (void) HPL_pdpanel_free( panel[0] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] ); +/* + * Factor and broadcast current panel - update + */ + HPL_pdfact( panel[0] ); + (void) HPL_binit( panel[0] ); + do + { (void) HPL_bcast( panel[0], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[0] ); + HPL_pdupdate( NULL, NULL, panel[0], -1 ); +/* + * Update message id for next factorization + */ + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Release panel resources and panel list + */ + (void) HPL_pdpanel_disp( &panel[0] ); + + if( panel ) free( panel ); +/* + * End of HPL_pdgesv0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK1.c new file mode 100644 index 000000000..ff1958cfc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK1.c @@ -0,0 +1,222 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK1 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK1 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK1 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. 
The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. 
+ */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel - use long topology for those + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-1-k panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Allocate current panel resources - Finish latest update - Factor and + * broadcast current panel + */ + HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Release latest panel resources - circular of the panel pointers + * Go to the next process row and column - update the message ids for + * broadcast + */ + (void) HPL_pdpanel_disp( &panel[0] ); + for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + + if( panel ) free( panel ); +/* + * End of HPL_pdgesvK1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK2.c new file mode 100644 index 000000000..dec506ab9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdgesvK2.c @@ -0,0 +1,231 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK2 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK2 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK2 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + HPL_T_panel * p, * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Create last depth+1 panel + */ + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart, + jstart, tag, &panel[depth] ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-k-1 panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, 
panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Initialize current panel - Finish latest update, Factor and broadcast + * current panel + */ + (void) HPL_pdpanel_free( panel[depth] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Circular of the panel pointers: + * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp; + * + * Go to next process row and column - update the message ids for broadcast + */ + p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + panel[depth] = p; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + (void) HPL_pdpanel_disp( &panel[depth] ); + + if( panel ) free( panel ); +/* + * End of 
HPL_pdgesvK2 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c new file mode 100644 index 000000000..b4433e1be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c @@ -0,0 +1,432 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU jb /* NOTE(review): handed to the HPL_dlaswp0xN kernels right after U; presumably U's leading dimension - confirm */ +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb doubles, plus align extra doubles; W itself is derived from vptr through HPL_PTR) + */ + vptr = (void*)malloc( + ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counterparts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necessary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise. 
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power-of-2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this latter case, if I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcasts U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c new file mode 100644 index 000000000..7a9764c09 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c @@ -0,0 +1,433 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU n /* NOTE(review): handed to the HPL_dlaswp0xT kernels right after U; presumably U's leading dimension - confirm */ +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb doubles, plus align extra doubles; W itself is derived from vptr through HPL_PTR) + */ + vptr = (void*)malloc( ( (size_t)(align) + + ((size_t)(jb) * (size_t)(ldW))) * + sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counterparts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necessary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise. 
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power-of-2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this latter case, if I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcasts U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c new file mode 100644 index 000000000..31f219840 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; /* -1 = not read yet; set once from PANEL->algo->equil below and, being static, kept across calls */ + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU jb /* NOTE(review): handed to the calls below together with U; presumably U's leading dimension - confirm */ +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). 
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01N was called before: only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k], + 0, LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen, + ipmap, ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * 
Permute U in every process row + */ + HPL_dlaswp00N( jb, n, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c new file mode 100644 index 000000000..0c4de2669 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. 
K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU n +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). 
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01T was call before only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0, + iplen[k], LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap, + ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * 
Permute U in every process row + */ + HPL_dlaswp10N( n, jb, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdtrsv.c new file mode 100644 index 000000000..d2135130a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdtrsv.c @@ -0,0 +1,296 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtrsv +( + HPL_T_grid * GRID, + HPL_T_pmat * AMAT +) +#else +void HPL_pdtrsv +( GRID, AMAT ) + HPL_T_grid * GRID; + HPL_T_pmat * AMAT; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtrsv solves an upper triangular system of linear equations. + * + * The rhs is the last column of the N by N+1 matrix A. The solve starts + * in the process column owning the Nth column of A, so the rhs b may + * need to be moved one process column to the left at the beginning. The + * routine therefore needs a column vector in every process column but + * the one owning b. The result is replicated in all process rows, and + * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + * + * The algorithm uses decreasing one-ring broadcast in process rows and + * columns implemented in terms of synchronous communication point to + * point primitives. The lookahead of depth 1 is used to minimize the + * critical path. 
This entire operation is essentially ``latency'' bound + * and an estimate of its running time is given by: + * + * (move rhs) lat + N / ( P bdwth ) + + * (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + * gam2 N^2 / ( P Q ), + * + * where gam2 is an estimate of the Level 2 BLAS rate of execution. + * There are N / NB diagonal blocks. One must exchange 2 messages of + * length NB to compute the next NB entries of the vector solution, as + * well as performing a total of N^2 floating point operations. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * AMAT (local input/output) HPL_T_pmat * + * On entry, AMAT points to the data structure containing the + * local array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Ccomm, Rcomm; + double * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL, + * XR=NULL, * Xd=NULL, * Xdprev=NULL, + * W=NULL; + int Alcol, Alrow, Anpprev, Anp, Anq, Bcol, + Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid, + Wfr=0, colprev, kb, kbprev, lda, mycol, + myrow, n, n1, n1p, n1pprev=0, nb, npcol, + nprow, rowprev, tmp1, tmp2; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif + if( ( n = AMAT->n ) <= 0 ) return; + nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X; + + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV; + Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1; + GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 ); +/* + * Move the rhs in the process column owning the last column of A. 
+ */ + Mnumroc( Anp, n, nb, nb, myrow, 0, nprow ); + Mnumroc( Anq, n, nb, nb, mycol, 0, npcol ); + + tmp1 = ( n - 1 ) / nb; + Alrow = tmp1 - ( tmp1 / nprow ) * nprow; + Alcol = tmp1 - ( tmp1 / npcol ) * npcol; + kb = n - tmp1 * nb; + + Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda ); + Mindxg2p( n, nb, nb, Bcol, 0, npcol ); + + if( ( Anp > 0 ) && ( Alcol != Bcol ) ) + { + if( mycol == Bcol ) + { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); } + else if( mycol == Alcol ) + { (void) HPL_recv( XC, Anp, Bcol, Rmsgid, Rcomm ); } + } + Rmsgid = ( Rmsgid + 2 > + MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 ); + if( mycol != Alcol ) + { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; } +/* + * Set up lookahead + */ + n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb ); + if( Anp > 0 ) + { + W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) ); + if( W == NULL ) + { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); } + Wfr = 1; + } + + Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda ); + tmp1 = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); } + if( mycol == Alcol ) + { + Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq ); + if( myrow == Alrow ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, Xd, 1 ); + } + } + + rowprev = Alrow; Alrow = MModSub1( Alrow, nprow ); + colprev = Alcol; Alcol = MModSub1( Alcol, npcol ); + kbprev = kb; n -= kb; + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); +/* + * Start the operations + */ + while( n > 0 ) + { + if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; } + if( myrow == Alrow ) { Anp -= kb; } +/* + * Broadcast (decreasing-ring) of previous solution block in 
previous + * process column, compute partial update of current block and send it + * to current process column. + */ + if( mycol == colprev ) + { +/* + * Send previous solution block in process row above + */ + if( myrow == rowprev ) + { + if( GridIsNot1xQ ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else + { + (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ), + Cmsgid, Ccomm ); + } +/* + * Compute partial update of previous solution block and send it to cur- + * rent column + */ + if( n1pprev > 0 ) + { + tmp1 = Anpprev - n1pprev; + HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev, + -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone, + XC+tmp1, 1 ); + if( GridIsNotPx1 ) + (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm ); + } +/* + * Finish the (decreasing-ring) broadcast of the solution block in pre- + * vious process column + */ + if( ( myrow != rowprev ) && + ( myrow != MModAdd1( rowprev, nprow ) ) ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else if( mycol == Alcol ) + { +/* + * Current column receives and accumulates partial update of previous + * solution block + */ + if( n1pprev > 0 ) + { + (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm ); + HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 ); + } + } +/* + * Solve current diagonal block + */ + if( ( mycol == Alcol ) && ( myrow == Alrow ) ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 ); + } +/* +* Finish previous update +*/ + if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) + HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone, + Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 ); +/* +* Save info of current step and update info for the next step +*/ + if( mycol == Alcol ) { Xdprev = Xd; Aprev = Aptr; } + if( myrow == Alrow ) { Anpprev -= kb; } + rowprev = Alrow; colprev = 
Alcol; + n1pprev = n1p; kbprev = kb; n -= kb; + Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol ); + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV : Rmsgid+2 ); + Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV+1 : Cmsgid+2 ); + } +/* + * Replicate last solution block + */ + if( mycol == colprev ) + (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev, + Ccomm ); + + if( Wfr ) free( W ); +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif +/* + * End of HPL_pdtrsv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNN.c new file mode 100644 index 000000000..7e31ddcd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNN.c @@ -0,0 +1,442 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. 
+ */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, 
Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq 
-= n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNT.c new file mode 100644 index 000000000..faa3ef207 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateNT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNT broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing + * submatrix (using the panel PANEL). + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, when PBCST is not NULL on entry, IFLAG indicates + * whether or not the broadcast has been completed. When PBCST + * is NULL on entry, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information.
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. If NN is negative, all PANEL->nq columns are updated. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void)
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ?
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU );
+ Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n;
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTN.c new file mode 100644 index 000000000..a16aa26a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTN.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTN broadcasts (forwards) the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing + * submatrix (using the panel PANEL). + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, when PBCST is not NULL on entry, IFLAG indicates + * whether or not the broadcast has been completed. When PBCST + * is NULL on entry, IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information.
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. If NN is negative, all PANEL->nq columns are updated. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void)
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ?
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occurred yet, splitting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -=
n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTT.c new file mode 100644 index 000000000..81e6cc4b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pdupdateTT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTT broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_perm.c new file mode 100644 index 000000000..bf7cc4503 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_perm.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_perm +( + const int N, + int * LINDXA, + int * LINDXAU, + int * IWORK +) +#else +void HPL_perm +( N, LINDXA, LINDXAU, IWORK ) + const int N; + int * LINDXA; + int * LINDXAU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_perm combines two index arrays and generate the corresponding + * permutation. First, this function computes the inverse of LINDXA, and + * then combine it with LINDXAU. Second, in order to be able to perform + * the permutation in place, LINDXAU is overwritten by the sequence of + * permutation producing the same result. What we ultimately want to + * achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the + * call to this function, this in place permutation can be performed by + * for i in [0..N) swap U[i] with U[LINDXAU[i]]. + * + * Arguments + * ========= + * + * N (global input) const int + * On entry, N specifies the length of the arrays LINDXA and + * LINDXAU. N should be at least zero. 
+ * + * LINDXA (global input/output) int * + * On entry, LINDXA is an array of dimension N containing the + * source indexes. On exit, LINDXA contains the combined index + * array. + * + * LINDXAU (global input/output) int * + * On entry, LINDXAU is an array of dimension N containing the + * target indexes. On exit, LINDXAU contains the sequence of + * permutation, that should be applied in increasing order to + * permute the underlying array U in place. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension N. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j, k, fndd; +/* .. + * .. Executable Statements .. + */ +/* + * Inverse LINDXA - combine LINDXA and LINDXAU - Initialize IWORK + */ + for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; } + for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; } + + for( i = 0; i < N; i++ ) + { + /* search LINDXA such that LINDXA[j] == i */ + j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--; + /* search IWORK such that IWORK[k] == j */ + k = 0; do { fndd = ( IWORK[k] == j ); k++; } while( !fndd ); k--; + /* swap IWORK[i] and IWORK[k]; LINDXAU[i] = k */ + j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j; + LINDXAU[i] = k; + } +/* + * End of HPL_perm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pipid.c new file mode 100644 index 000000000..ab5ef949f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_pipid.c @@ -0,0 +1,187 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pipid +( + HPL_T_panel * PANEL, + int * K, + int * IPID +) +#else +void HPL_pipid +( PANEL, K, IPID ) + HPL_T_panel * PANEL; + int * K; + int * IPID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pipid computes an array IPID that contains the source and final + * destination of matrix rows resulting from the application of N + * interchanges as computed by the LU factorization with row partial + * pivoting. The array IPID is such that the row of global index IPID(i) + * should be mapped onto the row of global index IPID(i+1). Note that we + * cannot really know the length of IPID a priori. However, we know that + * this array is at least 2*N long, since there are N rows to swap and + * broadcast. The length of this array must be smaller than or equal to + * 4*N, since every row is swapped with at most a single distinct remote + * row. The algorithm constructing IPID goes as follows: Let IA be the + * global index of the first row to be swapped. + * + * For every row src IA + i with i in [0..N) to be swapped with row dst + * such that dst is given by DPIV[i]: + * + * Is row src the destination of a previous row of the current block, + * that is, is there k odd such that IPID(k) is equal to src ? + * Yes: update this destination with dst. 
For example, if the + * pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, + * we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it + * was thought so far ... + * No : add the pair (src,dst) at the end of IPID; row src has not + * been moved yet. + * + * Is row dst different from src the destination of a previous row of + * the current block, i.e., is there k odd such that IPID(k) is equal to + * dst ? + * Yes: update IPID(k) with src. For example, if the pivot array + * is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in + * fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought + * so far ... + * No : add the pair (dst,src) at the end of IPID; row dst has not + * been moved yet. + * + * Note that when src is equal to dst, the pair (dst,src) should not be + * added to IPID in order to avoid duplicated entries in this array. + * During the construction of the array IPID, we make sure that the + * first N entries are such that IPID(k) with k odd is equal to IA+k/2. + * For k in [0..K/2), the row of global index IPID(2*k) should be + * mapped onto the row of global index IPID(2*k+1). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global output) int * + * On exit, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global output) int * + * On entry, IPID is an array of length 4*N. On exit, the first + * K entries of that array contain the src and final destination + * resulting from the application of the N interchanges as + * specified by DPIV. The pairs (src,dst) are contiguously + * stored and sorted so that IPID(2*i+1) is equal to IA+i with i + * in [0..N) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int dst, fndd, fnds, ia, i, j, jb, lst, off, + src; + double * dpiv; +/* .. + * .. Executable Statements .. + */ + dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia; + dst = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2; + if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; } + + for( i = 1; i < jb; i++ ) + { + fnds = 0; j = 1; + + if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) ) + { + do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } } + while( !( fnds ) && ( j < *K ) ); + if( !fnds ) { lst = *K; off = 2; IPID[lst] = src; } + else { lst = fnds-1; off = 0; } + IPID[lst+1] = dst; + } + else + { + fndd = 0; + do + { + if ( src == IPID[j] ) { fnds = j; } + else if( dst == IPID[j] ) { fndd = j; } + j += 2; + } + while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) ); + if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off = 2; } + else { IPID[fnds] = dst; off = 0; } + if( !fndd ) { lst = *K+off; IPID[lst ] = dst; off += 2; } + else { lst = fndd-1; } + IPID[lst+1] = src; + } +/* + * Enforce IPID(1,i) equal to src = ia + i + */ + if( lst != ( j = ( i << 1 ) ) ) + { + src = IPID[j ]; IPID[j ] = IPID[lst ]; IPID[lst ] = src; + dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst; + } + *K += off; + } +/* + * End of HPL_pipid + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx0.c new file mode 100644 index 000000000..be12639d0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx0.c @@ -0,0 +1,281 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx0 +( + HPL_T_panel * PANEL, + const int K, + int * IPID, + int * LINDXA, + int * LINDXAU, + int * LLEN +) +#else +void HPL_plindx0 +( PANEL, K, IPID, LINDXA, LINDXAU, LLEN ) + HPL_T_panel * PANEL; + const int K; + int * IPID; + int * LINDXA; + int * LINDXAU; + int * LLEN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. + * + * On entry, the array IPID of length K is such that the row of global + * index IPID(i) should be mapped onto row of global index IPID(i+1). + * Let IA be the global index of the first row to be swapped. For k in + * [0..K/2), the row of global index IPID(2*k) should be mapped onto the + * row of global index IPID(2*k+1). The question then, is to determine + * which rows should ultimately be part of U. + * + * First, some rows of the process ICURROW may be swapped locally. One + * of this row belongs to U, the other one belongs to my local piece of + * A. The other rows of the current block are swapped with remote rows + * and are thus not part of U. These rows however should be sent along, + * and grabbed by the other processes as we progress in the exchange + * phase. 
+ * + * So, assume that I am ICURROW and consider a row of index IPID(2*i) + * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less + * than N, this row is locally swapped and should be copied into U at + * the position IPID(2*i+1) - IA. No row will be exchanged for this one. + * If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be + * locally copied into my local piece of A at the position corresponding + * to the row of global index IPID(2*i+1). + * + * If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) + * is to be swapped away and strictly speaking does not belong to U, but + * to A remotely. Since this process will however send this array U, + * this row is copied into U, exactly where the row IPID(2*i+1) should + * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to + * IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position + * IPID(2*k1+1)-IA. + * + * It is thus important to put the rows that go into U, i.e., such that + * IPID(2*i+1) - IA is less than N at the begining of the array IPID. By + * doing so, U is formed, and the local copy is performed in just one + * sweep. + * + * Two lists LINDXA and LINDXAU are built. LINDXA contains the local + * index of the rows I have that should be copied. LINDXAU contains the + * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A + * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). In the process + * ICURROW, the initial packing algorithm proceeds as follows. 
+ * + * for all entries in IPID, + * if IPID(2*i) is in ICURROW, + * if IPID(2*i+1) is in ICURROW, + * if( IPID(2*i+1) - IA < N ) + * save corresponding local position + * of this row (LINDXA); + * save local position (LINDXAU) in U + * where this row goes; + * [copy row IPID(2*i) in U at position + * IPID(2*i+1)-IA; ]; + * else + * save corresponding local position of + * this row (LINDXA); + * save local position (-LINDXAU) in A + * where this row goes; + * [copy row IPID(2*i) in my piece of A + * at IPID(2*i+1);] + * end if + * else + * find k1 such that IPID(2*k1) = IPID(2*i+1); + * copy row IPID(2*i) in U at position + * IPID(2*k1+1)-IA; + * save corresponding local position of this + * row (LINDXA); + * save local position (LINDXAU) in U where + * this row goes; + * end if + * end if + * end for + * + * Second, if I am not the current row process ICURROW, all source rows + * in IPID that I own are part of U. Indeed, they are swapped with one + * row of the current block of rows, and the main factorization + * algorithm proceeds one row after each other. The processes different + * from ICURROW, should exchange and accumulate those rows until they + * receive some data previously owned by the process ICURROW. + * + * In processes different from ICURROW, the initial packing algorithm + * proceeds as follows. Consider a row of global index IPID(2*i) that I + * own. When I will be receiving data previously owned by ICURROW, i.e., + * U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, + * and this particular row of U should be first copied into my piece of + * A, at A(il,:), where il is the local row index corresponding to + * IPID(2*i). Now,initially, this row will be packed into workspace, say + * as the kth row of that work array. The following algorithm sets + * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row + * should be copied. LINDXA(k) stores the local index in A where this + * row of U should be copied, i.e il. 
+ * + * for all entries in IPID, + * if IPID(2*i) is not in ICURROW, + * copy row IPID(2*i) in work array; + * save corresponding local position + * of this row (LINDXA); + * save position (LINDXAU) in U where + * this row should be copied; + * end if + * end for + * + * Since we are at it, we also globally figure out how many rows every + * process has. That is necessary, because it would rather be cumbersome + * to figure it on the fly during the bi-directional exchange phase. + * This information is kept in the array LLEN of size NPROW. Also note + * that the arrays LINDXA and LINDXAU are of max length equal to 2*N. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * LINDXA (local output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (local output) int * + * On exit, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * LLEN (global output) int * + * On entry, LLEN is an array of length NPROW. On exit, it + * contains how many rows every process has. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int dst, dstrow, fndd, i, ia, icurrow, il, + ip=0, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + icurrow = PANEL->prow; jb = PANEL->jb; + nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; + + for( i = 0; i < nprow; i++ ) LLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; + Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++; + + if( myrow == srcrow ) + { + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; dst = IPID[i+1]; + + if( myrow == icurrow ) + { + Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( dstrow == icurrow ) + { + if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; } + else + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + } + else + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + LINDXAU[ip] = IPID[j-1] - ia; + } + } + else { LINDXAU[ip] = dst - ia; } + + ip++; + } + } +/* + * End of HPL_plindx0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx1.c new file mode 100644 index 000000000..a24fd4c56 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx1.c @@ -0,0 +1,275 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx1 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPA, + int * LINDXA, + int * LINDXAU, + int * IPLEN, + int * IPMAP, + int * IPMAPM1, + int * PERMU, + int * IWORK +) +#else +void HPL_plindx1 +( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPA; + int * LINDXA; + int * LINDXAU; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; + int * PERMU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. In addition, this function computes + * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic + * mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K. 
The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * IPA (global output) int * + * On exit, IPA specifies the number of rows that the current + * process row has that either belong to U or should be swapped + * with remote rows of A. + * + * LINDXA (global output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (global output) int * + * On entry, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IPMAP[i] after the sort + * with the convention that IPLEN[nprow] is the total number of + * rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the + * local number of rows of A that should be moved to the process + * IPMAP[i]. IPLEN is such that the number of rows of the source + * process row can be computed as IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW.
On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROCS) + * + * PERMU (global output) int * + * On entry, PERMU is an array of dimension JB. On exit, PERMU + * contains a sequence of permutations, that should be applied + * in increasing order to permute in place the row panel U. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension 2*JB. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int * iwork; + int dst, dstrow, fndd, i, ia, icurrow, il, + ip, ipU, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + */ + HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ); +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA + * elsewhere and PERMU in every process. 
+ */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; icurrow = PANEL->prow; + + iwork = IWORK + jb; + + if( myrow == icurrow ) + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; + + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + + PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + ip++; + } + } + *IPA = ip; + } + else + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); +/* + * LINDXA[i] is the local index of the row of A that belongs into U + */ + if( myrow == dstrow ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; ip++; + } +/* + * iwork[i] is the local (current) position index in U + * PERMU[i] is the local (final) destination index in U + */ + if( srcrow == icurrow ) + { + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + } + } + 
*IPA = 0; + } +/* + * Simplify iwork and PERMU, return in PERMU the sequence of permutation + * that need to be apply to U after it has been broadcast. + */ + HPL_perm( jb, iwork, PERMU, IWORK ); +/* + * Reset IPLEN to its correct value + */ + for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1]; + IPLEN[0] = 0; +/* + * End of HPL_plindx1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx10.c new file mode 100644 index 000000000..fa460fd35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_plindx10.c @@ -0,0 +1,155 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx10 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_plindx10 +( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx10 computes three arrays IPLEN, IPMAP and IPMAPM1 that + * contain the logarithmic mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. 
+ * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IMAP[i] after the sort, with + * the convention that IPLEN[nprow] is the total number of rows. + * In other words, IPLEN[i+1] - IPLEN[i] is the local number of + * rows of A that should be moved for each process. IPLEN is + * such that the number of rows of the source process row can be + * computed as IPLEN[1] - IPLEN[0], and the remaining entries of + * this array are sorted so that the quantities IPLEN[i+1] - + * IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROW) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dst, dstrow, i, ia, icurrow, jb, nb, + nprow, src, srcrow; +/* .. + * .. Executable Statements .. + */ + nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; + ia = PANEL->ia; icurrow = PANEL->prow; +/* + * Compute redundantly the local number of rows that each process has + * and that belong to U in IPLEN[1 .. 
nprow+1] + */ + for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++; + } + } +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + * (the inverse of IPMAP) + */ + HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 ); +/* + * End of HPL_plindx10 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollN.c new file mode 100644 index 000000000..e68590a01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollN.c @@ -0,0 +1,225 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollN +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollN rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the number of columns of U. N must be + * at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[NPROW]). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type[2]; + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. 
Executable Statements .. + */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE, + &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); + } + + if( lengthS > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE, + &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); } +/* + * End of HPL_rollN + */ +} diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollT.c new file mode 100644 index 000000000..0160c9412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_rollT.c @@ -0,0 +1,259 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollT +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollT rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type[2]; +#endif + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE, + &type[I_RECV] ); + else + ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE, + &type[I_RECV] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. 
+ */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU, + MPI_DOUBLE, partner, Cmsgid, comm, &request ); +#endif + } + + if( lengthS > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE, + &type[I_SEND] ); + else + ierr = MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE, + &type[I_SEND] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU, + MPI_DOUBLE, partner, Cmsgid, comm ); +#endif + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#if 0 + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); +#endif + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); } +/* + * End of HPL_rollT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadN.c new file mode 100644 index 000000000..202611e7f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadN.c @@ -0,0 +1,303 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadN +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadN spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of rows of U, that + * should be spread on any given process row. This function also probes + * for the presence of the column panel PBCST. In case of success, this + * panel will be forwarded. If PBCST is NULL on input, this probing + * mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of columns of U. N + * must be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i] is the number of rows of U in the + * processes before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0..
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type; + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U to the left + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) 
HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U to the right - offset the IPLEN, and IPMAP arrays + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); } +/* + * End of HPL_spreadN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadT.c 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadT.c new file mode 100644 index 000000000..1adf93507 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/HPL_spreadT.c @@ -0,0 +1,372 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadT +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadT spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of columns of U, + * that should be spread on any given process row. This function also + * probes for the presence of the column panel PBCST. If available, + * this panel will be forwarded. If PBCST is NULL on input, this + * probing mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N); the code below assumes LDU == N. + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i] is the number of columns of U in the + * processes before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of columns. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of columns of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0..
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type; +#endif + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; +/* + * Spread to the right - offset the IPLEN and IPMAP arrays + */ + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? 
lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); } +/* + * End of HPL_spreadT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Make.inc 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Make.inc @@ -0,0 +1 @@ +../../../Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Makefile new file mode 100644 index 000000000..7898665f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/src/pgesv/intel64/Makefile @@ -0,0 +1,136 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4.
The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_pgeobj = \ + HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o \ + HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o \ + HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o \ + HPL_spreadT.o HPL_rollN.o HPL_rollT.o \ + HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o \ + HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o \ + HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o \ + HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pgeobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pipid.o : ../HPL_pipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pipid.c +HPL_plindx0.o : ../HPL_plindx0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx0.c +HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00N.c +HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00T.c +HPL_perm.o : ../HPL_perm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_perm.c +HPL_logsort.o : ../HPL_logsort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_logsort.c +HPL_plindx10.o : ../HPL_plindx10.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx10.c +HPL_plindx1.o : ../HPL_plindx1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx1.c +HPL_spreadN.o : ../HPL_spreadN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) 
../HPL_spreadN.c +HPL_spreadT.o : ../HPL_spreadT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadT.c +HPL_rollN.o : ../HPL_rollN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollN.c +HPL_rollT.o : ../HPL_rollT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollT.c +HPL_equil.o : ../HPL_equil.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_equil.c +HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01N.c +HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01T.c +HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNN.c +HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNT.c +HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTN.c +HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTT.c +HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtrsv.c +HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv0.c +HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK1.c +HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK2.c +HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.am new file mode 100644 index 000000000..452ea5f06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.am @@ -0,0 +1,13 @@ + +AM_CPPFLAGS = -I$(top_srcdir)/include + +xhpl_LDADD = ../src/libhpl.a + 
+bin_PROGRAMS = xhpl + +xhpl_SOURCES = \ +matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \ +matgen/HPL_lmul.c matgen/HPL_ladd.c \ +pmatgen/HPL_pdmatgen.c \ +ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \ +ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.in new file mode 100644 index 000000000..034564545 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/Makefile.in @@ -0,0 +1,698 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +bin_PROGRAMS = xhpl$(EXEEXT) +subdir = testing +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) 
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am__dirstamp = $(am__leading_dot)dirstamp +am_xhpl_OBJECTS = matgen/HPL_jumpit.$(OBJEXT) \ + matgen/HPL_rand.$(OBJEXT) matgen/HPL_setran.$(OBJEXT) \ + matgen/HPL_xjumpm.$(OBJEXT) matgen/HPL_lmul.$(OBJEXT) \ + matgen/HPL_ladd.$(OBJEXT) pmatgen/HPL_pdmatgen.$(OBJEXT) \ + ptest/HPL_pddriver.$(OBJEXT) ptest/HPL_pdinfo.$(OBJEXT) \ + ptest/HPL_pdtest.$(OBJEXT) ptimer/HPL_ptimer.$(OBJEXT) \ + ptimer/HPL_ptimer_cputime.$(OBJEXT) \ + ptimer/HPL_ptimer_walltime.$(OBJEXT) +xhpl_OBJECTS = $(am_xhpl_OBJECTS) +xhpl_DEPENDENCIES = ../src/libhpl.a +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = matgen/$(DEPDIR)/HPL_jumpit.Po \ + matgen/$(DEPDIR)/HPL_ladd.Po matgen/$(DEPDIR)/HPL_lmul.Po \ + matgen/$(DEPDIR)/HPL_rand.Po matgen/$(DEPDIR)/HPL_setran.Po \ + matgen/$(DEPDIR)/HPL_xjumpm.Po \ + pmatgen/$(DEPDIR)/HPL_pdmatgen.Po \ + ptest/$(DEPDIR)/HPL_pddriver.Po ptest/$(DEPDIR)/HPL_pdinfo.Po \ + ptest/$(DEPDIR)/HPL_pdtest.Po ptimer/$(DEPDIR)/HPL_ptimer.Po \ + ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po \ + ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = 
$(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(xhpl_SOURCES) +DIST_SOURCES = $(xhpl_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ 
+LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/include +xhpl_LDADD = ../src/libhpl.a +xhpl_SOURCES = \ +matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \ +matgen/HPL_lmul.c matgen/HPL_ladd.c \ +pmatgen/HPL_pdmatgen.c \ +ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \ +ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: 
.c .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu testing/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu testing/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) 
print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) +matgen/$(am__dirstamp): + @$(MKDIR_P) matgen + @: > matgen/$(am__dirstamp) +matgen/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) matgen/$(DEPDIR) + @: > matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_jumpit.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_rand.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_setran.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_xjumpm.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_lmul.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_ladd.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +pmatgen/$(am__dirstamp): + @$(MKDIR_P) pmatgen + @: > pmatgen/$(am__dirstamp) +pmatgen/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pmatgen/$(DEPDIR) + @: > pmatgen/$(DEPDIR)/$(am__dirstamp) +pmatgen/HPL_pdmatgen.$(OBJEXT): pmatgen/$(am__dirstamp) \ + pmatgen/$(DEPDIR)/$(am__dirstamp) +ptest/$(am__dirstamp): + @$(MKDIR_P) ptest + @: > ptest/$(am__dirstamp) +ptest/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ptest/$(DEPDIR) + @: > ptest/$(DEPDIR)/$(am__dirstamp) 
+ptest/HPL_pddriver.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptest/HPL_pdinfo.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptest/HPL_pdtest.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptimer/$(am__dirstamp): + @$(MKDIR_P) ptimer + @: > ptimer/$(am__dirstamp) +ptimer/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ptimer/$(DEPDIR) + @: > ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer_cputime.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer_walltime.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) + +xhpl$(EXEEXT): $(xhpl_OBJECTS) $(xhpl_DEPENDENCIES) $(EXTRA_xhpl_DEPENDENCIES) + @rm -f xhpl$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(xhpl_OBJECTS) $(xhpl_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f matgen/*.$(OBJEXT) + -rm -f pmatgen/*.$(OBJEXT) + -rm -f ptest/*.$(OBJEXT) + -rm -f ptimer/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_jumpit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_ladd.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_lmul.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_rand.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_setran.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_xjumpm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pmatgen/$(DEPDIR)/HPL_pdmatgen.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pddriver.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdinfo.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdtest.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) 
$(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f matgen/$(DEPDIR)/$(am__dirstamp) + -rm -f matgen/$(am__dirstamp) + -rm -f pmatgen/$(DEPDIR)/$(am__dirstamp) + -rm -f pmatgen/$(am__dirstamp) + -rm -f ptest/$(DEPDIR)/$(am__dirstamp) + -rm -f ptest/$(am__dirstamp) + -rm -f ptimer/$(DEPDIR)/$(am__dirstamp) + -rm -f ptimer/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f matgen/$(DEPDIR)/HPL_jumpit.Po + -rm -f matgen/$(DEPDIR)/HPL_ladd.Po + -rm -f matgen/$(DEPDIR)/HPL_lmul.Po + -rm -f matgen/$(DEPDIR)/HPL_rand.Po + -rm -f matgen/$(DEPDIR)/HPL_setran.Po + -rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po + -rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po + -rm -f ptest/$(DEPDIR)/HPL_pddriver.Po + -rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po + -rm -f ptest/$(DEPDIR)/HPL_pdtest.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f matgen/$(DEPDIR)/HPL_jumpit.Po + -rm -f matgen/$(DEPDIR)/HPL_ladd.Po + -rm -f matgen/$(DEPDIR)/HPL_lmul.Po + -rm -f matgen/$(DEPDIR)/HPL_rand.Po + -rm -f matgen/$(DEPDIR)/HPL_setran.Po + -rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po + -rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po + -rm -f ptest/$(DEPDIR)/HPL_pddriver.Po + -rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po + -rm -f ptest/$(DEPDIR)/HPL_pdtest.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + 
+.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-binPROGRAMS clean-generic cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic distclean-tags \ + distdir dvi dvi-am html html-am info info-am install \ + install-am install-binPROGRAMS install-data install-data-am \ + install-dvi install-dvi-am install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-binPROGRAMS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_dmatgen.c new file mode 100644 index 000000000..c14ef0fd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_dmatgen.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dmatgen +( + const int M, + const int N, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_dmatgen +( M, N, A, LDA, ISEED ) + const int M; + const int N; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dmatgen generates (or regenerates) a random matrix A. 
+ * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * M (input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * ISEED (input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd[2], ia1[2], ic1[2], iran1[2], + jseed[2], mult[2]; + int i, incA = LDA - M, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; +/* + * Initialize the random sequence + */ + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; + + HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Generate an M by N matrix + */ + for( j = 0; j < N; A += incA, j++ ) + for( i = 0; i < M; A++, i++ ) *A = HPL_rand(); +/* + * End of HPL_dmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_jumpit.c new file mode 100644 index 000000000..4d4dc4db5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_jumpit.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_jumpit +( + int * MULT, + int * IADD, + int * IRANN, + int * IRANM +) +#else +void HPL_jumpit +( MULT, IADD, IRANN, IRANM ) + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_jumpit jumps in the random sequence from the number X(n) encoded + * in IRANN to the number X(m) encoded in IRANM using the constants A + * and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A + * and C obviously depend on m and n, see the function HPL_xjumpm in + * order to initialize them. + * + * Arguments + * ========= + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant A. 
+ * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant C. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2, that contains + * the 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(m). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + HPL_lmul( IRANN, MULT, j ); /* j = IRANN * MULT; */ + HPL_ladd( j, IADD, IRANM ); /* IRANM = j + IADD; */ + HPL_setran( 0, IRANM ); /* irand = IRANM */ +/* + * End of HPL_jumpit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_ladd.c new file mode 100644 index 000000000..0d4e4c08c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_ladd.c @@ -0,0 +1,126 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_ladd +( + int * J, + int * K, + int * I +) +#else +void HPL_ladd +( J, K, I ) + int * J; + int * K; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ladd adds without carry two long positive integers K and J and + * puts the result into I. 
The long integers I, J, K are encoded on 64 + * bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second + * entry. + * + * Arguments + * ========= + * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + unsigned int itmp0, itmp1; + unsigned int ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16; + unsigned int ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16; + unsigned int jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16; + unsigned int jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16; + +/* .. + * .. Executable Statements .. 
+ */ +/* + * K[1] K[0] K I[0] = (K[0]+J[0]) % 2^32 + * XXXX XXXX carry = (K[0]+J[0]) / 2^32 + * + * + J[1] J[0] J I[1] = K[1] + J[1] + carry + * XXXX XXXX I[1] = I[1] % 2^32 + * ------------- + * I[1] I[0] + * 0XXX XXXX I + */ + itmp0 = ktmp0 + jtmp0; + itmp1 = itmp0 >> 16; I[0] = itmp0 - (itmp1 << 16 ); + itmp1 += ktmp1 + jtmp1; I[0] |= (itmp1 & 65535) << 16; + itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2; + I[1] = itmp0 - ((itmp0 >> 16 ) << 16); + itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3; + I[1] |= (itmp1 & 65535) << 16; +/* + * End of HPL_ladd + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_lmul.c new file mode 100644 index 000000000..254b192f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_lmul.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_lmul +( + int * K, + int * J, + int * I +) +#else +void HPL_lmul +( K, J, I ) + int * K; + int * J; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_lmul multiplies without carry two long positive integers K and J + * and puts the result into I. The long integers I, J, K are encoded on + * 64 bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second entry + * of each array. For efficiency purposes, the intrisic modulo function + * is inlined. + * + * Arguments + * ========= + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. 
+ * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int r, c; + unsigned int kk[4], jj[4], res[5]; +/* .. + * .. Executable Statements .. + */ +/* + * Addition is done with 16 bits at a time. Multiplying two 16-bit + * integers yields a 32-bit result. The lower 16-bits of the result + * are kept in I, and the higher 16-bits are carried over to the + * next multiplication. + */ + for (c = 0; c < 2; ++c) { + kk[2*c] = K[c] & 65535; + kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535; + jj[2*c] = J[c] & 65535; + jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535; + } + + res[0] = 0; + for (c = 0; c < 4; ++c) { + res[c+1] = (res[c] >> 16) & 65535; + res[c] &= 65535; + for (r = 0; r < c+1; ++r) { + res[c] = kk[r] * jj[c-r] + (res[c] & 65535); + res[c+1] += (res[c] >> 16) & 65535; + } + } + + for (c = 0; c < 2; ++c) + I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535)); +/* + * End of HPL_lmul + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_rand.c new file mode 100644 index 000000000..fe4e12f5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_rand.c @@ -0,0 +1,94 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_rand( void ) +#else +double HPL_rand() +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rand generates the next number in the random sequence. This + * function ensures that this number lies in the interval (-0.5, 0.5]. + * + * The static array irand contains the information (2 integers) required + * to generate the next number in the sequence X(n). This number is + * computed as X(n) = HPL_HALF - (2^32 * irand[1] + irand[0]) / d, where the + * constant d is the largest 64 bit positive unsigned integer. + * NOTE(review): the sign here follows the return statement below, which + * subtracts the scaled value from HPL_HALF; the scaling constants + * HPL_DIVFAC, HPL_POW16 and HPL_HALF are defined outside this file - confirm. + * The array + * irand is then updated for the generation of the next number X(n+1) + * in the random sequence as follows X(n+1) = a * X(n) + c. The + * constants a and c should have been preliminarily stored in the arrays + * ias and ics as 2 pairs of integers. The initialization of ias, ics + * and irand is performed by the function HPL_setran. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements ..
+ */ + HPL_setran( 3, j ); /* OPTION 3: j receives the current irand, which HPL_setran then advances */ +/* + * return number between -0.5 and 0.5; j[0] is the low word and j[1] + * the high word, each rebuilt from its two 16-bit halves + */ + return( HPL_HALF - + (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF + + (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF ); +/* + * End of HPL_rand + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_setran.c new file mode 100644 index 000000000..1a3ca73aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_setran.c @@ -0,0 +1,115 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int ias[2], ics[2], irand[2]; + +#ifdef STDC_HEADERS +void HPL_setran +( + const int OPTION, + int * IRAN +) +#else +void HPL_setran +( OPTION, IRAN ) + const int OPTION; + int * IRAN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_setran initializes the random generator with the encoding of the + * first number X(0) in the sequence, and the constants a and c used to + * compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), + * a and c are stored in the static variables irand, ias and ics. When + * OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the + * values of the input array IRAN. When OPTION is 3, IRAN is set to the + * current value of irand, and irand is then advanced to the next element + * of the sequence (irand = irand * ias + ics). Any other value of OPTION + * leaves both IRAN and the static state untouched.
+ * + * Arguments + * ========= + * + * OPTION (local input) const int + * On entry, OPTION is an integer that specifies the operations + * to be performed on the random generator as specified above. + * + * IRAN (local input/output) int * + * On entry, IRAN is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of a random number. It is read + * for OPTION 0, 1 and 2 and written for OPTION 3. + * NOTE(review): the bit-width wording looks stale; HPL_lmul + * documents 32 lower bits in entry 0 and 32 higher bits in + * entry 1 - confirm. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + if( OPTION == 3 ) + { /* return current value */ + IRAN[0] = irand[0]; IRAN[1] = irand[1]; + HPL_lmul( irand, ias, j ); /* j = irand * ias; */ + HPL_ladd( j, ics, irand ); /* irand = j + ics; */ + } + else if( OPTION == 0 ) { irand[0] = IRAN[0]; irand[1] = IRAN[1]; } + else if( OPTION == 1 ) { ias [0] = IRAN[0]; ias [1] = IRAN[1]; } + else if( OPTION == 2 ) { ics [0] = IRAN[0]; ics [1] = IRAN[1]; } +/* + * End of HPL_setran + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_xjumpm.c new file mode 100644 index 000000000..ae70bbc16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/HPL_xjumpm.c @@ -0,0 +1,158 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_xjumpm +( + const int JUMPM, + int * MULT, + int * IADD, + int * IRANN, + int * IRANM, + int * IAM, + int * ICM +) +#else +void HPL_xjumpm +( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) + const int JUMPM; + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; + int * IAM; + int * ICM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_xjumpm computes the constants A and C to jump JUMPM numbers in + * the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in + * MULT and IADD specify how to jump from one entry in the sequence to + * the next. + * + * NOTE(review): the "16-lower and 15-higher bits" wording used for the + * arrays below looks stale; HPL_lmul documents 32 lower bits in entry 0 + * and 32 higher bits in entry 1 - confirm. + * + * Arguments + * ========= + * + * JUMPM (local input) const int + * On entry, JUMPM specifies the number of entries in the + * sequence to jump over. When JUMPM is less than or equal to zero, + * A and C are not computed, IRANM is set to IRANN corresponding + * to a jump of size zero. + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant a to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant c to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. 
On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(n+JUMPM). + * + * IAM (local output) int * + * On entry, IAM is an array of dimension 2.
On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant A to jump from X(n) to X(n+JUMPM) in the random + * sequence. IAM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant A. When JUMPM is less than or + * equal to zero, this array is not referenced. + * + * ICM (local output) int * + * On entry, ICM is an array of dimension 2. On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant C to jump from X(n) to X(n+JUMPM) in the random + * sequence. ICM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant C. When JUMPM is less than or + * equal to zero, this array is not referenced. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2], k; +/* .. + * .. Executable Statements .. + */ + if( JUMPM > 0 ) + { + IAM[0] = MULT[0]; IAM[1] = MULT[1]; /* IAM = MULT; */ + ICM[0] = IADD[0]; ICM[1] = IADD[1]; /* ICM = IADD; */ + for( k = 1; k <= JUMPM-1; k++ ) /* JUMPM-1 further single steps are composed, one per pass */ + { + HPL_lmul( IAM, MULT, j ); /* j = IAM * MULT; */ + IAM[0] = j[0]; IAM[1] = j[1]; /* IAM = j; */ + HPL_lmul( ICM, MULT, j ); /* j = ICM * MULT; */ + HPL_ladd( IADD, j, ICM ); /* ICM = IADD + j; */ + } + HPL_lmul( IRANN, IAM, j ); /* j = IRANN * IAM; */ + HPL_ladd( j, ICM, IRANM ); /* IRANM = j + ICM; */ + } + else + { /* IRANM = IRANN */ + IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1]; + } +/* + * End of HPL_xjumpm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Make.inc @@ -0,0 +1 @@ 
+/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git
a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Makefile new file mode 100644 index 000000000..f027fbc06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/matgen/intel64/Makefile @@ -0,0 +1,95 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# NOTE(review): Make.inc is added by this patch as a symlink to an absolute path under /home/kmcgrie, so this include presumably fails on any other checkout - confirm and consider a relative link. INCdir, ARCHIVER, ARFLAGS, HPLlib, RANLIB, TOUCH and CCFLAGS are not set in this file and are presumably supplied by Make.inc. +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_matgen.h +# +## Object files ######################################################## +# +HPL_matobj = \ + HPL_dmatgen.o HPL_ladd.o HPL_lmul.o \ + HPL_xjumpm.o HPL_jumpit.o HPL_rand.o \ + HPL_setran.o +# +## Targets ############################################################# +# NOTE(review): all, lib and clean name no files but are not declared .PHONY; lib.grd is a stamp file touched after the archive is updated. 
+all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_matobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c +HPL_ladd.o : ../HPL_ladd.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c +HPL_lmul.o : ../HPL_lmul.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c +HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c +HPL_jumpit.o : ../HPL_jumpit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c +HPL_rand.o : ../HPL_rand.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c +HPL_setran.o : ../HPL_setran.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c +# +# ###################################################################### +# +clean : + $(RM) *.o
*.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c new file mode 100644 index 000000000..2d129c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmatgen +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_pdmatgen +( GRID, M, N, NB, A, LDA, ISEED ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmatgen generates (or regenerates) a parallel random matrix A. + * Every local entry is filled with one call to HPL_rand(). + * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. 
+ * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A.
+ * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). + * On exit, this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * ISEED (global input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd [2], ia1 [2], ia2 [2], ia3 [2], + ia4 [2], ia5 [2], ib1 [2], ib2 [2], + ib3 [2], ic1 [2], ic2 [2], ic3 [2], + ic4 [2], ic5 [2], iran1[2], iran2[2], + iran3[2], iran4[2], itmp1[2], itmp2[2], + itmp3[2], jseed[2], mult [2]; + int ib, iblk, ik, jb, jblk, jk, jump1, jump2, + jump3, jump4, jump5, jump6, jump7, lmb, + lnb, mblks, mp, mycol, myrow, nblks, + npcol, nprow, nq; +/* .. + * .. Executable Statements ..
+ */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; /* ISEED goes in entry 0, entry 1 is zero */ +/* + * Generate an M by N matrix starting in process (0,0) + */ + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( ( mp <= 0 ) || ( nq <= 0 ) ) return; /* presumably no local rows or columns on this process - confirm against Mnumroc */ +/* + * Local number of blocks and size of the last one + */ + mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB; + nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB; +/* + * Compute multiplier/adder for various jumps in random sequence. + * itmp1, itmp2 and itmp3 only receive outputs that are not kept. + * The three HPL_setran calls then install iran1 as the current + * value (OPTION 0) and ia1, ic1 as multiplier and adder (OPTION 1, 2). + */ + jump1 = 1; jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB; + jump5 = NB; jump6 = mycol; jump7 = myrow * NB; + + HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2, ic2 ); + HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3, ic3 ); + HPL_xjumpm( jump4, ia3, ic3, iran1, itmp1, ia4, ic4 ); + HPL_xjumpm( jump5, ia3, ic3, iran1, itmp1, ia5, ic5 ); + HPL_xjumpm( jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2 ); + HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Save value of first number in sequence + */ + ib1[0] = iran1[0]; ib1[1] = iran1[1]; + ib2[0] = iran1[0]; ib2[1] = iran1[1]; + ib3[0] = iran1[0]; ib3[1] = iran1[1]; + + for( jblk = 0; jblk < nblks; jblk++ ) + { + jb = ( jblk == nblks - 1 ? lnb : NB ); + for( jk = 0; jk < jb; jk++ ) + { + for( iblk = 0; iblk < mblks; iblk++ ) + { + ib = ( iblk == mblks - 1 ?
lmb : NB ); + for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand(); + HPL_jumpit( ia2, ic2, ib1, iran2 ); + ib1[0] = iran2[0]; ib1[1] = iran2[1]; + } + A += LDA - mp; /* mp entries of this column were written; step to the start of the next column */ + HPL_jumpit( ia3, ic3, ib2, iran3 ); + ib1[0] = iran3[0]; ib1[1] = iran3[1]; + ib2[0] = iran3[0]; ib2[1] = iran3[1]; + } + HPL_jumpit( ia4, ic4, ib3, iran4 ); + ib1[0] = iran4[0]; ib1[1] = iran4[1]; + ib2[0] = iran4[0]; ib2[1] = iran4[1]; + ib3[0] = iran4[0]; ib3[1] = iran4[1]; + } +/* + * End of HPL_pdmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Makefile new file mode 100644 index 000000000..bf33fcd7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/pmatgen/intel64/Makefile @@ -0,0 +1,81 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2.
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# NOTE(review): Make.inc is added by this patch as a symlink to an absolute path under /home/kmcgrie, so this include presumably fails on any other checkout - confirm and consider a relative link. INCdir, ARCHIVER, ARFLAGS, HPLlib, RANLIB, TOUCH and CCFLAGS are not set in this file and are presumably supplied by Make.inc. +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h +# +## Object files ######################################################## +# +HPL_pmaobj = \ + HPL_pdmatgen.o +# +## Targets ############################################################# +# NOTE(review): all, lib and clean name no files but are not declared .PHONY; lib.grd is a stamp file touched after the archive is updated. +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pmaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmatgen.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL.dat new file mode 100644 index 000000000..47aee883e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL.dat @@ -0,0 +1,31 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out 
(6=stdout,7=stderr,file) +4 # of problems sizes (N) +29 30 34 35 Ns +4 # of NBs +1 2 3 4 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +3 # of process grids (P x Q) +2 1 4 Ps +2 4 1 Qs +16.0 threshold +3 # of panel fact +0 1 2 PFACTs (0=left, 1=Crout, 2=Right) +2 # of recursive stopping criterium +2 4 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +3 # of recursive panel fact.
+0 1 2 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +0 DEPTHs (>=0) +2 SWAP (0=bin-exch,1=long,2=mix) +64 swapping threshold +0 L1 in (0=transposed,1=no-transposed) form +0 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pddriver.c new file mode 100644 index 000000000..5e4050f48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pddriver.c @@ -0,0 +1,293 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int main +( + int ARGC, + char * * ARGV +) +#else +int main( ARGC, ARGV ) +/* + * .. Scalar Arguments .. + */ + int ARGC; +/* + * .. Array Arguments .. + */ + char * * ARGV; +#endif +{ +/* + * Purpose + * ======= + * + * main is the main driver program for testing the HPL routines. + * This program is driven by a short data file named "HPL.dat". It initializes MPI, obtains the test parameters through HPL_pdinfo, calls HPL_pdtest for every parameter combination on each process grid, lets rank 0 print the summary, and finalizes MPI. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables ..
+ */ + int nval [HPL_MAX_PARAM], + nbval [HPL_MAX_PARAM], + pval [HPL_MAX_PARAM], + qval [HPL_MAX_PARAM], + nbmval[HPL_MAX_PARAM], + ndvval[HPL_MAX_PARAM], + ndhval[HPL_MAX_PARAM]; + + HPL_T_FACT pfaval[HPL_MAX_PARAM], + rfaval[HPL_MAX_PARAM]; + + HPL_T_TOP topval[HPL_MAX_PARAM]; + + HPL_T_grid grid; + HPL_T_palg algo; + HPL_T_test test; + int L1notran, Unotran, align, equil, in, inb, + inbm, indh, indv, ipfa, ipq, irfa, itop, + mycol, myrow, ns, nbs, nbms, ndhs, ndvs, + npcol, npfs, npqs, nprow, nrfs, ntps, + rank, size, tswap; + HPL_T_ORDER pmapping; + HPL_T_FACT rpfa; /* scratch copy of the factorization variant being dispatched on */ + HPL_T_SWAP fswap; +/* .. + * .. Executable Statements .. + */ + MPI_Init( &ARGC, &ARGV ); +#ifdef HPL_CALL_VSIPL + vsip_init((void*)0); +#endif + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Read and check validity of test parameters from input file + * + * HPL Version 1.0, Linpack benchmark input file + * Your message here + * HPL.out output file name (if any) + * 6 device out (6=stdout,7=stderr,file) + * 4 # of problems sizes (N) + * 29 30 34 35 Ns + * 4 # of NBs + * 1 2 3 4 NBs + * 0 PMAP process mapping (0=Row-,1=Column-major) + * 3 # of process grids (P x Q) + * 2 1 4 Ps + * 2 4 1 Qs + * 16.0 threshold + * 3 # of panel fact + * 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + * 2 # of recursive stopping criterium + * 2 4 NBMINs (>= 1) + * 1 # of panels in recursion + * 2 NDIVs + * 3 # of recursive panel fact.
+ * 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + * 1 # of broadcast + * 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + * 1 # of lookahead depth + * 0 DEPTHs (>=0) + * 2 SWAP (0=bin-exch,1=long,2=mix) + * 4 swapping threshold + * 0 L1 in (0=transposed,1=no-transposed) form + * 0 U in (0=transposed,1=no-transposed) form + * 1 Equilibration (0=no,1=yes) + * 8 memory alignment in double (> 0) + */ + HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval, + &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval, + &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran, + &Unotran, &equil, &align ); +/* + * Loop over different process grids - Define process grid. Go to bottom + * of process grid loop if this case does not use my process. + */ + for( ipq = 0; ipq < npqs; ipq++ ) + { + (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq], + &grid ); + (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol ); + + if( ( myrow < 0 ) || ( myrow >= nprow ) || + ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs; /* coordinates outside this grid: skip it; the label sits after HPL_grid_exit, so that call is bypassed as well */ + + for( in = 0; in < ns; in++ ) + { /* Loop over various problem sizes */ + for( inb = 0; inb < nbs; inb++ ) + { /* Loop over various blocking factors */ + for( indh = 0; indh < ndhs; indh++ ) + { /* Loop over various lookahead depths */ + for( itop = 0; itop < ntps; itop++ ) + { /* Loop over various broadcast topologies */ + for( irfa = 0; irfa < nrfs; irfa++ ) + { /* Loop over various recursive factorizations */ + for( ipfa = 0; ipfa < npfs; ipfa++ ) + { /* Loop over various panel factorizations */ + for( inbm = 0; inbm < nbms; inbm++ ) + { /* Loop over various recursive stopping criteria */ + for( indv = 0; indv < ndvs; indv++ ) + { /* Loop over various # of panels in recursion */ +/* + * Set up the algorithm parameters + */ + algo.btopo = topval[itop]; algo.depth = ndhval[indh]; + algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv]; + + algo.pfact = rpfa = pfaval[ipfa]; + + if( L1notran != 0 ) + { + if( rpfa ==
HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrN; + else algo.pffun = HPL_pdpanrlN; + + algo.rfact = rpfa = rfaval[irfa]; /* rpfa is reused: it now holds the recursive variant */ + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrN; + else algo.rffun = HPL_pdrpanrlN; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN; + else algo.upfun = HPL_pdupdateNT; + } + else + { + if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrT; + else algo.pffun = HPL_pdpanrlT; + + algo.rfact = rpfa = rfaval[irfa]; + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrT; + else algo.rffun = HPL_pdrpanrlT; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN; + else algo.upfun = HPL_pdupdateTT; + } + + algo.fswap = fswap; algo.fsthr = tswap; + algo.equil = equil; algo.align = align; + + HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] ); + + } + } + } + } + } + } + } + } + (void) HPL_grid_exit( &grid ); +label_end_of_npqs: ; + } +/* + * Print ending messages, close output file, exit.
+ */ + if( rank == 0 ) + { + test.ktest = test.kpass + test.kfail + test.kskip; /* total = passed + failed + skipped */ +#ifndef HPL_DETAILED_TIMING + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#else + if( test.thrsh > HPL_rzero ) + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#endif + + HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest, + "tests with the following results:" ); + if( test.thrsh > HPL_rzero ) + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed and passed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kfail, + "tests completed and failed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." ); + } + else + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed without checking," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values."
); + } + + HPL_fprintf( test.outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( test.outfp, "\nEnd of Tests.\n" ); + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); + + if( ( test.outfp != stdout ) && ( test.outfp != stderr ) ) + (void) fclose( test.outfp ); + } +#ifdef HPL_CALL_VSIPL + vsip_finalize((void*)0); +#endif + MPI_Finalize(); + exit( 0 ); /* exit() ends the process here; the return below is never reached */ + + return( 0 ); +/* + * End of main + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdinfo.c new file mode 100644 index 000000000..4ede45be6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdinfo.c @@ -0,0 +1,1182 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdinfo +( + HPL_T_test * TEST, + int * NS, + int * N, + int * NBS, + int * NB, + HPL_T_ORDER * PMAPPIN, + int * NPQS, + int * P, + int * Q, + int * NPFS, + HPL_T_FACT * PF, + int * NBMS, + int * NBM, + int * NDVS, + int * NDV, + int * NRFS, + HPL_T_FACT * RF, + int * NTPS, + HPL_T_TOP * TP, + int * NDHS, + int * DH, + HPL_T_SWAP * FSWAP, + int * TSWAP, + int * L1NOTRAN, + int * UNOTRAN, + int * EQUIL, + int * ALIGN +) +#else +void HPL_pdinfo +( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN ) + HPL_T_test * TEST; + int * NS; + int * N; + int * NBS; + int * NB; + HPL_T_ORDER * PMAPPIN; + int * NPQS; + int * P; + int * Q; + int * NPFS; + HPL_T_FACT * PF; + int * NBMS; + int * NBM; + int * NDVS; + int * NDV; + int * NRFS; + HPL_T_FACT * RF; + int * NTPS; + HPL_T_TOP * TP; + int * NDHS; + int * DH; + HPL_T_SWAP * FSWAP; + int * TSWAP; + int * L1NOTRAN; + int * UNOTRAN; + int * EQUIL; + int * ALIGN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdinfo reads the startup information for the various tests and + * transmits it to all processes. + * + * Arguments + * ========= + * + * TEST (global output) HPL_T_test * + * On entry, TEST points to a testing data structure. On exit, + * the fields of this data structure are initialized as follows: + * TEST->outfp specifies the output file where the results will + * be printed. It is only defined and used by the process 0 of + * the grid. TEST->thrsh specifies the threshhold value for the + * test ratio. TEST->epsil is the relative machine precision of + * the distributed computer. Finally the test counters, kfail, + * kpass, kskip, ktest are initialized to zero. 
+ * + * NS (global output) int * + * On exit, NS specifies the number of different problem sizes + * to be tested. NS is less than or equal to HPL_MAX_PARAM. + * + * N (global output) int * + * On entry, N is an array of dimension HPL_MAX_PARAM. On exit, + * the first NS entries of this array contain the problem sizes + * to run the code with. + * + * NBS (global output) int * + * On exit, NBS specifies the number of different distribution + * blocking factors to be tested. NBS must be less than or equal + * to HPL_MAX_PARAM. + * + * NB (global output) int * + * On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, + * the first NBS entries of this array contain the values of the + * various distribution blocking factors, to run the code with. + * + * PMAPPIN (global output) HPL_T_ORDER * + * On exit, PMAPPIN specifies the process mapping onto the no- + * des of the MPI machine configuration. PMAPPIN defaults to + * row-major ordering. + * + * NPQS (global output) int * + * On exit, NPQS specifies the number of different values that + * can be used for P and Q, i.e., the number of process grids to + * run the code with. NPQS must be less than or equal to + * HPL_MAX_PARAM. + * + * P (global output) int * + * On entry, P is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of P, + * the number of process rows of the NPQS grids to run the code + * with. + * + * Q (global output) int * + * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of Q, + * the number of process columns of the NPQS grids to run the + * code with. + * + * NPFS (global output) int * + * On exit, NPFS specifies the number of different values that + * can be used for PF : the panel factorization algorithm to run + * the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ * + * PF (global output) HPL_T_FACT * + * On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPFS entries of this array contain the various + * panel factorization algorithms to run the code with. + * + * NBMS (global output) int * + * On exit, NBMS specifies the number of various recursive + * stopping criteria to be tested. NBMS must be less than or + * equal to HPL_MAX_PARAM. + * + * NBM (global output) int * + * On entry, NBM is an array of dimension HPL_MAX_PARAM. On + * exit, the first NBMS entries of this array contain the values + * of the various recursive stopping criteria to be tested. + * + * NDVS (global output) int * + * On exit, NDVS specifies the number of various numbers of + * panels in recursion to be tested. NDVS is less than or equal + * to HPL_MAX_PARAM. + * + * NDV (global output) int * + * On entry, NDV is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDVS entries of this array contain the values + * of the various numbers of panels in recursion to be tested. + * + * NRFS (global output) int * + * On exit, NRFS specifies the number of different values that + * can be used for RF : the recursive factorization algorithm to + * be tested. NRFS is less than or equal to HPL_MAX_PARAM. + * + * RF (global output) HPL_T_FACT * + * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NRFS entries of this array contain the various + * recursive factorization algorithms to run the code with. + * + * NTPS (global output) int * + * On exit, NTPS specifies the number of different values that + * can be used for the broadcast topologies to be tested. NTPS + * is less than or equal to HPL_MAX_PARAM. + * + * TP (global output) HPL_T_TOP * + * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, + * the first NTPS entries of this array contain the various + * broadcast (along rows) topologies to run the code with. 
+ * + * NDHS (global output) int * + * On exit, NDHS specifies the number of different values that + * can be used for the lookahead depths to be tested. NDHS is + * less than or equal to HPL_MAX_PARAM. + * + * DH (global output) int * + * On entry, DH is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDHS entries of this array contain the values + * of lookahead depths to run the code with. Such a value is at + * least 0 (no-lookahead) or greater than zero. + * + * FSWAP (global output) HPL_T_SWAP * + * On exit, FSWAP specifies the swapping algorithm to be used in + * all tests. + * + * TSWAP (global output) int * + * On exit, TSWAP specifies the swapping threshold as a number + * of columns when the mixed swapping algorithm was chosen. + * + * L1NOTRA (global output) int * + * On exit, L1NOTRAN specifies whether the upper triangle of the + * panels of columns should be stored in no-transposed form + * (L1NOTRAN=1) or in transposed form (L1NOTRAN=0). + * + * UNOTRAN (global output) int * + * On exit, UNOTRAN specifies whether the panels of rows should + * be stored in no-transposed form (UNOTRAN=1) or transposed + * form (UNOTRAN=0) during their broadcast. + * + * EQUIL (global output) int * + * On exit, EQUIL specifies whether equilibration during the + * swap-broadcast of the panel of rows should be performed + * (EQUIL=1) or not (EQUIL=0). + * + * ALIGN (global output) int * + * On exit, ALIGN specifies the alignment of the dynamically + * allocated buffers in double precision words. ALIGN is greater + * than zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + char file[HPL_LINE_MAX], line[HPL_LINE_MAX], + auth[HPL_LINE_MAX], num [HPL_LINE_MAX]; + FILE * infp; + int * iwork = NULL; + char * lineptr; + int error=0, fid, i, j, lwork, maxp, nprocs, + rank, size; +/* .. + * .. Executable Statements .. 
+ */ + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Initialize the TEST data structure with default values + */ + TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0; + TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0; +/* + * Process 0 reads the input data, broadcasts to other processes and + * writes needed information to TEST->outfp. + */ + if( rank == 0 ) + { +/* + * Open file and skip data file header + */ + if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "cannot open file HPL.dat" ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) fgets( auth, HPL_LINE_MAX - 2, infp ); +/* + * Read name and unit number for summary output file + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", file ); + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + fid = atoi( num ); + if ( fid == 6 ) TEST->outfp = stdout; + else if( fid == 7 ) TEST->outfp = stderr; + else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.", + file ); + error = 1; goto label_error; + } +/* + * Read and check the parameter values for the tests. 
+ * + * Problem size (>=0) (N) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NS = atoi( num ); + if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d", + "Number of values of N is less than 1 or greater than", + HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( N[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of N less than 0" ); + error = 1; goto label_error; + } + } +/* + * Block size (>=1) (NB) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBS = atoi( num ); + if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NB is less than 1 or", + "greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NB[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NB less than 1" ); + error = 1; goto label_error; + } + } +/* + * Process grids, mapping, (>=1) (P, Q) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + *PMAPPIN = ( atoi( num ) == 1 ? 
HPL_COLUMN_MAJOR : HPL_ROW_MAJOR ); + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPQS = atoi( num ); + if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of grids is less", + "than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( P[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of P less than 1" ); + error = 1; goto label_error; + } + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( Q[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of Q less than 1" ); + error = 1; goto label_error; + } + } +/* + * Check for enough processes in machine configuration + */ + maxp = 0; + for( i = 0; i < *NPQS; i++ ) + { nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); } + if( maxp > size ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Need at least %d processes for these tests", maxp ); + error = 1; goto label_error; + } +/* + * Checking threshold value (TEST->thrsh) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num ); +/* + * Panel factorization algorithm (PF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPFS = atoi( num ); + if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "number of values of PFACT", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPFS; i++ ) + { + 
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) PF[ i ] = HPL_CROUT; + else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING; + else PF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Recursive stopping criterium (>=1) (NBM) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBMS = atoi( num ); + if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NBMIN", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBMS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NBM[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NBMIN less than 1" ); + error = 1; goto label_error; + } + } +/* + * Number of panels in recursion (>=2) (NDV) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDVS = atoi( num ); + if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NDIV", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDVS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NDV[ i ] = atoi( num ) ) < 2 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NDIV less than 2" ); + error = 1; goto label_error; + } + } +/* + * Recursive panel factorization (RF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NRFS = atoi( num ); + if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of RFACT", + "is 
less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NRFS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) RF[ i ] = HPL_CROUT; + else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING; + else RF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Broadcast topology (TP) (0=rg, 1=2rg, 2=rgM, 3=2rgM, 4=L) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NTPS = atoi( num ); + if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of BCAST", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NTPS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) TP[ i ] = HPL_1RING; + else if( j == 1 ) TP[ i ] = HPL_1RING_M; + else if( j == 2 ) TP[ i ] = HPL_2RING; + else if( j == 3 ) TP[ i ] = HPL_2RING_M; + else if( j == 4 ) TP[ i ] = HPL_BLONG; + else if( j == 5 ) TP[ i ] = HPL_BLONG_M; + else TP[ i ] = HPL_1RING_M; + } +/* + * Lookahead depth (>=0) (NDH) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDHS = atoi( num ); + if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of DEPTH", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDHS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); + lineptr += strlen( num ) + 1; + if( ( DH[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of DEPTH less than 0" ); + error = 1; 
goto label_error; + } + } +/* + * Swapping algorithm (0,1 or 2) (FSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); j = atoi( num ); + if( j == 0 ) *FSWAP = HPL_SWAP00; + else if( j == 1 ) *FSWAP = HPL_SWAP01; + else if( j == 2 ) *FSWAP = HPL_SW_MIX; + else *FSWAP = HPL_SWAP01; +/* + * Swapping threshold (>=0) (TSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *TSWAP = atoi( num ); + if( *TSWAP <= 0 ) *TSWAP = 0; +/* + * L1 in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num ); + if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; +/* + * U in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num ); + if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0; +/* + * Equilibration (0=no, 1=yes) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *EQUIL = atoi( num ); + if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1; +/* + * Memory alignment in bytes (> 0) (ALIGN) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *ALIGN = atoi( num ); + if( *ALIGN <= 0 ) *ALIGN = 4; +/* + * Close input file + */ +label_error: + if (infp != NULL) + (void) fclose( infp ); + } + else { TEST->outfp = NULL; } +/* + * Check for error on reading input file + */ + (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max, + MPI_COMM_WORLD ); + if( error ) + { + if( rank == 0 ) + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Illegal input in file HPL.dat. Exiting ..." 
); + MPI_Finalize(); +#ifdef HPL_CALL_VSIPL + (void) vsip_finalize( NULL ); +#endif + exit( 1 ); + } +/* + * Compute and broadcast machine epsilon + */ + TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS ); +/* + * Pack information arrays and broadcast + */ + (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0, + MPI_COMM_WORLD ); +/* + * Broadcast array sizes + */ + iwork = (int *)malloc( (size_t)(15) * sizeof( int ) ); + if( rank == 0 ) + { + iwork[ 0] = *NS; iwork[ 1] = *NBS; + iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 ); + iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS; + iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS; + iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN; + iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN; + } + (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD ); + if( rank != 0 ) + { + *NS = iwork[ 0]; *NBS = iwork[ 1]; + *PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR ); + *NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5]; + *NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8]; + *NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11]; + *UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14]; + } + if( iwork ) free( iwork ); +/* + * Pack information arrays and broadcast + */ + lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + + (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1; + + if (lwork < 0) + exit(EXIT_FAILURE); + + + iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + if( rank == 0 ) + { + j = 0; + for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; } + for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; } + for( i = 0; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( PF[i] == HPL_CROUT ) iwork[j] = 1; + else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + 
} + for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; } + for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( RF[i] == HPL_CROUT ) iwork[j] = 1; + else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) iwork[j] = 0; + else if( TP[i] == HPL_1RING_M ) iwork[j] = 1; + else if( TP[i] == HPL_2RING ) iwork[j] = 2; + else if( TP[i] == HPL_2RING_M ) iwork[j] = 3; + else if( TP[i] == HPL_BLONG ) iwork[j] = 4; + else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5; + j++; + } + for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; } + + if( *FSWAP == HPL_SWAP00 ) iwork[j] = 0; + else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1; + else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2; + j++; + } + (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0, + MPI_COMM_WORLD ); + if ((rank != 0) && (iwork != NULL)) + { + j = 0; + for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; } + for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; } + + for( i = 0; i < *NPFS; i++ ) + { + if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) PF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; } + for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) RF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( iwork[j] == 0 ) TP[i] = HPL_1RING; + else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M; + else if( iwork[j] == 2 ) TP[i] = HPL_2RING; + else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M; + else if( iwork[j] == 4 ) TP[i] = HPL_BLONG; + else if( iwork[j] == 
5 ) TP[i] = HPL_BLONG_M; + j++; + } + for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; } + + if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00; + else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01; + else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX; + j++; + + if( iwork ) free( iwork ); + } +/* + * regurgitate input + */ + if( rank == 0 ) + { + + if (TEST->outfp != NULL){ + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "HPLinpack 2.3 -- High-Performance Linpack benchmark -- ", + " December 2, 2018" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Written by A. Petitet and R. Clint Whaley, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Piotr Luszczek, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Julien Langou, ", + "University of Colorado Denver"); + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + + HPL_fprintf( TEST->outfp, "\n%s\n", + "An explanation of the input/output parameters follows:" ); + HPL_fprintf( TEST->outfp, "%s\n", + "T/V : Wall time / encoded variant." ); + HPL_fprintf( TEST->outfp, "%s\n", + "N : The order of the coefficient matrix A." ); + HPL_fprintf( TEST->outfp, "%s\n", + "NB : The partitioning blocking factor." ); + HPL_fprintf( TEST->outfp, "%s\n", + "P : The number of process rows." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Q : The number of process columns." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Time : Time in seconds to solve the linear system." ); + HPL_fprintf( TEST->outfp, "%s\n\n", + "Gflops : Rate of execution for solving the linear system." 
); + HPL_fprintf( TEST->outfp, "%s\n", + "The following parameter values will be used:" ); +/* + * Problem size + */ + HPL_fprintf( TEST->outfp, "\nN :" ); + for( i = 0; i < Mmin( 8, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + } + } +/* + * Distribution blocking factor + */ + HPL_fprintf( TEST->outfp, "\nNB :" ); + for( i = 0; i < Mmin( 8, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + } + } +/* + * Process mapping + */ + HPL_fprintf( TEST->outfp, "\nPMAP :" ); + if( *PMAPPIN == HPL_ROW_MAJOR ) + HPL_fprintf( TEST->outfp, " Row-major process mapping" ); + else if( *PMAPPIN == HPL_COLUMN_MAJOR ) + HPL_fprintf( TEST->outfp, " Column-major process mapping" ); +/* + * Process grid + */ + HPL_fprintf( TEST->outfp, "\nP :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + } + } + HPL_fprintf( TEST->outfp, "\nQ :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( 
TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + } + } +/* + * Panel Factorization + */ + HPL_fprintf( TEST->outfp, "\nPFACT :" ); + for( i = 0; i < Mmin( 8, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Recursive stopping criterium + */ + HPL_fprintf( TEST->outfp, "\nNBMIN :" ); + for( i = 0; i < Mmin( 8, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBMS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + } + } +/* + * Number of panels in recursion + */ + HPL_fprintf( TEST->outfp, "\nNDIV :" ); + for( i = 0; i < Mmin( 8, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i 
< *NDVS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + } + } +/* + * Recursive Factorization + */ + HPL_fprintf( TEST->outfp, "\nRFACT :" ); + for( i = 0; i < Mmin( 8, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Broadcast topology + */ + HPL_fprintf( TEST->outfp, "\nBCAST :" ); + for( i = 0; i < Mmin( 8, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else 
if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + } + } +/* + * Lookahead depths + */ + HPL_fprintf( TEST->outfp, "\nDEPTH :" ); + for( i = 0; i < Mmin( 8, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NDHS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + } + } +/* + * Swapping algorithm + */ + HPL_fprintf( TEST->outfp, "\nSWAP :" ); + if( *FSWAP == HPL_SWAP00 ) + HPL_fprintf( TEST->outfp, " Binary-exchange" ); + else if( *FSWAP == HPL_SWAP01 ) + HPL_fprintf( TEST->outfp, " Spread-roll (long)" ); + else if( *FSWAP == HPL_SW_MIX ) + HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP ); +/* + * L1 storage form + */ + HPL_fprintf( TEST->outfp, "\nL1 :" ); + if( *L1NOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * U storage form + */ + HPL_fprintf( TEST->outfp, "\nU :" ); + if( *UNOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * Equilibration + */ + 
HPL_fprintf( TEST->outfp, "\nEQUIL :" ); + if( *EQUIL != 0 ) + HPL_fprintf( TEST->outfp, " yes" ); + else + HPL_fprintf( TEST->outfp, " no" ); +/* + * Alignment + */ + HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words", + *ALIGN ); + + HPL_fprintf( TEST->outfp, "\n\n" ); +/* + * For testing only + */ + if( TEST->thrsh > HPL_rzero ) + { + HPL_fprintf( TEST->outfp, "%s%s\n\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The matrix A is randomly generated for each test." ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The following scaled residual check will be computed:" ); + HPL_fprintf( TEST->outfp, "%s\n", + " ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" ); + HPL_fprintf( TEST->outfp, "%s %21.6e\n", + "- The relative machine precision (eps) is taken to be ", + TEST->epsil ); + HPL_fprintf( TEST->outfp, "%s %11.1f\n\n", + "- Computational tests pass if scaled residuals are less than ", + TEST->thrsh ); + } + } + } +/* + * End of HPL_pdinfo + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdtest.c new file mode 100644 index 000000000..73a62a7ff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/HPL_pdtest.c @@ -0,0 +1,438 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtest +( + HPL_T_test * TEST, + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int N, + const int NB +) +#else +void HPL_pdtest +( TEST, GRID, ALGO, N, NB ) + HPL_T_test * TEST; + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int N; + const int NB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtest performs one test given a set of parameters such as the + * process grid, the problem size, the distribution blocking factor ... + * This function generates the data, calls and times the linear system + * solver, checks the accuracy of the obtained vector solution and + * writes this information to the file pointed to by TEST->outfp. + * + * Arguments + * ========= + * + * TEST (global input) HPL_T_test * + * On entry, TEST points to a testing data structure: outfp + * specifies the output file where the results will be printed. + * It is only defined and used by the process 0 of the grid. + * thrsh specifies the threshold value for the test ratio. + * Concretely, a test is declared "PASSED" if and only if the + * following inequality is satisfied: + * ||Ax-b||_oo / ( epsil * + * ( || x ||_oo * || A ||_oo + || b ||_oo ) * + * N ) < thrsh. + * epsil is the relative machine precision of the distributed + * computer. Finally the test counters, kfail, kpass, kskip and + * ktest are updated as follows: if the test passes, kpass is + * incremented by one; if the test fails, kfail is incremented + * by one; if the test is skipped, kskip is incremented by one. + * ktest is left unchanged. + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters to be used for this test.
+ * + * N (global input) const int + * On entry, N specifies the order of the coefficient matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_DETAILED_TIMING + double HPL_w[HPL_TIMING_N]; +#endif + HPL_T_pmat mat; + double wtime[1]; + int info[3]; + double Anorm1, AnormI, Gflops, Xnorm1, XnormI, + BnormI, resid0, resid1; + double * Bptr; + void * vptr = NULL; + static int first=1; + int ii, ip2, mycol, myrow, npcol, nprow, nq; + char ctop, cpfact, crfact; + time_t current_time_start, current_time_end; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mat.n = N; mat.nb = NB; mat.info = 0; + mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow ); + nq = HPL_numroc( N, NB, NB, mycol, 0, npcol ); + mat.nq = nq + 1; +/* + * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is + * N by N+1. One column is added in every process column for the solve. + * The result however is stored in a 1 x N vector replicated in every + * process row. In every process, A is lda * (nq+1), x is 1 * nq and the + * workspace is mp. 
+ * + * Ensure that lda is a multiple of ALIGN and not a power of 2 + */ + mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align; + do + { + ii = ( mat.ld += ALGO->align ); ip2 = 1; + while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } + } + while( mat.ld == ip2 ); +/* + * Allocate dynamic memory + */ + vptr = (void*)malloc( ( (size_t)(ALGO->align) + + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * + sizeof(double) ); + info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol; + (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max, + GRID->all_comm ); + if( info[0] != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", + "[%d,%d] %s", info[1], info[2], + "Memory allocation failed for A, x and b. Skip." ); + (TEST->kskip)++; + /* some processes might have succeeded with allocation */ + if (vptr) free(vptr); + return; + } +/* + * generate matrix and right-hand-side, [ A | b ] which is N by N+1. + */ + mat.A = (double *)HPL_PTR( vptr, + ((size_t)(ALGO->align) * sizeof(double) ) ); + mat.X = Mptr( mat.A, 0, mat.nq, mat.ld ); + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); +#ifdef HPL_CALL_VSIPL + mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A), + (vsip_length)(mat.ld * mat.nq), + VSIP_MEM_NONE ); +#endif +/* + * Solve linear system + */ + HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm ); + time( &current_time_start ); + HPL_ptimer( 0 ); + HPL_pdgesv( GRID, ALGO, &mat ); + HPL_ptimer( 0 ); + time( &current_time_end ); +#ifdef HPL_CALL_VSIPL + (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); + vsip_blockdestroy_d( mat.block ); +#endif +/* + * Gather max of all CPU and WALL clock timings and print timing results + */ + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + 1, 0, wtime ); + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + if( first ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); +
HPL_fprintf( TEST->outfp, "%s%s\n", + "T/V N NB P Q", + " Time Gflops" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + if( TEST->thrsh <= HPL_rzero ) first = 0; + } +/* + * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve. + * Print WALL time + */ + Gflops = ( ( (double)(N) / 1.0e+9 ) * + ( (double)(N) / wtime[0] ) ) * + ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) ); + + cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + + if( ALGO->btopo == HPL_1RING ) ctop = '0'; + else if( ALGO->btopo == HPL_1RING_M ) ctop = '1'; + else if( ALGO->btopo == HPL_2RING ) ctop = '2'; + else if( ALGO->btopo == HPL_2RING_M ) ctop = '3'; + else if( ALGO->btopo == HPL_BLONG ) ctop = '4'; + else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5'; + + if( wtime[0] > HPL_rzero ) { + HPL_fprintf( TEST->outfp, + "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f %19.4e\n", + ( GRID->order == HPL_ROW_MAJOR ? 
'R' : 'C' ), + ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin, + N, NB, nprow, npcol, wtime[0], Gflops ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() end time %s\n", ctime( &current_time_end ) ); + } + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + HPL_TIMING_N, HPL_TIMING_BEG, HPL_w ); + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V", + "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" ); +/* + * Recursive panel factorization + */ + if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time rfact . . . : %18.2f\n", + HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization + */ + if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time pfact . . : %18.2f\n", + HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization (swap) + */ + if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time mxswp . . : %18.2f\n", + HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] ); +/* + * Update + */ + if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time update . . : %18.2f\n", + HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] ); +/* + * Update (swap) + */ + if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time laswp . . : %18.2f\n", + HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] ); +/* + * Upper triangular system solve + */ + if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time up tr sv .
: %18.2f\n", + HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] ); + + if( TEST->thrsh <= HPL_rzero ) + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + } +#endif +/* + * Quick return, if I am not interested in checking the computations + */ + if( TEST->thrsh <= HPL_rzero ) + { (TEST->kpass)++; if( vptr ) free( vptr ); return; } +/* + * Check info returned by solve + */ + if( mat.info != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", + "Error code returned by solve is", mat.info, "skip" ); + (TEST->kskip)++; + if( vptr ) free( vptr ); return; + } +/* + * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x, + * and norm inf of b - A x. Display residual checks. + */ + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); + Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld ); + AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld ); +/* + * Because x is distributed in process rows, switch the norms + */ + XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 ); + Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 ); +/* + * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to + * find the max (in the col). Then (3) broadcast along the rows so that every + * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5] + * for the entries of B, it is very likely that BnormI (<=,~) 0.5. 
+ */ + Bptr = Mptr( mat.A, 0, nq, mat.ld ); + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){ + if( mat.mp > 0 ) + { + BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI ); + } + else + { + BnormI = HPL_rzero; + } + (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max, + GRID->col_comm ); + } + (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE, + HPL_indxg2p( N, NB, NB, 0, npcol ), + GRID->row_comm ); +/* + * If I own b, compute ( b - A x ) and ( - A x ) otherwise + */ + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 ); + } + else if( nq > 0 ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 ); + } + else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } +/* + * Reduce the distributed residual in process column 0 + */ + if( mat.mp > 0 ) + (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0, + GRID->row_comm ); +/* + * Compute || b - A x ||_oo + */ + resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld ); +/* + * Computes and displays norms, residuals ... + */ + if( N <= 0 ) + { + resid1 = HPL_rzero; + } + else + { + resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) ); + } + + if( resid1 < TEST->thrsh ) (TEST->kpass)++; + else (TEST->kfail)++; + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n", + "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1, + " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) ); + + if(resid1 >= TEST->thrsh ) + { + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||Ax-b||_oo . . . . . . . . . . . . . . . . . = ", resid0 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_oo . . . . . . . . . . . 
. . . . . . . . = ", AnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_1 . . . . . . . . . . . . . . . . . . . = ", Anorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_1 . . . . . . . . . . . . . . . . . . . = ", Xnorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI ); + } + } + if( vptr ) free( vptr ); +/* + * End of HPL_pdtest + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Makefile new file mode 100644 index 000000000..cfc96e667 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptest/intel64/Makefile @@ -0,0 +1,94 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \ + $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h +# +## Executable names #################################################### +# +xhpl = $(BINdir)/xhpl +# +## Object files ######################################################## +# +HPL_pteobj = \ + HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/HPL.dat : ../HPL.dat + ( $(CP) ../HPL.dat $(BINdir) ) +# +dexe.grd: $(HPL_pteobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/HPL.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_pddriver.o : ../HPL_pddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pddriver.c +HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdinfo.c +HPL_pdtest.o : ../HPL_pdtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtest.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer.c new file mode 100644 index 000000000..202416079 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer.c @@ -0,0 +1,358 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 
2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_ptimer_disabled; +static double HPL_ptimer_cpusec [HPL_NPTIMER], + HPL_ptimer_cpustart [HPL_NPTIMER], + HPL_ptimer_wallsec [HPL_NPTIMER], + HPL_ptimer_wallstart[HPL_NPTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_ptimer_boot( void ) +#else +void HPL_ptimer_boot() +#endif +{ +/* + * HPL_ptimer_boot (re)sets all timers to 0, and enables HPL_ptimer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 0; + + for( i = 0; i < HPL_NPTIMER; i++ ) + { + HPL_ptimer_cpusec [i] = HPL_ptimer_wallsec [i] = HPL_rzero; + HPL_ptimer_cpustart[i] = HPL_ptimer_wallstart[i] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer( const int I ) +#else +void HPL_ptimer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer provides a "stopwatch" functionality cpu/wall timer in + * seconds. 
Up to 64 separate timers can be functioning at once. The + * first call starts the timer, and the second stops it. This routine + * can be disenabled by calling HPL_ptimer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable + * the timer functionality. One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + * + * where I is the timer index in [0..64). To inititialize the timer + * functionality, one must have called HPL_ptimer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_ptimer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_ptimer_wallstart[I] == HPL_PTIMER_STARTFLAG ) + { + HPL_ptimer_wallstart[I] = HPL_ptimer_walltime(); + HPL_ptimer_cpustart [I] = HPL_ptimer_cputime (); + } + else + { + HPL_ptimer_cpusec [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I]; + HPL_ptimer_wallsec [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I]; + HPL_ptimer_wallstart[I] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_enable( void ) +#else +void HPL_ptimer_enable() +#endif +{ +/* + * HPL_ptimer_enable sets it so calls to HPL_ptimer are not ignored. + */ +/* .. + * .. Executable Statements .. 
+ */ + HPL_ptimer_disabled = 0; + return; +/* + * End of HPL_ptimer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_disable( void ) +#else +void HPL_ptimer_disable() +#endif +{ +/* + * HPL_ptimer_disable sets it so calls to HPL_ptimer are ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 1; + return; +/* + * End of HPL_ptimer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_ptimer_inquire +( + const HPL_T_PTIME TMTYPE, + const int I +) +#else +double HPL_ptimer_inquire( TMTYPE, I ) + const int I; + const HPL_T_PTIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. 
+ */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_PTIMER_ERROR + */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_wallsec[I]; + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_cpusec [I]; + } + return( time ); +/* + * End of HPL_ptimer_inquire + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_combine +( + MPI_Comm COMM, + const HPL_T_PTIME_OP OPE, + const HPL_T_PTIME TMTYPE, + const int N, + const int IBEG, + double * TIMES +) +#else +void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES ) + const int IBEG, N; + const HPL_T_PTIME_OP OPE; + const HPL_T_PTIME TMTYPE; + MPI_Comm COMM; + double * TIMES; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_combine combines the timing information stored on a scope + * of processes into the user TIMES array. + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection on + * which the timings are taken. + * + * OPE (global input) const HPL_T_PTIME_OP + * On entry, OP specifies what combine operation should be done + * as follows: + * = HPL_AMAX_PTIME get max. time on any process (default), + * = HPL_AMIN_PTIME get min. time on any process, + * = HPL_SUM_PTIME get sum of times across processes. + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * N (global input) const int + * On entry, N specifies the number of timers to combine. + * + * IBEG (global input) const int + * On entry, IBEG specifies the first timer to be combined. + * + * TIMES (global output) double * + * On entry, TIMES is an array of dimension at least N. 
On exit, + * this array contains the requested timing information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, tmpdis; +/* .. + * .. Executable Statements .. + */ + tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1; +/* + * Timer has been disabled for combine operation - copy timing informa- + * tion into user times array. If wall- or cpu-time are not available + * on this machine, fill in times with HPL_PTIMER_ERROR flag and return. + */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; } + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i]; } + } +/* + * Combine all nodes information, restore HPL_ptimer_disabled, and return + */ + for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] ); + + if( OPE == HPL_AMAX_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + else if( OPE == HPL_AMIN_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM ); + else if( OPE == HPL_SUM_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM ); + else + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + + HPL_ptimer_disabled = tmpdis; +/* + * End of HPL_ptimer_combine + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c new file mode 100644 index 000000000..711ef185d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c @@ -0,0 +1,146 @@ +/* + 
* -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + struct rusage ruse; + + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + return( HPL_PTIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_ptimer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c new file mode 100644 index 000000000..96cbd300f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_GETTIMEOFDAY ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} + +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + return( MPI_Wtime() ); +} + +#endif +/* + * End of HPL_ptimer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Make.inc @@ -0,0 +1 @@ 
+/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Makefile new file mode 100644 index 000000000..971500764 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/ptimer/intel64/Makefile @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_ptimer.h +# +## Object files ######################################################## +# +HPL_ptiobj = \ + HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_ptiobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_ptimer.o : ../HPL_ptimer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer.c +HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_cputime.c +HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer.c new file mode 100644 index 000000000..3be9665f7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_timer_disabled; +static double HPL_timer_cpusec [HPL_NTIMER], + HPL_timer_cpustart [HPL_NTIMER], + HPL_timer_wallsec [HPL_NTIMER], + HPL_timer_wallstart[HPL_NTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_timer_boot( void ) +#else +void HPL_timer_boot() +#endif +{ +/* + * HPL_timer_boot (re)sets all timers to 0, and enables HPL_timer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + + for( i = 0; i < HPL_NTIMER; i++ ) + { + HPL_timer_cpusec [i] = HPL_timer_wallsec [i] = HPL_rzero; + HPL_timer_cpustart[i] = HPL_timer_wallstart[i] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_timer( const int I ) +#else +void HPL_timer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer provides a "stopwatch" functionality cpu/wall timer in + * seconds. Up to 64 separate timers can be functioning at once. 
The + * first call starts the timer, and the second stops it. This routine + * can be disenabled by calling HPL_timer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_timer calls in them. HPL_timer_enable() will re-enable + * the timer functionality. One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + * + * where I is the timer index in [0..64). To initialize the timer + * functionality, one must have called HPL_timer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_timer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG ) + { + HPL_timer_wallstart[I] = HPL_timer_walltime(); + HPL_timer_cpustart [I] = HPL_timer_cputime (); + } + else + { + HPL_timer_cpusec [I] += HPL_timer_cputime () - HPL_timer_cpustart [I]; + HPL_timer_wallsec [I] += HPL_timer_walltime() - HPL_timer_wallstart[I]; + HPL_timer_wallstart[I] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_enable( void ) +#else +void HPL_timer_enable() +#endif +{ +/* + * HPL_timer_enable sets it so calls to HPL_timer are not ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + return; +/* + * End of HPL_timer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_disable( void ) +#else +void HPL_timer_disable() +#endif +{ +/* + * HPL_timer_disable sets it so calls to HPL_timer are ignored. + */ +/* .. + * .. Executable Statements .. 
+ */ + HPL_timer_disabled = 1; + return; +/* + * End of HPL_timer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_timer_inquire +( + const HPL_T_TIME TMTYPE, + const int I +) +#else +double HPL_timer_inquire( TMTYPE, I ) + const int I; + const HPL_T_TIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_TIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_TIME : wall clock time is returned, + * = HPL_CPU_TIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. + */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_TIMER_ERROR + */ + if( TMTYPE == HPL_WALL_TIME ) + { + if( HPL_timer_walltime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_wallsec[I]; + } + else + { + if( HPL_timer_cputime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_cpusec [I]; + } + return( time ); +/* + * End of HPL_timer_inquire + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_cputime.c new file mode 100644 index 000000000..4a7f9dfef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_cputime.c @@ -0,0 +1,145 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + struct rusage ruse; + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + return( HPL_TIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_timer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_walltime.c new file mode 100644 index 000000000..f4f44f202 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/HPL_timer_walltime.c @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_walltime( void ) +#else +double HPL_timer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} +/* + * End of HPL_timer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Make.inc new file mode 120000 index 000000000..3ee301793 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Make.inc @@ -0,0 +1 @@ +/home/kmcgrie/OneBench/temp/applications.benchmarking.oneapi.onebench/hplinpack/dpcpp/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Makefile 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Makefile new file mode 100644 index 000000000..b8009e88a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/testing/timer/intel64/Makefile @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h +# +## Object files ######################################################## +# +HPL_timobj = \ + HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_timobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_timer.o : ../HPL_timer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer.c +HPL_timer_cputime.o : ../HPL_timer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_cputime.c +HPL_timer_walltime.o : ../HPL_timer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1rinM.jpg new file mode 100755 index 000000000..9af78f844 Binary files /dev/null and 
b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1rinM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1ring.jpg new file mode 100755 index 000000000..73e4391cf Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/1ring.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2-273x48.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2-273x48.jpg new file mode 100755 index 000000000..23795f8b9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2-273x48.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2rinM.jpg new file mode 100755 index 000000000..c294e0d07 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2rinM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2ring.jpg new file mode 100755 index 000000000..f37187f13 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/2ring.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_abort.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_abort.html new file mode 100755 index 000000000..49a4bd318 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_abort.html @@ -0,0 +1,67 @@ + + +HPL_abort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_abort halts execution. + +

Synopsis

+#include "hpl.h"

+void +HPL_abort( +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_abort +displays an error message on stderr and halts execution. + +

Arguments

+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_abort( __LINE__, __FILE__, "Halt.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_fprintf, +HPL_warn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_all_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_all_reduce.html new file mode 100755 index 000000000..591cdd596 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_all_reduce.html @@ -0,0 +1,67 @@ + + +HPL_all_reduce HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_all_reduce All reduce operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_all_reduce( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const HPL_T_OP +OP, +MPI_Comm +COMM +); + +

Description

+HPL_all_reduce +performs a global reduce operation across all +processes of a group leaving the results on all processes. + +

Arguments

+
+BUFFER  (local input/global output)   void *
+        On entry,  BUFFER  points to  the  buffer to be combined.  On
+        exit, this array contains the combined data and  is identical
+        on all processes in the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer's operands.
+
+
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_barrier.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_barrier.html new file mode 100755 index 000000000..86ae426ad --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_barrier.html @@ -0,0 +1,41 @@ + + +HPL_barrier HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_barrier Barrier operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_barrier( +MPI_Comm +COMM +); + +

Description

+HPL_barrier +blocks the caller until all process members have called it. +The call returns at any process only after all group members have +entered the call. + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bcast.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bcast.html new file mode 100755 index 000000000..079325ed7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bcast.html @@ -0,0 +1,46 @@ + + +HPL_bcast HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_bcast Perform the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_bcast( +HPL_T_panel * +PANEL, +int * +IFLAG +); + +

Description

+HPL_bcast +broadcasts the current panel. Successful completion is +indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to +HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was +not completed, in which case this function should be called again. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+
+IFLAG   (output)                      int *
+        On exit,  IFLAG  indicates  whether  or not the broadcast has
+        occurred.
+
+ +

See Also

+HPL_binit, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_binit.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_binit.html new file mode 100755 index 000000000..0f9a9e1ae --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_binit.html @@ -0,0 +1,37 @@ + + +HPL_binit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_binit Initialize the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_binit( +HPL_T_panel * +PANEL +); + +

Description

+HPL_binit +initializes a row broadcast. Successful completion is +indicated by the returned error code HPL_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_broadcast.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_broadcast.html new file mode 100755 index 000000000..6e24b2c2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_broadcast.html @@ -0,0 +1,67 @@ + + +HPL_broadcast HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_broadcast Broadcast operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_broadcast( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const int +ROOT, +MPI_Comm +COMM +); + +

Description

+HPL_broadcast +broadcasts a message from the process with rank ROOT to +all processes in the group. + +

Arguments

+
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be broadcast. On
+        exit, this array contains the broadcast data and is identical
+        on all processes in the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer's operands.
+
+
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the source process.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bwait.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bwait.html new file mode 100755 index 000000000..f1dd51e7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_bwait.html @@ -0,0 +1,38 @@ + + +HPL_bwait HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_bwait Finalize the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_bwait( +HPL_T_panel * +PANEL +); + +

Description

+HPL_bwait +waits for the row broadcast of the current panel to +terminate. Successful completion is indicated by the returned error +code HPL_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_binit, +HPL_bcast. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_copyL.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_copyL.html new file mode 100755 index 000000000..4b98963ac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_copyL.html @@ -0,0 +1,42 @@ + + +HPL_copyL HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_copyL Copy the current panel into a contiguous workspace. + +

Synopsis

+#include "hpl.h"

+void +HPL_copyL( +HPL_T_panel * +PANEL +); + +

Description

+HPL_copyL +copies the panel of columns, the L1 replicated submatrix, +the pivot array and the info scalar into a contiguous workspace for +later broadcast. + +The copy of this panel into a contiguous buffer can be enforced by +specifying -DHPL_COPY_L in the architecture specific Makefile. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_binit, +HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_daxpy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_daxpy.html new file mode 100755 index 000000000..c34d0b2e8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_daxpy.html @@ -0,0 +1,89 @@ + + +HPL_daxpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_daxpy y := y + alpha * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_daxpy( +const int +N, +const double +ALPHA, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_daxpy +scales the vector x by alpha and adds it to y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the scaled entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dcopy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dcopy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dcopy.html new file mode 100755 index 000000000..2a4a485b5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dcopy.html @@ -0,0 +1,81 @@ + + +HPL_dcopy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dcopy y := x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dcopy( +const int +N, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_dcopy +copies the vector x into the vector y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dcopy( 3, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemm.html new file mode 100755 index 000000000..667c0ff01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemm.html @@ -0,0 +1,178 @@ + + +HPL_dgemm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dgemm C := alpha * op(A) * op(B) + beta * C. + +

Synopsis

+#include "hpl.h"

+void +HPL_dgemm( +const enum HPL_ORDER +ORDER, +const enum HPL_TRANS +TRANSA, +const enum HPL_TRANS +TRANSB, +const int +M, +const int +N, +const int +K, +const double +ALPHA, +const double * +A, +const int +LDA, +const double * +B, +const int +LDB, +const double +BETA, +double * +C, +const int +LDC +); + +

Description

+HPL_dgemm +performs one of the matrix-matrix operations + + C := alpha * op( A ) * op( B ) + beta * C + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +Alpha and beta are scalars, and A, B and C are matrices, with op(A) +an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+TRANSA  (local input)                 const enum HPL_TRANS
+        On entry, TRANSA  specifies the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSA==HplNoTrans    : op( A ) = A,                     
+           TRANSA==HplTrans      : op( A ) = A^T,                   
+           TRANSA==HplConjTrans  : op( A ) = A^T.                   
+
+
+TRANSB  (local input)                 const enum HPL_TRANS
+        On entry, TRANSB  specifies the form of  op(B)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSB==HplNoTrans    : op( B ) = B,                     
+           TRANSB==HplTrans      : op( B ) = B^T,                   
+           TRANSB==HplConjTrans  : op( B ) = B^T.                   
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the  number  of rows  of the  matrix
+        op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the number  of columns of the matrix
+        op(B)  and  the number of columns of the matrix  C. N must be
+        at least zero.
+
+
+K       (local input)                 const int
+        On entry,  K  specifies  the  number of columns of the matrix
+        op(A) and the number of rows of the matrix op(B).  K  must be
+        at least  zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero  then the elements of the matrices A and B
+        need not be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  is an array of dimension (LDA,ka),  where ka is
+        k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+        entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+        the array  A must contain the matrix A, otherwise the leading
+        k  by  m  part of the array  A  must  contain the  matrix  A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA  specifies the first dimension of A as declared
+        in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+        LDA must be at least max(1,m), otherwise LDA must be at least
+        max(1,k).
+
+
+B       (local input)                 const double *
+        On entry, B is an array of dimension (LDB,kb),  where  kb  is
+        n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+        entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+        the array  B must contain the matrix B, otherwise the leading
+        n  by  k  part of the array  B  must  contain  the matrix  B.
+
+
+LDB     (local input)                 const int
+        On entry, LDB  specifies the first dimension of B as declared
+        in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+        LDB must be at least max(1,k), otherwise LDB must be at least
+        max(1,n).
+
+
+BETA    (local input)                 const double
+        On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+        supplied  as  zero  then  the  elements of the matrix C  need
+        not be set on input.
+
+
+C       (local input/output)          double *
+        On entry,  C  is an array of dimension (LDC,n). Before entry,
+        the  leading m by n part  of  the  array  C  must contain the
+        matrix C,  except when beta is zero, in which case C need not
+        be set on entry. On exit, the array  C  is overwritten by the
+        m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+
+
+LDC     (local input)                 const int
+        On entry, LDC  specifies the first dimension of C as declared
+        in  the   calling  (sub)  program.   LDC  must  be  at  least
+        max(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2], c[2*2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+   printf("  [%f,%f]\n", c[0], c[2]);
+   printf("c=[%f,%f]\n", c[1], c[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dtrsm. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemv.html new file mode 100755 index 000000000..d5921a9b2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dgemv.html @@ -0,0 +1,146 @@ + + +HPL_dgemv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dgemv y := beta * y + alpha * op(A) * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dgemv( +const enum HPL_ORDER +ORDER, +const enum HPL_TRANS +TRANS, +const int +M, +const int +N, +const double +ALPHA, +const double * +A, +const int +LDA, +const double * +X, +const int +INCX, +const double +BETA, +double * +Y, +const int +INCY +); + +

Description

+HPL_dgemv +performs one of the matrix-vector operations + + y := alpha * op( A ) * x + beta * y, + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +where alpha and beta are scalars, x and y are vectors and A is an m +by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies the  operation to be performed as
+        follows:   
+           TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+           TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  A and X  need not be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+BETA    (local input)                 const double
+        On entry, BETA  specifies the scalar beta.    When  BETA   is
+        supplied as zero then  Y  need not be set on input.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( m - 1 ) * abs( INCY ) ) when  TRANS = HplNoTrans, and
+        ( 1 + ( n - 1 ) * abs( INCY ) ) otherwise.  Before entry with
+        BETA non-zero, the incremented array Y must contain the vector
+        y.  On exit,  Y  is  overwritten  by  the  updated  vector  y.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+              a, 2, x, 1, -1.0, y, 1 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dger, +HPL_dtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dger.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dger.html new file mode 100755 index 000000000..e4ea948ed --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dger.html @@ -0,0 +1,124 @@ + + +HPL_dger HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dger A := alpha * x * y^T + A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dger( +const enum HPL_ORDER +ORDER, +const int +M, +const int +N, +const double +ALPHA, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY, +double * +A, +const int +LDA +); + +

Description

+HPL_dger +performs the rank 1 operation + + A := alpha * x * y^T + A, + +where alpha is a scalar, x is an m-element vector, y is an n-element +vector and A is an m by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  X and Y  need not be set on input.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input)                 double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+
+A       (local input/output)          double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients. On exit, A is
+        overwritten by the updated matrix.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+             a, 2 );
+   printf("a=[%f,%f,%f,%f]\n", a[0], a[1], a[2], a[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dgemv, +HPL_dtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlacpy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlacpy.html new file mode 100755 index 000000000..b64d34e0c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlacpy.html @@ -0,0 +1,84 @@ + + +HPL_dlacpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlacpy B := A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlacpy( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dlacpy +copies an array A into an array B. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M specifies the number of rows of the arrays A and
+        B. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies  the number of columns of the arrays A
+        and B. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N).
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with A.
+
+
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0);
+   return(0);
+}
+
+ +

See Also

+HPL_dlatcpy. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlamch.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlamch.html new file mode 100755 index 000000000..cb87a90ba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlamch.html @@ -0,0 +1,86 @@ + + +HPL_dlamch HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlamch determines machine-specific arithmetic constants. + +

Synopsis

+#include "hpl.h"

+double +HPL_dlamch( +const HPL_T_MACH +CMACH +); + +

Description

+HPL_dlamch +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such +that 1 / sfmin does not overflow, the base of the machine (base), the +precision (prec), the number of (base) digits in the mantissa (t), +whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the +minimum exponent before (gradual) underflow (emin), the underflow +threshold (rmin) base**(emin-1), the largest exponent before overflow +(emax), the overflow threshold (rmax) (base**emax)*(1-eps). + +

Arguments

+
+CMACH   (local input)                 const HPL_T_MACH
+        Specifies the value to be returned by HPL_dlamch             
+           = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+           = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+           = HPL_MACH_BASE,  HPL_dlamch := base                      
+           = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+           = HPL_MACH_MLEN,  HPL_dlamch := t                         
+           = HPL_MACH_RND,   HPL_dlamch := rnd                       
+           = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+           = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+           = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+           = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double eps;
+   eps = HPL_dlamch( HPL_MACH_EPS );
+   printf("eps=%18.8e\n", eps);
+   exit(0); return(0);
+}
+
+ +

References

+This function has been manually translated from the Fortran 77 LAPACK +auxiliary function dlamch.f (version 2.0 -- 1992), that was itself +based on the function ENVRON by Malcolm and incorporated suggestions +by Gentleman and Marovich. See + +Malcolm M. A., Algorithms to reveal properties of floating-point +arithmetic., Comms. of the ACM, 15, 949-951 (1972). + +Gentleman W. M. and Marovich S. B., More on algorithms that reveal +properties of floating point arithmetic units., Comms. of the ACM, +17, 276-277 (1974). + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlange.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlange.html new file mode 100755 index 000000000..ce276e257 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlange.html @@ -0,0 +1,86 @@ + + +HPL_dlange HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlange Compute ||A||. + +

Synopsis

+#include "hpl.h"

+double +HPL_dlange( +const HPL_T_NORM +NORM, +const int +M, +const int +N, +const double * +A, +const int +LDA +); + +

Description

+HPL_dlange +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a matrix A: + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. + +

Arguments

+
+NORM    (local input)                 const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry,  A  points to an  array of dimension  (LDA,N), that
+        contains the matrix A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], norm;
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+   printf("norm=%f\n", norm);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dlaprnt, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaprnt.html new file mode 100755 index 000000000..f589ee2bb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaprnt.html @@ -0,0 +1,86 @@ + + +HPL_dlaprnt HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaprnt Print the matrix A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaprnt( +const int +M, +const int +N, +double * +A, +const int +IA, +const int +JA, +const int +LDA, +const char * +CMATNM +); + +

Description

+HPL_dlaprnt +prints to standard error an M-by-N matrix A. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies the number of rows of A. M must be at
+        least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies the number of columns of A. N must be
+        at least zero.
+
+
+A       (local input)                 double *
+        On entry, A  points to an array of dimension (LDA,N).
+
+
+IA      (local input)                 const int
+        On entry, IA specifies the starting row index to be printed.
+
+
+JA      (local input)                 const int
+        On entry,  JA  specifies  the  starting  column index  to be
+        printed.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+
+CMATNM  (local input)                 const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp00N.html new file mode 100755 index 000000000..8e36cf6c6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp00N.html @@ -0,0 +1,78 @@ + + +HPL_dlaswp00N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp00N performs a series of row interchanges. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp00N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int * +IPIV +); + +

Description

+HPL_dlaswp00N +performs a series of local row interchanges on a matrix +A. One row interchange is initiated for rows 0 through M-1 of A. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M specifies the number of rows of the array A to be
+        interchanged. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies  the number of columns of the array A.
+        N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A  points to an array of dimension (LDA,N) to which
+        the row interchanges will be  applied.  On exit, the permuted
+        matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+IPIV    (local input)                 const int *
+        On entry,  IPIV  is  an  array of size  M  that  contains the
+        pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+        implies that local rows k and l are to be interchanged.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01N.html new file mode 100755 index 000000000..aa8861d10 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01N.html @@ -0,0 +1,109 @@ + + +HPL_dlaswp01N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp01N copies rows of A into itself and into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp01N( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp01N +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the positions indicated by positive values of LINDXAU.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01T.html new file mode 100755 index 000000000..9697471c5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp01T.html @@ -0,0 +1,110 @@ + + +HPL_dlaswp01T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp01T copies rows of A into itself and into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp01T( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp01T +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. Rows of A are stored as columns in U. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the  positions indicated by positive values of LINDXAU.  The
+        rows of A are stored as columns in U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp02N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp02N.html new file mode 100755 index 000000000..d4e1a0cf8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp02N.html @@ -0,0 +1,107 @@ + + +HPL_dlaswp02N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp02N pack rows of A into columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp02N( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +W0, +double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp02N +packs scattered rows of an array A into workspace W. +The row offsets in A are specified by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        copied into W. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        copied into W. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be copied into W.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+W0      (local input/output)          double *
+        On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local output)                double *
+        On entry, W  is an array of size (LDW,M). On exit, W contains
+        the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+        in W(:,i).
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied into W.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M  that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the rows of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03N.html new file mode 100755 index 000000000..f5c4127b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03N.html @@ -0,0 +1,95 @@ + + +HPL_dlaswp03N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp03N copy rows of W into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp03N( +const int +M, +const int +N, +double * +U, +const int +LDU, +const double * +W0, +const double * +W, +const int +LDW +); + +

Description

+HPL_dlaswp03N +copies columns of W into rows of an array U. The +destination in U of these columns contained in W is stored within W0. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N).  Columns
+        of W are copied as rows within this array U at  the positions
+        specified in W0.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03T.html new file mode 100755 index 000000000..010175313 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp03T.html @@ -0,0 +1,95 @@ + + +HPL_dlaswp03T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp03T copy columns of W into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp03T( +const int +M, +const int +N, +double * +U, +const int +LDU, +const double * +W0, +const double * +W, +const int +LDW +); + +

Description

+HPL_dlaswp03T +copies columns of W into an array U. The destination +in U of these columns contained in W is stored within W0. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M).  Columns
+        of W are copied within the array U at the positions specified
+        in W0.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04N.html new file mode 100755 index 000000000..bb6cab0a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04N.html @@ -0,0 +1,131 @@ + + +HPL_dlaswp04N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp04N copy rows of U in A and replace them with columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp04N( +const int +M0, +const int +M1, +const int +N, +double * +U, +const int +LDU, +double * +A, +const int +LDA, +const double * +W0, +const double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp04N +copies M0 rows of U into A and replaces those rows of U +with columns of W. In addition M1 - M0 columns of W are copied into +rows of U. + +

Arguments

+
+M0      (local input)                 const int
+        On entry, M0 specifies the number of rows of U that should be
+        copied into  A  and replaced by columns of  W.  M0 must be at
+        least zero.
+
+
+M1      (local input)                 const int
+        On entry, M1 specifies the number of columns of W that should
+        be copied into rows of U. M1 must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  points to  an array of dimension (LDU,N).  This
+        array contains the rows that are to be copied into A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M1).
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the row W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes A into which rows of U are copied.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the columns of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04T.html new file mode 100755 index 000000000..0209a3689 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp04T.html @@ -0,0 +1,132 @@ + + +HPL_dlaswp04T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp04T copy columns of U in rows of A and replace them with columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp04T( +const int +M0, +const int +M1, +const int +N, +double * +U, +const int +LDU, +double * +A, +const int +LDA, +const double * +W0, +const double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp04T +copies M0 columns of U into rows of A and replaces those +columns of U with columns of W. In addition M1 - M0 columns of W are +copied into U. + +

Arguments

+
+M0      (local input)                 const int
+        On entry, M0 specifies the number of columns of U that should
+        be copied into A and replaced by columns of W.  M0 must be at
+        least zero.
+
+
+M1      (local input)                 const int
+        On entry, M1 specifies  the number of columns of W that will
+        be copied into U. M1 must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies the length of the columns of  U  that
+        will be copied into rows of A. N must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes A into which columns of U are copied.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the  local column indexes of  U  that should be copied into A
+        and replaced by the columns of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05N.html new file mode 100755 index 000000000..f428b7354 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05N.html @@ -0,0 +1,98 @@ + + +HPL_dlaswp05N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp05N copy rows of U into A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp05N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp05N +copies rows of U of global offset LINDXAU into rows of +A at positions indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of U that should be
+        copied into A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input)                 const double *
+        On entry,  U  points to an array of dimension  (LDU,N).  This
+        array contains the rows that are to be copied into A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local row indexes of U that should be copied in A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05T.html new file mode 100755 index 000000000..fffb9f320 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp05T.html @@ -0,0 +1,98 @@ + + +HPL_dlaswp05T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp05T copy columns of U into rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp05T( +const int +M, +const int +N, +double * +A, +const int +LDA, +const double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp05T +copies columns of U of global offset LINDXAU into rows +of A at positions indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies the number of columns of U that should be copied into A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the columns of U that will
+        be copied into rows of A. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input)                 const double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local column indexes of U that should be copied in A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06N.html new file mode 100755 index 000000000..f28ab48c6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06N.html @@ -0,0 +1,92 @@ + + +HPL_dlaswp06N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp06N swap rows of U with rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp06N( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA +); + +

Description

+HPL_dlaswp06N +swaps rows of U with rows of A at positions +indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with rows of U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with rows of U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,N).  This
+        array contains the rows of U that are to be swapped with rows
+        of A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06T.html new file mode 100755 index 000000000..86032a9f4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp06T.html @@ -0,0 +1,92 @@ + + +HPL_dlaswp06T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp06T swap columns of U with rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp06T( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA +); + +

Description

+HPL_dlaswp06T +swaps columns of U with rows of A at positions +indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with columns of U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with columns of U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns of  U  that are to be swapped with
+        rows of A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp10N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp10N.html new file mode 100755 index 000000000..84403ca79 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlaswp10N.html @@ -0,0 +1,77 @@ + + +HPL_dlaswp10N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp10N performs a series of column interchanges. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp10N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int * +IPIV +); + +

Description

+HPL_dlaswp10N +performs a sequence of local column interchanges on a +matrix A. One column interchange is initiated for columns 0 through +N-1 of A. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the array A. M
+        must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the number of columns of the array A. N
+        must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A  points to an  array of  dimension (LDA,N).  This
+        array contains the columns onto which the interchanges should
+        be applied. On exit, A contains the permuted matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+IPIV    (local input)                 const int *
+        On entry, IPIV is an array of size N of column pivot indices.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlatcpy.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlatcpy.html new file mode 100755 index 000000000..fa1cca5d9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlatcpy.html @@ -0,0 +1,83 @@ + + +HPL_dlatcpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlatcpy B := A^T + +

Synopsis

+#include "hpl.h"

+void +HPL_dlatcpy( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dlatcpy +copies the transpose of an array A into an array B. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M specifies the number of  rows of the array B and
+        the number of columns of A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the number of  rows of the array A and
+        the number of columns of B. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,M).
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,N).
+
+
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with the transpose of A.
+
+
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dlacpy. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocmax.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocmax.html new file mode 100755 index 000000000..c3361f32d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocmax.html @@ -0,0 +1,87 @@ + + +HPL_dlocmax HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocmax finds the maximum entry in matrix column. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocmax( +HPL_T_panel * +PANEL, +const int +N, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocmax +finds the maximum entry in the current column and packs +the useful information in WORK[0:3]. On exit, WORK[0] contains the +local maximum absolute value scalar, WORK[1] is the corresponding +local row index, WORK[2] is the corresponding global row index, and +WORK[3] is the coordinate of the process owning this max. When N is +less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set +to the total number of process rows. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of rows of the column
+        of A on which we operate.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is  a workarray of size at least 4.  On exit,
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.
+
+ +

See Also

+HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpN.html new file mode 100755 index 000000000..b5c4b74a9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpN.html @@ -0,0 +1,79 @@ + + +HPL_dlocswpN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocswpN locally swaps rows within panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocswpN( +HPL_T_panel * +PANEL, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocswpN +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpT.html new file mode 100755 index 000000000..d31361543 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dlocswpT.html @@ -0,0 +1,79 @@ + + +HPL_dlocswpT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocswpT locally swaps rows within panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocswpT( +HPL_T_panel * +PANEL, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocswpT +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dmatgen.html new file mode 100755 index 000000000..7886da146 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dmatgen.html @@ -0,0 +1,73 @@ + + +HPL_dmatgen HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dmatgen random matrix generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_dmatgen( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int +ISEED +); + +

Description

+HPL_dmatgen +generates (or regenerates) a random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. + +

Arguments

+
+M       (input)                       const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (input)                       const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+A       (output)                      double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        this  array  contains   the   coefficients  of  the  randomly
+        generated matrix.
+
+
+LDA     (input)                       const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+
+ISEED   (input)                       const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dscal.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dscal.html new file mode 100755 index 000000000..c13427f44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dscal.html @@ -0,0 +1,74 @@ + + +HPL_dscal HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dscal x = alpha * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dscal( +const int +N, +const double +ALPHA, +double * +X, +const int +INCX +); + +

Description

+HPL_dscal +scales the vector x by alpha. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are  scaled
+        by the scalar alpha.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   HPL_dscal( 3, 2.0, x, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dswap.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dswap.html new file mode 100755 index 000000000..cae6980a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dswap.html @@ -0,0 +1,84 @@ + + +HPL_dswap HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dswap y <-> x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dswap( +const int +N, +double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_dswap +swaps the vectors x and y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are updated
+        with the entries of the incremented array Y.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dswap( 3, x, 1, y, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dscal. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsm.html new file mode 100755 index 000000000..3d60e597f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsm.html @@ -0,0 +1,168 @@ + + +HPL_dtrsm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dtrsm B := A^{-1} * B or B := B * A^{-1}. + +

Synopsis

+#include "hpl.h"

+void +HPL_dtrsm( +const enum HPL_ORDER +ORDER, +const enum HPL_SIDE +SIDE, +const enum HPL_UPLO +UPLO, +const enum HPL_TRANS +TRANS, +const enum HPL_DIAG +DIAG, +const int +M, +const int +N, +const double +ALPHA, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dtrsm +solves one of the matrix equations + + op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + +where alpha is a scalar, X and B are m by n matrices, A is a unit, or +non-unit, upper or lower triangular matrix and op(A) is one of + + op( A ) = A or op( A ) = A^T. + +The matrix X is overwritten on B. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+SIDE    (local input)                 const enum HPL_SIDE
+        On entry, SIDE  specifies  whether  op(A) appears on the left
+        or right of X as follows:
+           SIDE==HplLeft    op( A ) * X = alpha * B,
+           SIDE==HplRight   X * op( A ) = alpha * B.
+
+
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry, TRANS  specifies the form of  op(A)  to be used  in
+        the matrix-matrix operation as follows:                      
+           TRANS==HplNoTrans     : op( A ) = A,                     
+           TRANS==HplTrans       : op( A ) = A^T,                   
+           TRANS==HplConjTrans   : op( A ) = A^T.                   
+
+
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the  matrix B.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix B.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero then the elements of the matrix B need not
+        be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+        otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+        k by k upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower on  entry,
+        the  leading k by k lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+        are not referenced either, but are assumed to be unity.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+
+
+B       (local input/output)          double *
+        On entry,  B  points  to an array of size equal to or greater
+        than LDB * n.  Before entry, the leading  m by n  part of the
+        array B must contain the matrix B, except when alpha is zero,
+        in which case B need not be set on entry.  On exit, the array
+        B is overwritten by the m by n solution matrix.
+
+
+LDB     (local input)                 const int
+        On entry,  LDB  specifies  the  leading  dimension  of  B  as
+        declared  in  the  calling  (sub) program.  LDB  must  be  at
+        least MAX(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+              a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dgemm. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsv.html new file mode 100755 index 000000000..3e4703529 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_dtrsv.html @@ -0,0 +1,136 @@ + + +HPL_dtrsv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dtrsv x := A^{-1} x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dtrsv( +const enum HPL_ORDER +ORDER, +const enum HPL_UPLO +UPLO, +const enum HPL_TRANS +TRANS, +const enum HPL_DIAG +DIAG, +const int +N, +const double * +A, +const int +LDA, +double * +X, +const int +INCX +); + +

Description

+HPL_dtrsv +solves one of the systems of equations + + A * x = b, or A^T * x = b, + +where b and x are n-element vectors and A is an n by n non-unit, or +unit, upper or lower triangular matrix. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies  the equations  to  be  solved as
+        follows:
+           TRANS==HplNoTrans     A   * x = b,
+           TRANS==HplTrans       A^T * x = b.
+
+
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+
+
+N       (local input)                 const int
+        On entry, N specifies the order of the matrix A. N must be at
+        least zero.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+        n by n upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower  on entry,
+        the  leading n by n lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+        are not referenced either, but are assumed to be unity.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,n).
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        Before entry,  the  incremented array  X  must contain  the n
+        element right-hand side vector b. On exit,  X  is overwritten
+        with the solution vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   x[0] = 2.0; x[1] = 1.0;
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+              HplNonUnit, 2, a, 2, x, 1 );
+   printf("x=[%f,%f]\n", x[0], x[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dger, +HPL_dgemv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_equil.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_equil.html new file mode 100755 index 000000000..d64ecab99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_equil.html @@ -0,0 +1,115 @@ + + +HPL_equil HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_equil Equilibrate U and forward the column panel L. + +

Synopsis

+#include "hpl.h"

+void +HPL_equil( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_TRANS +TRANS, +const int +N, +double * +U, +const int +LDU, +int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1, +int * +IWORK +); + +

Description

+HPL_equil +equilibrates the local pieces of U, so that on exit to +this function, pieces of U contained in every process row are of the +same size. This phase makes the rolling phase optimal. In addition, +this function probes for the column panel L and forwards it when +possible. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be equilibrated) information.
+
+
+TRANS   (global input)                const enum HPL_TRANS
+        On entry, TRANS specifies whether  U  is stored in transposed
+        or non-transposed form.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of rows or columns of  U. N
+        must be at least 0.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+        non-transposed form, and MAX(1,N) otherwise.
+
+
+IPLEN   (global input)                int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension NPROW+1.
+
+ +

See Also

+HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_fprintf.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_fprintf.html new file mode 100755 index 000000000..d62b2c871 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_fprintf.html @@ -0,0 +1,58 @@ + + +HPL_fprintf HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_fprintf fprintf + fflush wrapper. + +

Synopsis

+#include "hpl.h"

+void +HPL_fprintf( +FILE * +STREAM, +const char * +FORM, +... +); + +

Description

+HPL_fprintf +is a wrapper around fprintf flushing the output stream. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_fprintf( stdout, "Hello World.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_abort, +HPL_warn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_exit.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_exit.html new file mode 100755 index 000000000..b42f315c9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_exit.html @@ -0,0 +1,39 @@ + + +HPL_grid_exit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_exit Exit process grid. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_exit( +HPL_T_grid * +GRID +); + +

Description

+HPL_grid_exit +marks the process grid object for deallocation. The +returned error code MPI_SUCCESS indicates successful completion. +Other error codes are (MPI) implementation dependent. + +

Arguments

+
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid to be released.
+
+ +

See Also

+HPL_pnum, +HPL_grid_init, +HPL_grid_info. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_info.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_info.html new file mode 100755 index 000000000..47f63672d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_info.html @@ -0,0 +1,70 @@ + + +HPL_grid_info HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_info Retrieve grid information. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_info( +const HPL_T_grid * +GRID, +int * +NPROW, +int * +NPCOL, +int * +MYROW, +int * +MYCOL +); + +

Description

+HPL_grid_info +returns the grid shape and the coordinates in the grid +of the calling process. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+NPROW   (global output)               int *
+        On exit,   NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+
+
+NPCOL   (global output)               int *
+        On exit,   NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+
+
+MYROW   (global output)               int *
+        On exit,  MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+
+
+MYCOL   (global output)               int *
+        On exit,  MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+
+ +

See Also

+HPL_pnum, +HPL_grid_init, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_init.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_init.html new file mode 100755 index 000000000..0bec56e6e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_grid_init.html @@ -0,0 +1,73 @@ + + +HPL_grid_init HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_init Create a process grid. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_init( +MPI_Comm +COMM, +const HPL_T_ORDER +ORDER, +const int +NPROW, +const int +NPCOL, +HPL_T_grid * +GRID +); + +

Description

+HPL_grid_init +creates a NPROW x NPCOL process grid using column- or +row-major ordering from an initial collection of processes identified +by an MPI communicator. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. The coordinates of processes that are not part of the +grid are set to values outside of [0..NPROW) x [0..NPCOL). + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        On entry,  COMM  is  the  MPI  communicator  identifying  the
+        initial  collection  of  processes out of which  the  grid is
+        formed.
+
+
+ORDER   (global input)                const HPL_T_ORDER
+        On entry, ORDER specifies how the processes should be ordered
+        in the grid as follows:
+           ORDER = HPL_ROW_MAJOR    row-major    ordering;
+           ORDER = HPL_COLUMN_MAJOR column-major ordering;
+
+
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid to be created. NPROW must be at least one.
+
+
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid to be created. NPCOL must be at least one.
+
+
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information to be initialized.
+
+ +

See Also

+HPL_pnum, +HPL_grid_info, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_idamax.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_idamax.html new file mode 100755 index 000000000..f16b296f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_idamax.html @@ -0,0 +1,68 @@ + + +HPL_idamax HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_idamax 1st k s.t. |x_k| = max_i(|x_i|). + +

Synopsis

+#include "hpl.h"

+int +HPL_idamax( +const int +N, +const double * +X, +const int +INCX +); + +

Description

+HPL_idamax +returns the index in an n-vector x of the first element +having maximum absolute value. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3];
+   int    imax;
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+   imax = HPL_idamax( 3, x, 1 );
+   printf("imax=%d\n", imax);
+   exit(0);
+   return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2l.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2l.html new file mode 100755 index 000000000..a3eb758da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2l.html @@ -0,0 +1,71 @@ + + +HPL_indxg2l HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2l Map a global index into a local one. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxg2l( +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2l +computes the local index of a matrix entry pointed to by +the global index IG. This local returned index is the same in all +processes. + +

Arguments

+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2lp.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2lp.html new file mode 100755 index 000000000..d9fa00436 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2lp.html @@ -0,0 +1,86 @@ + + +HPL_indxg2lp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2lp Map a global index into a local one and a process coordinate. + +

Synopsis

+#include "hpl.h"

+void +HPL_indxg2lp( +int * +IL, +int * +PROC, +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2lp +computes the local index of a matrix entry pointed to by +the global index IG as well as the process coordinate which possesses +this entry. The local returned index is the same in all processes. + +

Arguments

+
+IL      (output)                      int *
+        On exit, IL specifies the local index corresponding to IG. IL
+        is at least zero.
+
+
+PROC    (output)                      int *
+        On exit,  PROC  is the  coordinate of the process  owning the
+        entry specified by the global index IG. PROC is at least zero
+        and less than NPROCS.
+
+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2p.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2p.html new file mode 100755 index 000000000..0068dede3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxg2p.html @@ -0,0 +1,70 @@ + + +HPL_indxg2p HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2p Map a global index into a process coordinate. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxg2p( +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2p +computes the process coordinate which possesses the entry +of a matrix specified by a global index IG. + +

Arguments

+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxl2g.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxl2g.html new file mode 100755 index 000000000..216e98057 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_indxl2g.html @@ -0,0 +1,78 @@ + + +HPL_indxl2g HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxl2g Map an index-process pair into a global index. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxl2g( +const int +IL, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxl2g +computes the global index of a matrix entry pointed to +by the local index IL of the process indicated by PROC. + +

Arguments

+
+IL      (input)                       const int
+        On entry, IL specifies the local  index of the matrix  entry.
+        IL must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC  specifies the coordinate of the process whose
+        local array row or column is to be determined. PROC  must  be
+        at least zero and strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_infog2l.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_infog2l.html new file mode 100755 index 000000000..34feff72c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_infog2l.html @@ -0,0 +1,155 @@ + + +HPL_infog2l HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_infog2l global to local index translation. + +

Synopsis

+#include "hpl.h"

+void +HPL_infog2l( +int +I, +int +J, +const int +IMB, +const int +MB, +const int +INB, +const int +NB, +const int +RSRC, +const int +CSRC, +const int +MYROW, +const int +MYCOL, +const int +NPROW, +const int +NPCOL, +int * +II, +int * +JJ, +int * +PROW, +int * +PCOL +); + +

Description

+HPL_infog2l +computes the starting local index II, JJ corresponding to +the submatrix starting globally at the entry pointed by I, J. This +routine returns the coordinates in the grid of the process owning the +matrix entry of global indexes I, J, namely PROW and PCOL. + +

Arguments

+
+I       (global input)                int
+        On entry,  I  specifies  the  global  row index of the matrix
+        entry. I must be at least zero.
+
+
+J       (global input)                int
+        On entry,  J  specifies the global column index of the matrix
+        entry. J must be at least zero.
+
+
+IMB     (global input)                const int
+        On entry,  IMB  specifies  the size of the first row block of
+        the global matrix. IMB must be at least one.
+
+
+MB      (global input)                const int
+        On entry,  MB specifies the blocking factor used to partition
+        and  distribute the rows of the matrix A.  MB  must be larger
+        than one.
+
+
+INB     (global input)                const int
+        On entry, INB specifies the size of the first column block of
+        the global matrix. INB must be at least one.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the columns of the matrix A. NB must be larger
+        than one.
+
+
+RSRC    (global input)                const int
+        On entry,  RSRC  specifies  the row coordinate of the process
+        that possesses the row  I.  RSRC  must  be at least zero  and
+        strictly less than NPROW.
+
+
+CSRC    (global input)                const int
+        On entry, CSRC specifies the column coordinate of the process
+        that possesses the column J. CSRC  must be at least zero  and
+        strictly less than NPCOL.
+
+
+MYROW   (local input)                 const int
+        On entry, MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+
+
+MYCOL   (local input)                 const int
+        On entry, MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+
+
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+
+
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+
+
+II      (local output)                int *
+        On exit, II  specifies the  local  starting  row index of the
+        submatrix. On exit, II is at least 0.
+
+
+JJ      (local output)                int *
+        On exit, JJ  specifies the local starting column index of the
+        submatrix. On exit, JJ is at least 0.
+
+
+PROW    (global output)               int *
+        On exit, PROW is the row coordinate of the process owning the
+        entry specified by the global index I.  PROW is at least zero
+        and less than NPROW.
+
+
+PCOL    (global output)               int *
+        On exit, PCOL  is the column coordinate of the process owning
+        the entry specified by the global index J.  PCOL  is at least
+        zero and less than NPCOL.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_jumpit.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_jumpit.html new file mode 100755 index 000000000..be87a1f53 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_jumpit.html @@ -0,0 +1,65 @@ + + +HPL_jumpit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_jumpit jump into the random sequence. + +

Synopsis

+#include "hpl.h"

+void +HPL_jumpit( +int * +MULT, +int * +IADD, +int * +IRANN, +int * +IRANM +); + +

Description

+HPL_jumpit +jumps in the random sequence from the number X(n) encoded +in IRANN to the number X(m) encoded in IRANM using the constants A +and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A +and C obviously depend on m and n, see the function HPL_xjumpm in +order to initialize them. + +

Arguments

+
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant A.
+
+
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant C.
+
+
+IRANN   (local input)                 int *
+        On entry,  IRANN  is an array of dimension 2,  that contains 
+        the 16-lower and 15-higher bits of the encoding of X(n).
+
+
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.  On exit, this
+        array contains respectively the 16-lower and  15-higher bits
+        of the encoding of X(m).
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ladd.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ladd.html new file mode 100755 index 000000000..0c42d80d8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ladd.html @@ -0,0 +1,57 @@ + + +HPL_ladd HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ladd Adds two long positive integers. + +

Synopsis

+#include "hpl.h"

+void +HPL_ladd( +int * +J, +int * +K, +int * +I +); + +

Description

+HPL_ladd +adds without carry two long positive integers K and J and +puts the result into I. The long integers I, J, K are encoded on 64 +bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second +entry. + +

Arguments

+
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+
+
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+
+
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+
+ +

See Also

+HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_lmul.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_lmul.html new file mode 100755 index 000000000..8ef70cba5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_lmul.html @@ -0,0 +1,58 @@ + + +HPL_lmul HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_lmul multiplies 2 long positive integers. + +

Synopsis

+#include "hpl.h"

+void +HPL_lmul( +int * +K, +int * +J, +int * +I +); + +

Description

+HPL_lmul +multiplies without carry two long positive integers K and J +and puts the result into I. The long integers I, J, K are encoded on +64 bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second entry +of each array. For efficiency purposes, the intrinsic modulo function +is inlined. + +

Arguments

+
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+
+
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+
+
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+
+ +

See Also

+HPL_ladd, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_logsort.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_logsort.html new file mode 100755 index 000000000..da271fc19 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_logsort.html @@ -0,0 +1,83 @@ + + +HPL_logsort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_logsort Sort the processes in logarithmic order. + +

Synopsis

+#include "hpl.h"

+void +HPL_logsort( +const int +NPROCS, +const int +ICURROC, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1 +); + +

Description

+HPL_logsort +computes an array IPMAP and its inverse IPMAPM1 that +contain the logarithmic sorted processes id with respect to the local +number of rows of U that they own. This is necessary to ensure that +the logarithmic spreading of U is optimal in terms of number of steps +and communication volume as well. In other words, the largest pieces +of U will be sent a minimal number of times. + +

Arguments

+
+NPROCS  (global input)                const int
+        On entry, NPROCS  specifies the number of process rows in the
+        process grid. NPROCS is at least one.
+
+
+ICURROC (global input)                const int
+        On entry, ICURROC is the source process row.
+
+
+IPLEN   (global input/output)         int *
+        On entry, IPLEN is an array of dimension NPROCS+1,  such that
+        IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+        that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+        rows of U  in the processes before process IPMAP[i] after the
+        sort,  with  the convention that  IPLEN[NPROCS] is  the total
+        number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+        IPLEN[i] is  the  number of rows of A that should be moved to
+        the process IPMAP[i].  IPLEN  is such that the number of rows
+        of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+        this array contains the logarithmic mapping of the processes. In
+        other words, IPMAP[myroc] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROCS)
+
+ +

See Also

+HPL_plindx1, +HPL_plindx10, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_max.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_max.html new file mode 100755 index 000000000..7cf0b0670 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_max.html @@ -0,0 +1,60 @@ + + +HPL_max HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_max Combine (max) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_max( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_max +combines (max) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of  this array contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_min.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_min.html new file mode 100755 index 000000000..9c109c338 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_min.html @@ -0,0 +1,60 @@ + + +HPL_min HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_min Combine (min) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_min( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_min +combines (min) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of  this array contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numroc.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numroc.html new file mode 100755 index 000000000..fa617cac3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numroc.html @@ -0,0 +1,79 @@ + + +HPL_numroc HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_numroc Compute the local number of rows/columns. + +

Synopsis

+#include "hpl.h"

+int +HPL_numroc( +const int +N, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_numroc +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index 0. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numrocI.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numrocI.html new file mode 100755 index 000000000..c1037a193 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_numrocI.html @@ -0,0 +1,86 @@ + + +HPL_numrocI HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_numrocI Compute the local number of rows/columns. + +

Synopsis

+#include "hpl.h"

+int +HPL_numrocI( +const int +N, +const int +I, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_numrocI +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index I. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+
+
+I       (input)                       const int
+        On entry, I  specifies the global index of the matrix entry.
+        I must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pabort.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pabort.html new file mode 100755 index 000000000..89aacbd9f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pabort.html @@ -0,0 +1,57 @@ + + +HPL_pabort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pabort halts execution. + +

Synopsis

+#include "hpl.h"

+void +HPL_pabort( +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_pabort +displays an error message on stderr and halts execution. + +

Arguments

+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

See Also

+HPL_fprintf, +HPL_pwarn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_packL.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_packL.html new file mode 100755 index 000000000..1e8f8106c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_packL.html @@ -0,0 +1,59 @@ + + +HPL_packL HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_packL Form the MPI structure for the row ring broadcasts. + +

Synopsis

+#include "hpl.h"

+int +HPL_packL( +HPL_T_panel * +PANEL, +const int +INDEX, +const int +LEN, +const int +IBUF +); + +

Description

+HPL_packL +forms the MPI data type for the panel to be broadcast. +Successful completion is indicated by the returned error code +MPI_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+
+INDEX   (input)                       const int
+        On entry,  INDEX  points  to  the  first entry of the  packed
+        buffer being broadcast.
+
+
+LEN     (input)                       const int
+        On entry, LEN is the length of the packed buffer.
+
+
+IBUF    (input)                       const int
+        On entry, IBUF  specifies the panel buffer/count/type entries
+        that should be initialized.
+
+ +

See Also

+HPL_binit, +HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pddriver.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pddriver.html new file mode 100755 index 000000000..adcc02e00 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pddriver.html @@ -0,0 +1,27 @@ + + +main HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+main HPL main timing program. + +

Synopsis

+#include "hpl.h"

+int +main(); + +

Description

+main +is the main driver program for testing the HPL routines. +This program is driven by a short data file named "HPL.dat". + +

See Also

+HPL_pdinfo, +HPL_pdtest. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdfact.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdfact.html new file mode 100755 index 000000000..f51cee5d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdfact.html @@ -0,0 +1,78 @@ + + +HPL_pdfact HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdfact recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdfact( +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdfact +recursively factorizes a 1-dimensional panel of columns. +The RPFACT function pointer specifies the recursive algorithm to be +used, either Crout, Left- or Right looking. NBMIN allows to vary the +recursive stopping criterion in terms of the number of columns in the +panel, and NDIV allows to specify the number of subpanels each panel +should be divided into. Usually a value of 2 will be chosen. Finally +PFACT is a function pointer specifying the non-recursive algorithm +to be used on at most NBMIN columns. One can also choose here between +Crout, Left- or Right looking. Empirical tests seem to indicate that +values of 4 or 8 for NBMIN give the best results. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv.html new file mode 100755 index 000000000..ebb9c18e4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv.html @@ -0,0 +1,56 @@ + + +HPL_pdgesv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesv Solve A x = b. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesv( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesv +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with or without look-ahead. The lower triangular factor is left +unpivoted and the pivots are not returned. The right hand side is the +N+1 column of the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv0.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv0.html new file mode 100755 index 000000000..c137975d4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesv0.html @@ -0,0 +1,63 @@ + + +HPL_pdgesv0 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesv0 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesv0( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesv0 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +without look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK1.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK1.html new file mode 100755 index 000000000..1a19edc05 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK1.html @@ -0,0 +1,62 @@ + + +HPL_pdgesvK1 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesvK1 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesvK1( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesvK1 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK2.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK2.html new file mode 100755 index 000000000..f2a9a25f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdgesvK2.html @@ -0,0 +1,63 @@ + + +HPL_pdgesvK2 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesvK2 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesvK2( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesvK2 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdinfo.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdinfo.html new file mode 100755 index 000000000..94a7f78c0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdinfo.html @@ -0,0 +1,252 @@ + + +HPL_pdinfo HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdinfo Read input parameter file. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdinfo( +HPL_T_test * +TEST, +int * +NS, +int * +N, +int * +NBS, +int * +NB, +HPL_T_ORDER * +PMAPPIN, +int * +NPQS, +int * +P, +int * +Q, +int * +NPFS, +HPL_T_FACT * +PF, +int * +NBMS, +int * +NBM, +int * +NDVS, +int * +NDV, +int * +NRFS, +HPL_T_FACT * +RF, +int * +NTPS, +HPL_T_TOP * +TP, +int * +NDHS, +int * +DH, +HPL_T_SWAP * +FSWAP, +int * +TSWAP, +int * +L1NOTRAN, +int * +UNOTRAN, +int * +EQUIL, +int * +ALIGN +); + +

Description

+HPL_pdinfo +reads the startup information for the various tests and +transmits it to all processes. + +

Arguments

+
+TEST    (global output)               HPL_T_test *
+        On entry, TEST  points to a testing data structure.  On exit,
+        the fields of this data structure are initialized as follows:
+        TEST->outfp  specifies the output file where the results will
+        be printed.  It is only defined and used by  the process 0 of
+        the grid.  TEST->thrsh specifies the  threshold value for the
+        test ratio.  TEST->epsil is the relative machine precision of
+        the distributed computer.  Finally  the test counters, kfail,
+        kpass, kskip, ktest are initialized to zero.
+
+
+NS      (global output)               int *
+        On exit,  NS  specifies the number of different problem sizes
+        to be tested. NS is less than or equal to HPL_MAX_PARAM.
+
+
+N       (global output)               int *
+        On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+        the first NS entries of this array contain the  problem sizes
+        to run the code with.
+
+
+NBS     (global output)               int *
+        On exit,  NBS  specifies the number of different distribution
+        blocking factors to be tested. NBS must be less than or equal
+        to HPL_MAX_PARAM.
+
+
+NB      (global output)               int *
+        On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NBS entries of this array contain the values of the
+        various distribution blocking factors, to run the code with.
+
+
+PMAPPIN (global output)               HPL_T_ORDER *
+        On exit,  PMAPPIN  specifies the process mapping onto the no-
+        des of the  MPI machine configuration.  PMAPPIN  defaults  to
+        row-major ordering.
+
+
+NPQS    (global output)               int *
+        On exit, NPQS  specifies the  number of different values that
+        can be used for P and Q, i.e., the number of process grids to
+        run  the  code with.  NPQS must be  less  than  or  equal  to
+        HPL_MAX_PARAM.
+
+
+P       (global output)               int *
+        On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of P,
+        the number of process rows of the  NPQS grids to run the code
+        with.
+
+
+Q       (global output)               int *
+        On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of Q,
+        the number of process columns of the  NPQS  grids to  run the
+        code with.
+
+
+NPFS    (global output)               int *
+        On exit, NPFS  specifies the  number of different values that
+        can be used for PF : the panel factorization algorithm to run
+        the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+
+
+PF      (global output)               HPL_T_FACT *
+        On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NPFS  entries  of this array  contain  the various
+        panel factorization algorithms to run the code with.
+
+
+NBMS    (global output)               int *
+        On exit,  NBMS  specifies  the  number  of  various recursive
+        stopping criteria  to be tested.  NBMS  must be  less than or
+        equal to HPL_MAX_PARAM.
+
+
+NBM     (global output)               int *
+        On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NBMS entries of this array contain the values
+        of the various recursive stopping criteria to be tested.
+
+
+NDVS    (global output)               int *
+        On exit,  NDVS  specifies  the number  of various numbers  of
+        panels in recursion to be tested.  NDVS is less than or equal
+        to HPL_MAX_PARAM.
+
+
+NDV     (global output)               int *
+        On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDVS entries of this array contain the values
+        of the various numbers of panels in recursion to be tested.
+
+
+NRFS    (global output)               int *
+        On exit, NRFS  specifies the  number of different values that
+        can be used for RF : the recursive factorization algorithm to
+        be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+
+
+RF      (global output)               HPL_T_FACT *
+        On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NRFS  entries  of  this array contain  the various
+        recursive factorization algorithms to run the code with.
+
+
+NTPS    (global output)               int *
+        On exit, NTPS  specifies the  number of different values that
+        can be used for the  broadcast topologies  to be tested. NTPS
+        is less than or equal to HPL_MAX_PARAM.
+
+
+TP      (global output)               HPL_T_TOP *
+        On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+        the  first NTPS  entries of this  array  contain  the various
+        broadcast (along rows) topologies to run the code with.
+
+
+NDHS    (global output)               int *
+        On exit, NDHS  specifies the  number of different values that
+        can be used for the  lookahead depths to be  tested.  NDHS is
+        less than or equal to HPL_MAX_PARAM.
+
+
+DH      (global output)               int *
+        On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDHS entries of this array contain the values
+        of lookahead depths to run the code with.  Such a value is at
+        least zero; a value of zero means that no lookahead is used.
+
+
+FSWAP   (global output)               HPL_T_SWAP *
+        On exit, FSWAP specifies the swapping algorithm to be used in
+        all tests.
+
+
+TSWAP   (global output)               int *
+        On exit,  TSWAP  specifies the swapping threshold as a number
+        of columns when the mixed swapping algorithm was chosen.
+
+
+L1NOTRAN (global output)              int *
+        On exit, L1NOTRAN specifies whether the upper triangle of the
+        panels of columns  should  be stored  in  no-transposed  form
+        (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+
+
+UNOTRAN (global output)               int *
+        On exit, UNOTRAN  specifies whether the panels of rows should
+        be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+        form (UNOTRAN=0) during their broadcast.
+
+
+EQUIL   (global output)               int *
+        On exit,  EQUIL  specifies  whether  equilibration during the
+        swap-broadcast  of  the  panel of rows  should  be  performed
+        (EQUIL=1) or not (EQUIL=0).
+
+
+ALIGN   (global output)               int *
+        On exit,  ALIGN  specifies the alignment  of  the dynamically
+        allocated buffers in double precision words. ALIGN is greater
+        than zero.
+
+ +

See Also

+HPL_pddriver, +HPL_pdtest. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlamch.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlamch.html new file mode 100755 index 000000000..c1b51370a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlamch.html @@ -0,0 +1,67 @@ + + +HPL_pdlamch HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlamch determines machine-specific arithmetic constants. + +

Synopsis

+#include "hpl.h"

+double +HPL_pdlamch( +MPI_Comm +COMM, +const HPL_T_MACH +CMACH +); + +

Description

+HPL_pdlamch +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such that +1/sfmin does not overflow, the base of the machine (base), the precision +(prec), the number of (base) digits in the mantissa (t), whether +rounding occurs in addition (rnd = 1.0 if it does, 0.0 otherwise), the minimum +exponent before (gradual) underflow (emin), the underflow threshold +(rmin) - base**(emin-1), the largest exponent before overflow (emax), the +overflow threshold (rmax) - (base**emax)*(1-eps). + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+
+CMACH   (global input)                const HPL_T_MACH
+        Specifies the value to be returned by HPL_pdlamch            
+           = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+           = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+           = HPL_MACH_BASE,  HPL_pdlamch := base                     
+           = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+           = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+           = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+           = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+           = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+           = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+           = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+
+ + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlange.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlange.html new file mode 100755 index 000000000..0d1affc3d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlange.html @@ -0,0 +1,88 @@ + + +HPL_pdlange HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlange Compute ||A||. + +

Synopsis

+#include "hpl.h"

+double +HPL_pdlange( +const HPL_T_grid * +GRID, +const HPL_T_NORM +NORM, +const int +M, +const int +N, +const int +NB, +const double * +A, +const int +LDA +); + +

Description

+HPL_pdlange +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a distributed matrix A: + + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+NORM    (global input)                const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+
+
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+A       (local input)                 const double *
+        On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+        that contains the local pieces of the distributed matrix A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+ +

See Also

+HPL_pdlaprnt, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaprnt.html new file mode 100755 index 000000000..0ce810db0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaprnt.html @@ -0,0 +1,94 @@ + + +HPL_pdlaprnt HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaprnt Print a distributed matrix A. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaprnt( +const HPL_T_grid * +GRID, +const int +M, +const int +N, +const int +NB, +double * +A, +const int +LDA, +const int +IAROW, +const int +IACOL, +const char * +CMATNM +); + +

Description

+HPL_pdlaprnt +prints to standard error a distributed matrix A. The +local pieces of A are sent to the process of coordinates (0,0) in +the grid and then printed. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+M       (global input)                const int
+        On entry,  M  specifies the number of rows of the coefficient
+        matrix A. M must be at least zero.
+
+
+N       (global input)                const int
+        On  entry,   N   specifies  the  number  of  columns  of  the
+        coefficient matrix A. N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+A       (local input)                 double *
+        On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+        This array contains the coefficient matrix to be printed.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+
+IAROW   (global input)                const int
+        On entry,  IAROW  specifies the row process coordinate owning
+        the  first row of A.  IAROW  must be  larger than or equal to
+        zero and less than NPROW.
+
+
+IACOL   (global input)                const int
+        On entry,  IACOL  specifies  the  column  process  coordinate
+        owning the  first column  of A. IACOL  must be larger than or
+        equal to zero and less than NPCOL.
+
+
+CMATNM  (global input)                const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+
+ +

See Also

+HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00N.html new file mode 100755 index 000000000..07279fdb0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00N.html @@ -0,0 +1,82 @@ + + +HPL_pdlaswp00N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp00N Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp00N( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp00N +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNN, +HPL_pdupdateTN, +HPL_pipid, +HPL_plindx0, +HPL_dlaswp01N, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp04N, +HPL_dlaswp05N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00T.html new file mode 100755 index 000000000..08b8ea770 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp00T.html @@ -0,0 +1,82 @@ + + +HPL_pdlaswp00T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp00T Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp00T( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp00T +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNT, +HPL_pdupdateTT, +HPL_pipid, +HPL_plindx0, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03T, +HPL_dlaswp04T, +HPL_dlaswp05T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01N.html new file mode 100755 index 000000000..2d4772fda --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01N.html @@ -0,0 +1,86 @@ + + +HPL_pdlaswp01N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp01N Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp01N( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp01N +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNN, +HPL_pdupdateTN, +HPL_pipid, +HPL_plindx1, +HPL_plindx10, +HPL_spreadN, +HPL_equil, +HPL_rollN, +HPL_dlaswp00N, +HPL_dlaswp01N, +HPL_dlaswp06N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01T.html new file mode 100755 index 000000000..f6a5d8c4b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdlaswp01T.html @@ -0,0 +1,86 @@ + + +HPL_pdlaswp01T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp01T Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp01T( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp01T +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNT, +HPL_pdupdateTT, +HPL_pipid, +HPL_plindx1, +HPL_plindx10, +HPL_spreadT, +HPL_equil, +HPL_rollT, +HPL_dlaswp10N, +HPL_dlaswp01T, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmatgen.html new file mode 100755 index 000000000..28fb95509 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmatgen.html @@ -0,0 +1,87 @@ + + +HPL_pdmatgen HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdmatgen Parallel random matrix generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdmatgen( +const HPL_T_grid * +GRID, +const int +M, +const int +N, +const int +NB, +double * +A, +const int +LDA, +const int +ISEED +); + +

Description

+HPL_pdmatgen +generates (or regenerates) a parallel random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+A       (local output)                double *
+        On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+        On exit, this array contains the coefficients of the randomly
+        generated matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+
+ISEED   (global input)                const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmxswp.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmxswp.html new file mode 100755 index 000000000..c11d2b2da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdmxswp.html @@ -0,0 +1,96 @@ + + +HPL_pdmxswp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdmxswp swaps and broadcasts the pivot row. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdmxswp( +HPL_T_panel * +PANEL, +const int +M, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_pdmxswp +swaps and broadcasts the absolute value max row using +bi-directional exchange. The buffer is partially set by HPL_dlocmax. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by + + log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + +where lat and bdwth are the latency and bandwidth of the network for +double precision real elements. Communication only occurs in one +process column. Mono-directional links will cause the communication +cost to double. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of the matrix
+        column on which this function operates.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        It  is assumed that  HPL_dlocmax  was called  prior  to  this
+        routine to  initialize  the first four entries of this array.
+        On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+        Note that this is also the  JJth  row  (or column) of L1. The
+        remaining part is used as a temporary array.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrN.html new file mode 100755 index 000000000..663d2e266 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrN.html @@ -0,0 +1,100 @@ + + +HPL_pdpancrN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpancrN Crout panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpancrN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpancrN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in no-transpose form (i.e. just like the input +matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrT.html new file mode 100755 index 000000000..0e1490430 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpancrT.html @@ -0,0 +1,99 @@ + + +HPL_pdpancrT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpancrT Crout panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpancrT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpancrT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_disp.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_disp.html new file mode 100755 index 000000000..cb78fa4be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_disp.html @@ -0,0 +1,38 @@ + + +HPL_pdpanel_disp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_disp Deallocate a panel data structure. + +

Synopsis

+#include "hpl.h"

+int +HPL_pdpanel_disp( +HPL_T_panel * * +PANEL +); + +

Description

+HPL_pdpanel_disp +deallocates the panel structure and resources and +stores the error code returned by the panel factorization. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to be deallocated.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_free. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_free.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_free.html new file mode 100755 index 000000000..d33e5e400 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_free.html @@ -0,0 +1,38 @@ + + +HPL_pdpanel_free HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_free Deallocate the panel resources. + +

Synopsis

+#include "hpl.h"

+int +HPL_pdpanel_free( +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdpanel_free +deallocates the panel resources and stores the error +code returned by the panel factorization. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points  to  the  panel data  structure from
+        which the resources should be deallocated.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_disp. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_init.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_init.html new file mode 100755 index 000000000..2d105354f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_init.html @@ -0,0 +1,99 @@ + + +HPL_pdpanel_init HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_init Initialize the panel resources. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanel_init( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +M, +const int +N, +const int +JB, +HPL_T_pmat * +A, +const int +IA, +const int +JA, +const int +TAG, +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdpanel_init +initializes a panel data structure. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+
+
+JB      (global input)                const int
+        On entry, JB specifies the number of columns of the panel.
+        JB must be at least zero.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+
+
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+
+
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_disp, +HPL_pdpanel_free. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_new.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_new.html new file mode 100755 index 000000000..1b3029ecb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanel_new.html @@ -0,0 +1,99 @@ + + +HPL_pdpanel_new HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_new Create a panel data structure. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanel_new( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +M, +const int +N, +const int +JB, +HPL_T_pmat * +A, +const int +IA, +const int +JA, +const int +TAG, +HPL_T_panel * * +PANEL +); + +

Description

+HPL_pdpanel_new +creates and initializes a panel data structure. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+
+
+JB      (global input)                const int
+        On entry, JB specifies the number of columns of the panel.
+        JB must be at least zero.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+
+
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+
+
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+
+
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to create and initialize.
+
+ +

See Also

+HPL_pdpanel_init, +HPL_pdpanel_disp, +HPL_pdpanel_free. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllN.html new file mode 100755 index 000000000..386815fd2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllN.html @@ -0,0 +1,100 @@ + + +HPL_pdpanllN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanllN Left-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanllN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanllN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllT.html new file mode 100755 index 000000000..04307e823 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanllT.html @@ -0,0 +1,99 @@ + + +HPL_pdpanllT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanllT Left-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanllT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanllT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlN.html new file mode 100755 index 000000000..8d705c63c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlN.html @@ -0,0 +1,100 @@ + + +HPL_pdpanrlN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanrlN Right-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanrlN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanrlN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlT.html new file mode 100755 index 000000000..af458e7a1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdpanrlT.html @@ -0,0 +1,99 @@ + + +HPL_pdpanrlT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanrlT Right-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanrlT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanrlT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrN.html new file mode 100755 index 000000000..9169c48cc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpancrN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpancrN Crout recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpancrN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpancrN +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrT.html new file mode 100755 index 000000000..cc9047c3c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpancrT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpancrT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpancrT Crout recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpancrT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpancrT +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllN.html new file mode 100755 index 000000000..bf16e6009 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanllN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanllN Left-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanllN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanllN +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllT.html new file mode 100755 index 000000000..9904fb326 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanllT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanllT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanllT Left-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanllT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanllT +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlN.html new file mode 100755 index 000000000..9758c0722 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanrlN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanrlN Right-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanrlN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanrlN +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlT.html new file mode 100755 index 000000000..ed48a815d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdrpanrlT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanrlT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanrlT Right-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanrlT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanrlT +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtest.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtest.html new file mode 100755 index 000000000..1c11c34d7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtest.html @@ -0,0 +1,81 @@ + + +HPL_pdtest HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdtest Perform one test. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdtest( +HPL_T_test * +TEST, +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +N, +const int +NB +); + +

Description

+HPL_pdtest +performs one test given a set of parameters such as the +process grid, the problem size, the distribution blocking factor ... +This function generates the data, calls and times the linear system +solver, checks the accuracy of the obtained vector solution and +writes this information to the file pointed to by TEST->outfp. + +

Arguments

+
+TEST    (global input)                HPL_T_test *
+        On entry,  TEST  points  to a testing data structure:  outfp
+        specifies the output file where the results will be printed.
+        It is only defined and used by the process  0  of the  grid.
+        thrsh  specifies  the  threshold  value  for the test ratio.
+        Concretely, a test is declared "PASSED"  if and only if  the
+        following inequality is satisfied:
+        ||Ax-b||_oo / ( epsil *
+                        ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                         N )  < thrsh.
+        epsil  is the  relative machine precision of the distributed
+        computer. Finally the test counters, kfail, kpass, kskip and
+        ktest are updated as follows:  if the test passes,  kpass is
+        incremented by one;  if the test fails, kfail is incremented
+        by one; if the test is skipped, kskip is incremented by one.
+        ktest is left unchanged.
+
+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters to be used for this test.
+
+
+N       (global input)                const int
+        On entry,  N specifies the order of the coefficient matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+ +

See Also

+HPL_pddriver, +HPL_pdinfo. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtrsv.html new file mode 100755 index 000000000..0bb182dc9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdtrsv.html @@ -0,0 +1,64 @@ + + +HPL_pdtrsv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdtrsv Solve triu( A ) x = b. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdtrsv( +HPL_T_grid * +GRID, +HPL_T_pmat * +AMAT +); + +

Description

+HPL_pdtrsv +solves an upper triangular system of linear equations. + +The rhs is the last column of the N by N+1 matrix A. The solve starts +in the process column owning the Nth column of A, so the rhs b may +need to be moved one process column to the left at the beginning. The +routine therefore needs a column vector in every process column but +the one owning b. The result is replicated in all process rows, and +returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + +The algorithm uses decreasing one-ring broadcast in process rows and +columns implemented in terms of synchronous communication point to +point primitives. The lookahead of depth 1 is used to minimize the +critical path. This entire operation is essentially ``latency'' bound +and an estimate of its running time is given by: + + (move rhs) lat + N / ( P bdwth ) + + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + gam2 N^2 / ( P Q ), + +where gam2 is an estimate of the Level 2 BLAS rate of execution. +There are N / NB diagonal blocks. One must exchange 2 messages of +length NB to compute the next NB entries of the vector solution, as +well as performing a total of N^2 floating point operations. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+AMAT    (local input/output)          HPL_T_pmat *
+        On entry,  AMAT  points  to the data structure containing the
+        local array information.
+
+ +

See Also

+HPL_pdgesv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNN.html new file mode 100755 index 000000000..b77cddbce --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNN.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateNN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateNN Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateNN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateNN +broadcasts/forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST  is
+        NULL, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00N, +HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNT.html new file mode 100755 index 000000000..4ecb1f687 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateNT.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateNT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateNT Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateNT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateNT +broadcasts/forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST  is
+        NULL, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00T, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTN.html new file mode 100755 index 000000000..ae735bf84 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTN.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateTN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateTN Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateTN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateTN +broadcasts/forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST  is
+        NULL, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00N, +HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTT.html new file mode 100755 index 000000000..7c69f8828 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pdupdateTT.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateTT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateTT Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateTT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateTT +broadcasts/forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing +submatrix (using the panel PANEL). + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST  is
+        NULL, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00T, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_perm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_perm.html new file mode 100755 index 000000000..9312eb4eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_perm.html @@ -0,0 +1,67 @@ + + +HPL_perm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_perm Combine 2 index arrays - Generate the permutation. + +

Synopsis

+#include "hpl.h"

+void +HPL_perm( +const int +N, +int * +LINDXA, +int * +LINDXAU, +int * +IWORK +); + +

Description

+HPL_perm +combines two index arrays and generates the corresponding +permutation. First, this function computes the inverse of LINDXA, and +then combines it with LINDXAU. Second, in order to be able to perform +the permutation in place, LINDXAU is overwritten by the sequence of +permutations producing the same result. What we ultimately want to +achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the +call to this function, this in-place permutation can be performed by +for i in [0..N) swap U[i] with U[LINDXAU[i]]. + +

Arguments

+
+N       (global input)                const int
+        On entry,  N  specifies the length of the arrays  LINDXA  and
+        LINDXAU. N should be at least zero.
+
+
+LINDXA  (global input/output)         int *
+        On entry,  LINDXA  is an array of dimension N  containing the
+        source indexes. On exit,  LINDXA  contains the combined index
+        array.
+
+
+LINDXAU (global input/output)         int *
+        On entry,  LINDXAU is an array of dimension N  containing the
+        target indexes.  On exit,  LINDXAU  contains  the sequence of
+        permutation,  that  should be applied  in increasing order to
+        permute the underlying array U in place.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension N.
+
+ +

See Also

+HPL_plindx1, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pipid.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pipid.html new file mode 100755 index 000000000..e6deb3d93 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pipid.html @@ -0,0 +1,95 @@ + + +HPL_pipid HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pipid Simplify the pivot vector. + +

Synopsis

+#include "hpl.h"

+void +HPL_pipid( +HPL_T_panel * +PANEL, +int * +K, +int * +IPID +); + +

Description

+HPL_pipid +computes an array IPID that contains the source and final +destination of matrix rows resulting from the application of N +interchanges as computed by the LU factorization with row partial +pivoting. The array IPID is such that the row of global index IPID(i) +should be mapped onto the row of global index IPID(i+1). Note that we +cannot really know the length of IPID a priori. However, we know that +this array is at least 2*N long, since there are N rows to swap and +broadcast. The length of this array must be smaller than or equal to +4*N, since every row is swapped with at most a single distinct remote +row. The algorithm constructing IPID goes as follows: Let IA be the +global index of the first row to be swapped. + +For every row src IA + i with i in [0..N) to be swapped with row dst +such that dst is given by DPIV[i]: + +Is row src the destination of a previous row of the current block, +that is, is there k odd such that IPID(k) is equal to src ? + Yes: update this destination with dst. For example, if the +pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, +we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it +was thought so far ... + No : add the pair (src,dst) at the end of IPID; row src has not +been moved yet. + +Is row dst different from src the destination of a previous row of +the current block, i.e., is there k odd such that IPID(k) is equal to +dst ? + Yes: update IPID(k) with src. For example, if the pivot array +is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in +fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought +so far ... + No : add the pair (dst,src) at the end of IPID; row dst has not +been moved yet. + +Note that when src is equal to dst, the pair (dst,src) should not be +added to IPID in order to avoid duplicated entries in this array. 
+During the construction of the array IPID, we make sure that the +first N entries are such that IPID(k) with k odd is equal to IA+k/2. +For k in [0..K/2), the row of global index IPID(2*k) should be +mapped onto the row of global index IPID(2*k+1). + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global output)               int *
+        On exit, K specifies the number of entries in  IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global output)               int *
+        On entry, IPID is an array of length 4*N.  On exit, the first
+        K entries of that array contain the src and final destination
+        resulting  from  the  application of the  N  interchanges  as
+        specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+        stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+        in [0..N).
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx0.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx0.html new file mode 100755 index 000000000..f3dbbcdea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx0.html @@ -0,0 +1,187 @@ + + +HPL_plindx0 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx0 Compute local swapping index arrays. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx0( +HPL_T_panel * +PANEL, +const int +K, +int * +IPID, +int * +LINDXA, +int * +LINDXAU, +int * +LLEN +); + +

Description

+HPL_plindx0 +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. + +On entry, the array IPID of length K is such that the row of global +index IPID(i) should be mapped onto row of global index IPID(i+1). +Let IA be the global index of the first row to be swapped. For k in +[0..K/2), the row of global index IPID(2*k) should be mapped onto the +row of global index IPID(2*k+1). The question then, is to determine +which rows should ultimately be part of U. + +First, some rows of the process ICURROW may be swapped locally. One +of these rows belongs to U, the other one belongs to my local piece of +A. The other rows of the current block are swapped with remote rows +and are thus not part of U. These rows however should be sent along, +and grabbed by the other processes as we progress in the exchange +phase. + +So, assume that I am ICURROW and consider a row of index IPID(2*i) +that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less +than N, this row is locally swapped and should be copied into U at +the position IPID(2*i+1) - IA. No row will be exchanged for this one. +If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be +locally copied into my local piece of A at the position corresponding +to the row of global index IPID(2*i+1). + +If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) +is to be swapped away and strictly speaking does not belong to U, but +to A remotely. Since this process will however send this array U, +this row is copied into U, exactly where the row IPID(2*i+1) should +go. For this, we search IPID for k1, such that IPID(2*k1) is equal to +IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position +IPID(2*k1+1)-IA. + +It is thus important to put the rows that go into U, i.e., such that +IPID(2*i+1) - IA is less than N at the beginning of the array IPID. 
By +doing so, U is formed, and the local copy is performed in just one +sweep. + +Two lists LINDXA and LINDXAU are built. LINDXA contains the local +index of the rows I have that should be copied. LINDXAU contains the +local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A +is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). In the process +ICURROW, the initial packing algorithm proceeds as follows. + + for all entries in IPID, + if IPID(2*i) is in ICURROW, + if IPID(2*i+1) is in ICURROW, + if( IPID(2*i+1) - IA < N ) + save corresponding local position + of this row (LINDXA); + save local position (LINDXAU) in U + where this row goes; + [copy row IPID(2*i) in U at position + IPID(2*i+1)-IA; ]; + else + save corresponding local position of + this row (LINDXA); + save local position (-LINDXAU) in A + where this row goes; + [copy row IPID(2*i) in my piece of A + at IPID(2*i+1);] + end if + else + find k1 such that IPID(2*k1) = IPID(2*i+1); + copy row IPID(2*i) in U at position + IPID(2*k1+1)-IA; + save corresponding local position of this + row (LINDXA); + save local position (LINDXAU) in U where + this row goes; + end if + end if + end for + +Second, if I am not the current row process ICURROW, all source rows +in IPID that I own are part of U. Indeed, they are swapped with one +row of the current block of rows, and the main factorization +algorithm proceeds one row after each other. The processes different +from ICURROW, should exchange and accumulate those rows until they +receive some data previously owned by the process ICURROW. + +In processes different from ICURROW, the initial packing algorithm +proceeds as follows. Consider a row of global index IPID(2*i) that I +own. When I will be receiving data previously owned by ICURROW, i.e., +U, row IPID(2*i) should replace the row in U at pos. 
IPID(2*i+1)-IA, +and this particular row of U should be first copied into my piece of +A, at A(il,:), where il is the local row index corresponding to +IPID(2*i). Now,initially, this row will be packed into workspace, say +as the kth row of that work array. The following algorithm sets +LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row +should be copied. LINDXA(k) stores the local index in A where this +row of U should be copied, i.e il. + + for all entries in IPID, + if IPID(2*i) is not in ICURROW, + copy row IPID(2*i) in work array; + save corresponding local position + of this row (LINDXA); + save position (LINDXAU) in U where + this row should be copied; + end if + end for + +Since we are at it, we also globally figure out how many rows every +process has. That is necessary, because it would rather be cumbersome +to figure it on the fly during the bi-directional exchange phase. +This information is kept in the array LLEN of size NPROW. Also note +that the arrays LINDXA and LINDXAU are of max length equal to 2*N. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+LINDXA  (local output)                int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+
+
+LINDXAU (local output)                int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+
+
+LLEN    (global output)               int *
+        On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+        contains how many rows every process has.
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx1.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx1.html new file mode 100755 index 000000000..0a49ede0b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx1.html @@ -0,0 +1,130 @@ + + +HPL_plindx1 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx1 Compute local swapping index arrays. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx1( +HPL_T_panel * +PANEL, +const int +K, +const int * +IPID, +int * +IPA, +int * +LINDXA, +int * +LINDXAU, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1, +int * +PERMU, +int * +IWORK +); + +

Description

+HPL_plindx1 +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. In addition, this function computes +three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic +mapping information for the spreading phase. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+IPA     (global output)               int *
+        On exit,  IPA  specifies  the number of rows that the current
+        process row has that either belong to U  or should be swapped
+        with remote rows of A.
+
+
+LINDXA  (global output)               int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+
+
+LINDXAU (global output)               int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+
+
+IPLEN   (global output)               int *
+        On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in  the  processes  before  process  IPMAP[i]  after the sort
+        with the convention that IPLEN[nprow]  is the total number of
+        rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+        local number of rows of A that should be moved to the process
+        IPMAP[i]. IPLEN is such that the number of rows of the source
+        process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+
+
+PERMU   (global output)               int *
+        On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+        contains  a sequence of permutations,  that should be applied
+        in increasing order to permute in place the row panel U.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension 2*JB.
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx10.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx10.html new file mode 100755 index 000000000..fbfd6be2f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_plindx10.html @@ -0,0 +1,87 @@ + + +HPL_plindx10 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx10 Compute the logarithmic maps for the spreading. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx10( +HPL_T_panel * +PANEL, +const int +K, +const int * +IPID, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1 +); + +

Description

+HPL_plindx10 +computes three arrays IPLEN, IPMAP and IPMAPM1 that +contain the logarithmic mapping information for the spreading phase. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+IPLEN   (global output)               int *
+        On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in the processes before process IPMAP[i] after the sort, with
+        the convention that IPLEN[nprow] is the total number of rows.
+        In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+        rows of  A  that should be moved for each process.  IPLEN  is
+        such that the number of rows of the source process row can be
+        computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+        this  array are sorted  so  that  the quantities IPLEN[i+1] -
+        IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pnum.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pnum.html new file mode 100755 index 000000000..8bedc3016 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pnum.html @@ -0,0 +1,54 @@ + + +HPL_pnum HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pnum Rank determination. + +

Synopsis

+#include "hpl.h"

+int +HPL_pnum( +const HPL_T_grid * +GRID, +const int +MYROW, +const int +MYCOL +); + +

Description

+HPL_pnum +determines the rank of a process as a function of its +coordinates in the grid. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+MYROW   (local input)                 const int
+        On entry,  MYROW  specifies the row coordinate of the process
+        whose rank is to be determined. MYROW must be greater than or
+        equal to zero and less than NPROW.
+
+
+MYCOL   (local input)                 const int
+        On entry,  MYCOL  specifies  the  column  coordinate  of  the
+        process whose rank is to be determined. MYCOL must be greater
+        than or equal to zero and less than NPCOL.
+
+ +

See Also

+HPL_grid_init, +HPL_grid_info, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer.html new file mode 100755 index 000000000..abef45946 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer.html @@ -0,0 +1,49 @@ + + +HPL_ptimer HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer Timer facility. + +

Synopsis

+#include "hpl.h"

+void +HPL_ptimer( +const int +I +); + +

Description

+HPL_ptimer +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_ptimer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_ptimer_boot() prior to any of +the functions mentioned above. + +

Arguments

+
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+
+ +

See Also

+HPL_ptimer_cputime, +HPL_ptimer_walltime. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_cputime.html new file mode 100755 index 000000000..cffd863b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_cputime.html @@ -0,0 +1,35 @@ + + +HPL_ptimer_cputime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer_cputime Return the CPU time. + +

Synopsis

+#include "hpl.h"

+double +HPL_ptimer_cputime(); + +

Description

+HPL_ptimer_cputime +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. + +

See Also

+HPL_ptimer_walltime, +HPL_ptimer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_walltime.html new file mode 100755 index 000000000..a509897f1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_ptimer_walltime.html @@ -0,0 +1,26 @@ + + +HPL_ptimer_walltime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer_walltime Return the elapsed (wall-clock) time. + +

Synopsis

+#include "hpl.h"

+double +HPL_ptimer_walltime(); + +

Description

+HPL_ptimer_walltime +returns the elapsed (wall-clock) time. + +

See Also

+HPL_ptimer_cputime, +HPL_ptimer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pwarn.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pwarn.html new file mode 100755 index 000000000..221d23982 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_pwarn.html @@ -0,0 +1,63 @@ + + +HPL_pwarn HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pwarn displays an error message. + +

Synopsis

+#include "hpl.h"

+void +HPL_pwarn( +FILE * +STREAM, +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_pwarn +displays an error message. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

See Also

+HPL_pabort, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rand.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rand.html new file mode 100755 index 000000000..5aef6669c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rand.html @@ -0,0 +1,40 @@ + + +HPL_rand HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rand random number generator. + +

Synopsis

+#include "hpl.h"

+double +HPL_rand(); + +

Description

+HPL_rand +generates the next number in the random sequence. This +function ensures that this number lies in the interval (-0.5, 0.5]. + +The static array irand contains the information (2 integers) required +to generate the next number in the sequence X(n). This number is +computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the +constant d is the largest 64 bit positive integer. The array irand is +then updated for the generation of the next number X(n+1) in the +random sequence as follows X(n+1) = a * X(n) + c. The constants a and +c should have been preliminarily stored in the arrays ias and ics as +2 pairs of integers. The initialization of ias, ics and irand is +performed by the function HPL_setran. + +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_recv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_recv.html new file mode 100755 index 000000000..afcb570c5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_recv.html @@ -0,0 +1,67 @@ + + +HPL_recv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_recv Receive a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_recv( +double * +RBUF, +int +RCOUNT, +int +SRC, +int +RTAG, +MPI_Comm +COMM +); + +

Description

+HPL_recv +is a simple wrapper around MPI_Recv. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. + +

Arguments

+
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+
+
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+
+
+SRC     (local input)                 int
+        On entry, SRC  specifies the rank of the  sending  process in
+        the communication space defined by COMM.
+
+
+RTAG    (local input)                 int
+        On entry,  RTAG specifies the message tag to be used for this
+        communication operation.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_send, +HPL_sdrv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_reduce.html new file mode 100755 index 000000000..026435ed6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_reduce.html @@ -0,0 +1,75 @@ + + +HPL_reduce HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_reduce Reduce operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_reduce( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const HPL_T_OP +OP, +const int +ROOT, +MPI_Comm +COMM +); + +

Description

+HPL_reduce +performs a global reduce operation across all processes of +a group. Note that the input buffer is used as a work array, so in all +processes but the accumulating process the original data is corrupted. + +

Arguments

+
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+        exit,  and  in process of rank  ROOT  this array contains the
+        reduced data.  This  buffer  is also used as workspace during
+        the operation in the other processes of the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+
+
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the accumulating process.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollN.html new file mode 100755 index 000000000..1e1a49068 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollN.html @@ -0,0 +1,99 @@ + + +HPL_rollN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rollN Roll U and forward the column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_rollN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +N, +double * +U, +const int +LDU, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_rollN +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+
+
+N       (local input)                 const int
+        On entry, N specifies the number of columns of  U.  N must be
+        at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,IPLEN[NPROW]).
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollT.html new file mode 100755 index 000000000..a6ac29336 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_rollT.html @@ -0,0 +1,99 @@ + + +HPL_rollT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rollT Roll U and forward the column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_rollT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +N, +double * +U, +const int +LDU, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_rollT +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+
+
+N       (local input)                 const int
+        On entry, N specifies the local number of rows of  U.  N must
+        be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,N).
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sdrv.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sdrv.html new file mode 100755 index 000000000..6f5b5880c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sdrv.html @@ -0,0 +1,88 @@ + + +HPL_sdrv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_sdrv Send and receive a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_sdrv( +double * +SBUF, +int +SCOUNT, +int +STAG, +double * +RBUF, +int +RCOUNT, +int +RTAG, +int +PARTNER, +MPI_Comm +COMM +); + +

Description

+HPL_sdrv +is a simple wrapper around MPI_Sendrecv. Its main purpose is +to allow for some experimentation and tuning of this simple function. +Messages of length less than or equal to zero are not sent nor +received. Successful completion is indicated by the returned error +code HPL_SUCCESS. + +

Arguments

+
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+
+
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number  of double precision
+        entries in SBUF. SCOUNT must be at least zero.
+
+
+STAG    (local input)                 int
+        On entry,  STAG  specifies the message tag to be used for the
+        sending communication operation.
+
+
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+
+
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+
+
+RTAG    (local input)                 int
+        On entry,  RTAG  specifies the message tag to be used for the
+        receiving communication operation.
+
+
+PARTNER (local input)                 int
+        On entry,  PARTNER  specifies  the rank of the  collaborative
+        process in the communication space defined by COMM.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_send, +HPL_recv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_send.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_send.html new file mode 100755 index 000000000..05dcb7e6d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_send.html @@ -0,0 +1,67 @@ + + +HPL_send HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_send Send a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_send( +double * +SBUF, +int +SCOUNT, +int +DEST, +int +STAG, +MPI_Comm +COMM +); + +

Description

+HPL_send +is a simple wrapper around MPI_Send. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. + +

Arguments

+
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+
+
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number of  double precision
+        entries in SBUF. SCOUNT must be at least zero.
+
+
+DEST    (local input)                 int
+        On entry, DEST specifies the rank of the receiving process in
+        the communication space defined by COMM.
+
+
+STAG    (local input)                 int
+        On entry,  STAG specifies the message tag to be used for this
+        communication operation.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_recv, +HPL_sdrv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_setran.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_setran.html new file mode 100755 index 000000000..44f37e35e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_setran.html @@ -0,0 +1,52 @@ + + +HPL_setran HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_setran Manage the random number generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_setran( +const int +OPTION, +int * +IRAN +); + +

Description

+HPL_setran +initializes the random generator with the encoding of the +first number X(0) in the sequence, and the constants a and c used to +compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), +a and c are stored in the static variables irand, ias and ics. When +OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the +values of the input array IRAN. When OPTION is 3, IRAN is set to the +current value of irand, and irand is then incremented. + +

Arguments

+
+OPTION  (local input)                 const int
+        On entry, OPTION  is an integer that specifies the operations
+        to be performed on the random generator as specified above.
+
+
+IRAN    (local input/output)          int *
+        On entry,  IRAN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of a random number.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadN.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadN.html new file mode 100755 index 000000000..f0d8f8938 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadN.html @@ -0,0 +1,120 @@ + + +HPL_spreadN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_spreadN Spread row panel U and forward current column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_spreadN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_SIDE +SIDE, +const int +N, +double * +U, +const int +LDU, +const int +SRCDIST, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_spreadN +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of rows of U, that +should be spread on any given process row. This function also probes +for the presence of the column panel PBCST. In case of success, this +panel will be forwarded. If PBCST is NULL on input, this probing +mechanism will be disabled. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+
+
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+
+
+N       (global input)                const int
+        On entry,  N  specifies  the  local number of columns of U. N
+        must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]).
+
+
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadT.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadT.html new file mode 100755 index 000000000..cec561646 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_spreadT.html @@ -0,0 +1,120 @@ + + +HPL_spreadT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_spreadT Spread row panel U and forward current column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_spreadT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_SIDE +SIDE, +const int +N, +double * +U, +const int +LDU, +const int +SRCDIST, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_spreadT +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of columns of U, +that should be spread on any given process row. This function also +probes for the presence of the column panel PBCST. If available, +this panel will be forwarded. If PBCST is NULL on input, this +probing mechanism will be disabled. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+
+
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+
+
+N       (global input)                const int
+        On entry,  N  specifies the local number of rows of U. N must
+        be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,N).
+
+
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that  IPLEN[i]  is the total number  of rows of U  in
+        the processes before process IPMAP[i],  with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sum.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sum.html new file mode 100755 index 000000000..be785b99e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_sum.html @@ -0,0 +1,61 @@ + + +HPL_sum HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_sum Combine (sum) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_sum( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_sum +combines (sum) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer.html new file mode 100755 index 000000000..8e6a79803 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer.html @@ -0,0 +1,49 @@ + + +HPL_timer HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer Timer facility. + +

Synopsis

+#include "hpl.h"

+void +HPL_timer( +const int +I +); + +

Description

+HPL_timer +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_timer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_timer calls in them. HPL_timer_enable() will re-enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_timer_boot() prior to any of +the functions mentioned above. + +

Arguments

+
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+
+ +

See Also

+HPL_timer_cputime, +HPL_timer_walltime. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_cputime.html new file mode 100755 index 000000000..0fa9b6575 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_cputime.html @@ -0,0 +1,35 @@ + + +HPL_timer_cputime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer_cputime Return the CPU time. + +

Synopsis

+#include "hpl.h"

+double +HPL_timer_cputime(); + +

Description

+HPL_timer_cputime +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. + +

See Also

+HPL_timer_walltime, +HPL_timer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_walltime.html new file mode 100755 index 000000000..92588e49f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_timer_walltime.html @@ -0,0 +1,26 @@ + + +HPL_timer_walltime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer_walltime Return the elapsed (wall-clock) time. + +

Synopsis

+#include "hpl.h"

+double +HPL_timer_walltime(); + +

Description

+HPL_timer_walltime +returns the elapsed (wall-clock) time. + +

See Also

+HPL_timer_cputime, +HPL_timer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_warn.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_warn.html new file mode 100755 index 000000000..773df9ae0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_warn.html @@ -0,0 +1,74 @@ + + +HPL_warn HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_warn displays an error message. + +

Synopsis

+#include "hpl.h"

+void +HPL_warn( +FILE * +STREAM, +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_warn +displays an error message. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_warn( stderr, __LINE__, __FILE__,
+             "Demo.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_abort, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_xjumpm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_xjumpm.html new file mode 100755 index 000000000..794ae3a8b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/HPL_xjumpm.html @@ -0,0 +1,97 @@ + + +HPL_xjumpm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_xjumpm Compute constants to jump in the random sequence. + +

Synopsis

+#include "hpl.h"

+void +HPL_xjumpm( +const int +JUMPM, +int * +MULT, +int * +IADD, +int * +IRANN, +int * +IRANM, +int * +IAM, +int * +ICM +); + +

Description

+HPL_xjumpm +computes the constants A and C to jump JUMPM numbers in +the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in +MULT and IADD specify how to jump from one entry in the sequence to +the next. + +

Arguments

+
+JUMPM   (local input)                 const int
+        On entry,  JUMPM  specifies  the  number  of entries  in  the
+        sequence to jump over. When JUMPM is less than or equal to zero,
+        A and C are not computed, IRANM is set to IRANN corresponding
+        to a jump of size zero.
+
+
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  a  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+
+
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  c  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+
+
+IRANN   (local input)                 int *
+        On entry, IRANN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the encoding of X(n).
+
+
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.   On exit, this
+        array  contains respectively  the 16-lower and 15-higher bits
+        of the encoding of X(n+JUMPM).
+
+
+IAM     (local output)                int *
+        On entry, IAM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+        sequence. IAM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  A. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+
+
+ICM     (local output)                int *
+        On entry, ICM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+        sequence. ICM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  C. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/algorithm.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/algorithm.html new file mode 100755 index 000000000..9b1d7222e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/algorithm.html @@ -0,0 +1,299 @@ + + +HPL Algorithm + + + + +

HPL Algorithm

+ + +This page provides a high-level description of the algorithm used in +this package. As indicated below, HPL contains in fact many possible +variants for various operations. Defaults could have been chosen, or +even variants could be selected during the execution. Due to the +performance requirements, it was decided to leave the user with the +opportunity of choosing, so that an "optimal" set of parameters could +easily be experimentally determined for a given machine configuration. +From a numerical accuracy point of view, all possible +combinations are rigorously equivalent to each other even though the +result may slightly differ (bit-wise). +

+ + +
+ +

Main Algorithm

+ +This software package solves a linear system of order n: A x = b by +first computing the LU factorization with row partial pivoting of the +n-by-n+1 coefficient matrix [A b] = [[L,U] y]. Since the lower triangular +factor L is applied to b as the factorization progresses, the solution x +is obtained by solving the upper triangular system U x = y. The lower +triangular matrix L is left unpivoted and the array of pivots is not +returned.

+ + + + + + +
+The data is distributed onto a two-dimensional P-by-Q grid of processes +according to the block-cyclic scheme to ensure "good" load balance +as well as the scalability of the algorithm. The n-by-n+1 coefficient +matrix is first logically partitioned into nb-by-nb blocks, that are +cyclically "dealt" onto the P-by-Q process grid. This is done in both +dimensions of the matrix.
+ + + + + +
+The right-looking variant has been chosen for the main loop of the LU +factorization. This means that at each iteration of the loop a panel of +nb columns is factorized, and the trailing submatrix is updated. Note +that this computation is thus logically partitioned with the same block +size nb that was used for the data distribution.
+
+ +

Panel Factorization

+ + + + + + +
+At a given iteration of the main loop, and because of the cartesian +property of the distribution scheme, each panel factorization occurs in +one column of processes. This particular part of the computation lies +on the critical path of the overall algorithm. The user is offered the +choice of three (Crout, left- and right-looking) matrix-multiply based +recursive variants. The software also allows the user to choose how +many sub-panels the current panel should be divided into during the +recursion. Furthermore, one can also select at run-time the recursion +stopping criterion in terms of the number of columns left to factorize. +When this threshold is reached, the sub-panel will then be factorized +using one of the three Crout, left- or right-looking matrix-vector based +variants. Finally, for each panel column the pivot search, the associated +swap and broadcast operation of the pivot row are combined into one +single communication step. A binary-exchange (leave-on-all) reduction +performs these three operations at once.
+
+ +

Panel Broadcast

+ +Once the panel factorization has been computed, this panel of columns +is broadcast to the other process columns. There are many possible +broadcast algorithms and the software currently offers 6 variants to +choose from. These variants are described below assuming that process 0 +is the source of the broadcast for convenience. "->" means "sends to". +
    +
  • Increasing-ring: 0 -> 1; 1 -> 2; 2 -> 3 and so on. +This algorithm is the classic one; it has the caveat that process 1 has +to send a message. +
    + +
    + +
  • Increasing-ring (modified): 0 -> 1; 0 -> 2; 2 -> 3 +and so on. Process 0 sends two messages and process 1 only receives one +message. This algorithm is almost always better, if not the best. +
    + +
    + +
  • Increasing-2-ring: The Q processes are divided into +two parts: 0 -> 1 and 0 -> Q/2; Then processes 1 and Q/2 act as sources +of two rings: 1 -> 2, Q/2 -> Q/2+1; 2 -> 3, Q/2+1 -> Q/2+2 and so on. +This algorithm has the advantage of reducing the time by which the last +process will receive the panel at the cost of process 0 sending 2 +messages. +
    + +
    + +
  • Increasing-2-ring (modified): As one may expect, +first 0 -> 1, then the Q-1 processes left are divided into two equal +parts: 0 -> 2 and 0 -> Q/2; Processes 2 and Q/2 act then as sources of +two rings: 2 -> 3, Q/2 -> Q/2+1; 3 -> 4, Q/2+1 -> Q/2+2 and so on. +This algorithm is probably the most serious competitor to the increasing +ring modified variant. +
    + +
    + +
  • Long (bandwidth reducing): as opposed to the +previous variants, this algorithm and its follower synchronize all +processes involved in the operation. The message is chopped into Q equal +pieces that are scattered across the Q processes. +
    + +
    +The pieces are then rolled in Q-1 steps. The scatter phase uses a binary +tree and the rolling phase exclusively uses mutual message exchanges. In +odd steps 0 <-> 1, 2 <-> 3, 4 <-> 5 and so on; in even steps Q-1 <-> 0, +1 <-> 2, 3 <-> 4, 5 <-> 6 and so on. +
    + +
    +More messages are exchanged, however the total volume of communication is +independent of Q, making this algorithm particularly suitable for large +messages. This algorithm becomes competitive when the nodes are "very +fast" and the network (comparatively) "very slow".

    + +
  • Long (bandwidth reducing modified): same as above, +except that 0 -> 1 first, and then the Long variant is used on processes +0,2,3,4 .. Q-1.

    +
    + + +
    + +
+ +The ring variants are distinguished by a probe mechanism that activates +them. In other words, a process involved in the broadcast and different +from the source asynchronously probes for the message to receive. When +the message is available the broadcast proceeds, and otherwise the +function returns. This allows the broadcast operation to be interleaved with +the update phase. This helps to reduce the idle time spent by those +processes waiting for the factorized panel. This mechanism is necessary +to accommodate various computation/communication performance ratios.

+
+ +

Look-ahead

+ +Once the panel has been broadcast or say during this broadcast operation, +the trailing submatrix is updated using the last panel in the look-ahead +pipe: as mentioned before, the panel factorization lies on the critical +path, which means that when the kth panel has been factorized and then +broadcast, the next most urgent task to complete is the factorization and +broadcast of the k+1 th panel. This technique is often referred to as +"look-ahead" or "send-ahead" in the literature. This package allows one to +select various "depths" of look-ahead. By convention, a depth of zero +corresponds to no look-ahead, in which case the trailing submatrix is +updated by the panel currently broadcast. Look-ahead consumes some extra +memory to essentially keep all the panels of columns currently in the +look-ahead pipe. A look-ahead of depth 1 (maybe 2) is likely to achieve +the best performance gain.

+
+ +

Update

+ +The update of the trailing submatrix by the last panel in the look-ahead +pipe is made of two phases. First, the pivots must be applied to form the +current row panel U. U should then be solved by the upper triangle of the +column panel. U finally needs to be broadcast to each process row so that +the local rank-nb update can take place. We choose to combine the +swapping and broadcast of U at the cost of replicating the solve. Two +algorithms are available for this communication operation. +
    +
  • Binary-exchange: this is a modified variant of the +binary-exchange (leave on all) reduction operation. Every process column +performs the same operation. The algorithm essentially works as follows. +It pretends to reduce the row panel U, but at the beginning the only valid +copy is owned by the current process row. The other process rows will +contribute rows of A they own that should be copied in U and replace them +with rows that were originally in the current process row. The complete +operation is performed in log(P) steps. For the sake of simplicity, let +us assume that P is a power of two. At step k, process row p exchanges a +message with process row p+2^k. There are essentially two cases. First, +one of those two process rows has received U in a previous step. The +exchange occurs. One process swaps its local rows of A into U. Both +processes copy in U remote rows of A. Second, neither of those process rows +has received U, the exchange occurs, and both processes simply add those +remote rows to the list they have accumulated so far. At each step, a +message of the size of U is exchanged by at least one pair of process +rows.

    + +
  • Long: this is a bandwidth reducing variant +accomplishing the same task. The row panel is first spread (using a tree) +among the process rows with respect to the pivot array. This is a scatter +(V variant for MPI users). Locally, every process row then swaps these +rows with the rows of A it owns and that belong to U. These buffers +are then rolled (P-1 steps) to finish the broadcast of U. Every process +row permutes U and proceeds with the computational part of the update. A +couple of notes: process rows are logarithmically sorted before +spreading, so that processes receiving the largest number of rows are +first in the tree. This makes the communication volume optimal for this +phase. Finally, before rolling and after the local swap, an equilibration +phase occurs during which the local pieces of U are uniformly spread +across the process rows. A tree-based algorithm is used. This operation +is necessary to keep the rolling phase optimal even when the pivot rows +are not equally distributed in process rows. This algorithm has a +complexity in terms of communication volume that solely depends on the +size of U. In particular, the number of process rows only impacts the +number of messages exchanged. It will thus outperform the previous +variant for large problems on large machine configurations.

    + +
+ +The user can select any of the two variants above. In addition, a mix is +possible as well. The "binary-exchange" algorithm will be used when U +contains at most a certain number of columns. Choosing at least the block +size nb as the threshold value is clearly recommended when look-ahead is +on.

+
+ +

Backward Substitution

+ +The factorization has just now ended; the back-substitution remains to be +done. For this, we choose a look-ahead of depth one variant. The +right-hand-side is forwarded in process rows in a decreasing-ring +fashion, so that we solve Q * nb entries at a time. At each step, this +shrinking piece of the right-hand-side is updated. The process just above +the one owning the current diagonal block of the matrix A updates first +its last nb piece of x, forwards it to the previous process column, then +broadcasts it in the process column in a decreasing-ring fashion as well. +The solution is then updated and sent to the previous process column. The +solution of the linear system is left replicated in every process row.

+
+ +

Checking the Solution

+ +To verify the result obtained, the input matrix and right-hand side are +regenerated. The normwise backward error (see formula below) is then +computed. A solution is considered as "numerically correct" when this +quantity is less than a threshold value of the order of 1.0. In the +expression below, eps is the relative (distributed-memory) machine +precision. + +
    +
  • || Ax - b ||_oo / ( eps * ( || A ||_oo * || x ||_oo + || b ||_oo ) * n ) +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/aprunner.gif b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/aprunner.gif new file mode 100755 index 000000000..6508c806f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/aprunner.gif differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/copyright.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/copyright.html new file mode 100755 index 000000000..934282c81 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/copyright.html @@ -0,0 +1,66 @@ + + +HPL Copyright and Licensing Terms + + + + +

HPL Copyright Notice and Licensing Terms

+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +
    +
  1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +
  2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions, and the following disclaimer in the +documentation and/or other materials provided with the distribution. +
  3. All advertising materials mentioning features or use of this +software must display the following acknowledgement: This product +includes software developed at the University of Tennessee, +Knoxville, Innovative Computing Laboratory. +
  4. The name of the University, the name of the Laboratory, or the +names of its contributors may not be used to endorse or promote +products derived from this software without specific written +permission. +
+ +

Disclaimer

+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +`AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/documentation.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/documentation.html new file mode 100755 index 000000000..152188041 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/documentation.html @@ -0,0 +1,304 @@ + + +HPL Documentation + + + + +

HPL Documentation

+ +The HPL software distribution comes with a set of text files explaining +how to install, run and tune the software. These files reside in the top +level directory and their names are in upper case. To a large extent, +this page reproduces them. In addition, man- and HTML-pages are provided +for every routine in the package. To access the man pages, one must add +hpl/man to its MANPATH environment variable. The HTML pages can be +accessed on this site, or by pointing your browser to your local hpl/www +directory. Finally, the source code has been heavily documented. Despite +all the other documentation efforts, the source code remains the most +trustworthy and truthful piece of information about what goes on in HPL. +

+ +

HPL Functions HTML Pages

+ +Computational Kernels Wrappers When calling the Fortran +77 BLAS interface, these C functions allow to confine the C to Fortran +77 interface issues to a small subset of routines. + + + +
+
+ +Local Auxiliaries Basic functionality, local swap functions. + + + +
+
+ +Parallel Auxiliaries Index computations, parallel basic +functionality. + + + +
+
+ +Grid Management Most of these routines have a direct +MPI equivalent. On new systems, when the entire MPI functionality is +not yet readily available, these functions are particularly convenient +since they rely on a minimal subset of the MPI standard. + + +
+
+ +Panel Management + + +
+
+ +Panel Factorization Recursive (matrix-multiply based) and +(matrix-vector based) panel factorization. + + +
+
+ +Panel Broadcast + + +
+
+ +Update + + +
+
+ +Main Factorization / Look-ahead + + +
+
+ +Backward Substitution + + +
+
+ +Matrix generation A C version of the ScaLAPACK random +matrix generator with less functionality though. + +
+
+ +Timers Sequential and parallel timing utilities. + +
+
+ +Main Testing / Timing Driver + + +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/errata.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/errata.html new file mode 100755 index 000000000..24275d2dd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/errata.html @@ -0,0 +1,116 @@ + + +HPL Errata-Bugs + + + + +

HPL Errata - Bugs

+ +

Issues fixed in Version 2.1, October 26th, 2012

+ +The output now reports exact time stamps before and after the +execution of the solver function pdgesv(). This +allows for accurate accounting of running time for data center +management purposes, for example when reporting power +consumption. This is important for the Green500 project.

+ +Fixed an out-of-bounds access to arrays in the HPL_spreadN() +and HPL_spreadT() functions. This may cause segmentation +fault signals. It was reported by Stephen Whalen from Cray.

+ +

Issues fixed in Version 2.0, September 10th, 2008

+ +Gregory Bauer found a problem size corresponding to the +periodicity of the pseudo-random matrix generator used in the +HPL timing program. This causes the LU factorization to +detect the singularity of the input matrix as it should have.

+ +A problem size of 2^17 = 131072 causes columns 14 modulo 2^14 +(i.e. 16384) (starting from 0) to be bitwise identical on a +homogeneous platform. Every problem size being a power of 2 +and larger than 2^15 will feature a similar problem if one +searches far enough in the columns of the square input matrix.

+ +The pseudo-random generator uses the linear congruential +algorithm: X(n+1) = (a * X(n) + c) mod m as described in the +Art of Computer Programming, Knuth 1973, Vol. 2. In the HPL +case, m is set to 2^31.

+ +It is very important to realize that this issue is a problem +of the testing part of the HPL software. The numerical +properties of the algorithms used in the factorization and +the solve should not be questioned because of this. In fact, +this is just the opposite: the factorization demonstrated the +weakness of the testing part of the software by detecting the +singularity of the input matrix.

+ +This issue of the testing program is not easy to fix. This +pseudo-random generator has very useful properties despite +this. It is thus currently recommended to HPL users willing +to test matrices of size larger than 2^15 not to use powers +of two.

+ +This issue has been fixed by changing the pseudo-random +matrix generator. Now the periodicity of the generator is +2^64.

+ +

Issues fixed in Version 1.0b, December 15th, 2004

+ +When the matrix size is such that one needs more than 16 GB +per MPI rank, the intermediate calculation (mat.ld+1) * +mat.nq in HPL_pdtest.c ends up overflowing because it is +done using 32-bit arithmetic. This issue has been fixed by +typecasting to size_t; Thanks to John Baron.

+ +

Issues fixed in Version 1.0a, January 20th, 2004

+ +The MPI process grid numbering scheme defaults now to row- +major ordering. This option can now be selected at run time.

+ +The inlined assembly timer routine that was causing the +compilation to fail when using gcc version 3.3 and above has +been removed from the package.

+ +Various building problems on the T3E have been fixed; Thanks +to Edward Anderson.

+ +

Issues fixed in Version 1.0, September 27th, 2000

+ +Due to a couple errors spotted in the VSIPL port of the +software, the distribution contained in the tar file of +September 9th, 2000 had been updated on September 27th, 2000 +with a corrected distribution. These problems were +not affecting in any way possible the BLAS version of the +software. If you are using the VSIPL port of HPL, +and want to make sure you are indeed using the latest +corrected version, please check the date contained in the +file HPL.build.log contained in the main directory.

+ + + + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/faqs.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/faqs.html new file mode 100755 index 000000000..ad853e760 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/faqs.html @@ -0,0 +1,126 @@ + + +HPL Frequently Asked Questions + + + + +

HPL Frequently Asked Questions

+ + +
+ +

What problem size N should I run ?

+ +In order to find out the best performance of your system, the +largest problem size fitting in memory is what you should aim for. +The amount of memory used by HPL is essentially the size of the +coefficient matrix. So for example, if you have 4 nodes with 256 Mb +of memory on each, this corresponds to 1 Gb total, i.e., 125 M double +precision (8 bytes) elements. The square root of that number is +11585. One definitely needs to leave some memory for the OS as well +as for other things, so a problem size of 10000 is likely to fit. As +a rule of thumb, 80 % of the total amount of memory is a good guess. +If the problem size you pick is too large, swapping will occur, and +the performance will drop. If multiple processes are spawned on each +node (say you have 2 processors per node), what counts is the +available amount of memory to each process.

+
+ +

What block size NB should I use ?

+ +HPL uses the block size NB for the data distribution as well as for +the computational granularity. From a data distribution point of +view, the smaller NB, the better the load balance. You definitely +want to stay away from very large values of NB. From a computation +point of view, a too small value of NB may limit the computational +performance by a large factor because almost no data reuse will occur +in the highest level of the memory hierarchy. The number of messages +will also increase. Efficient matrix-multiply routines are often +internally blocked. Small multiples of this blocking factor are +likely to be good block sizes for HPL. The bottom line is that "good" +block sizes are almost always in the [32 .. 256] interval. The best +values depend on the computation / communication performance ratio of +your system. To a much lesser extent, the problem size matters as well. +Say for example, you empirically found that 44 was a good block size +with respect to performance. 88 or 132 are likely to give slightly +better results for large problem sizes because of a slightly higher +flop rate.

+
+ +

What process grid ratio P x Q should I use ?

+ +This depends on the physical interconnection network you have. +Assuming a mesh or a switch HPL "likes" a 1:k ratio with k in [1..3]. +In other words, P and Q should be approximately equal, with Q +slightly larger than P. Examples: 2 x 2, 2 x 4, 2 x 5, 3 x 4, 4 x 4, +4 x 6, 5 x 6, 4 x 8 ... If you are running on a simple Ethernet +network, there is only one wire through which all the messages are +exchanged. On such a network, the performance and scalability of HPL +is strongly limited and very flat process grids are likely to be the +best choices: 1 x 4, 1 x 8, 2 x 4 ...

+
+ +

What about the one processor case ?

+ +HPL has been designed to perform well for large problem sizes on +hundreds of nodes and more. The software works on one node and for +large problem sizes, one can usually achieve pretty good performance +on a single processor as well. For small problem sizes however, the +overhead due to message-passing, local indexing and so on can be +significant.

+
+ +

Why so many options in HPL.dat ?

+ +There are quite a few reasons. First off, these options are useful to +determine what matters and what does not on your system. Second, HPL +is often used in the context of early evaluation of new systems. In +such a case, everything is usually not quite working right, and it is +convenient to be able to vary these parameters without recompiling. +Finally, every system has its own peculiarities and one is likely to +be willing to empirically determine the best set of parameters. In +any case, one can always follow the advice provided in the +tuning section of this document and not +worry about the complexity of the input file.

+
+ +

Can HPL be Outperformed ?

+ +Certainly. There is always room for performance improvements. +Specific knowledge about a particular system is always a source of +performance gains. Even from a generic point of view, better +algorithms or more efficient formulation of the classic ones are +potential winners.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/index.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/index.html new file mode 100755 index 000000000..a3a53abfe --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/index.html @@ -0,0 +1,178 @@ + + + +HPL - A Portable Implementation of the High-Performance +Linpack Benchmark for Distributed-Memory Computers + + + + + +
+ + + + + +
+

HPL - A Portable Implementation of the High-Performance Linpack +Benchmark for Distributed-Memory Computers

+
+ + +
+ + + + + + + +
Version 2.3 +A. Petitet, +R. C. Whaley, +J. Dongarra, +A. Cleary +December 2, 2018 +# Accesses +
+

+ +HPL is a software package that solves a (random) +dense linear system in double precision (64 bits) arithmetic +on distributed-memory computers. It can thus be regarded as +a portable as well as freely available implementation of the High +Performance Computing Linpack Benchmark.

+ +The algorithm used by HPL can be summarized by the +following keywords: Two-dimensional block-cyclic data distribution +- Right-looking variant of the LU factorization with row partial +pivoting featuring multiple look-ahead depths - Recursive panel +factorization with pivot search and column broadcast combined - +Various virtual panel broadcast topologies - bandwidth reducing +swap-broadcast algorithm - backward substitution with look-ahead +of depth 1.

+ +The HPL package provides a testing and timing program to quantify +the accuracy of the obtained solution as well as +the time it took to compute it. The best performance +achievable by this software on your system depends on a large variety +of factors. Nonetheless, with some restrictive assumptions on the +interconnection network, the algorithm described here and its +attached implementation are scalable in the sense +that their parallel efficiency is maintained constant with respect +to the per processor memory usage.

+ +The HPL software package requires the availability +on your system of an implementation of the Message Passing Interface +MPI (1.1 compliant). +An implementation of either the Basic Linear Algebra +Subprograms BLAS or the Vector Signal Image +Processing Library VSIPL is also needed. +Machine-specific as well as generic implementations of +MPI, the +BLAS and +VSIPL are available for a large +variety of systems.

+ +Acknowledgements: This work was supported in part +by a grant from the Department of Energy's Lawrence +Livermore National Laboratory and Los Alamos National Laboratory +as part of the ASCI Projects contract numbers B503962 and +12187-001-00 4R. + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ +
+Innovative Computing Laboratory
+last revised December 2, 2018
+
+ +
+#########################################################################
+
+file    hpl-2.3.tar.gz
+for     HPL 2.3 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: December 2, 2018
+
+#########################################################################
+
+file    hpl-2.2.tar.gz
+for     HPL 2.2 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: February 24, 2016
+
+#########################################################################
+
+file    hpl-2.1.tar.gz
+for     HPL 2.1 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: October 26, 2012
+
+#########################################################################
+
+file    hpl-2.0.tar.gz
+for     HPL 2.0 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: September 10, 2008
+
+#########################################################################
+
+file    hpl.tgz
+for     HPL 1.0a - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: January 20, 2004
+ +######################################################################### + +file hpl_qs22-2008-11-30.patch +for Implementation of the High-Performance Linpack benchmark for IBM +, QS22 systems with PowerXCell 8i processors. The file is a patch +, for HPL 1.0a. +by IBM + +file IBM_LICENSE.TXT +for IBM Copyright notice for QS22 HPL +by IBM + +file IBM_README.txt +for README for IBM QS22 HPL +by IBM +Updated: November 30, 2008 + + +######################################################################### +
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/links.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/links.html new file mode 100755 index 000000000..da2639e99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/links.html @@ -0,0 +1,89 @@ + + +HPL Related Links + + + + +

HPL Related Links

+ +The list of links below contains some relevant material to this +work. This list is provided for illustrative purposes, and should be +regarded as an initial starting point for the interested reader. This +list is by no means meant to be exhaustive.

+ +

Message Passing Interface (MPI)

+ +MPI is a library specification for message-passing, proposed as a +standard by a broadly based committee of vendors, implementors, and +users. Machine-specific (optimized) as well as freely available MPI +libraries are available for a large variety of systems. Browse the +Message Passing Interface (MPI) +standard web page for more information.

+ +

Basic Linear Algebra Subroutines (BLAS)

+ +The BLAS are high quality +"building block" routines for performing basic vector and matrix +operations. A lot of "BLAS-related" information can be found at this +site. In particular, a reference implementation is available. This +reference implementation is not optimized for any +system, and it is therefore not recommended to use it +for benchmarking purposes. +However, machine-specific +optimized BLAS libraries are available for a variety of computer +systems. For further details, please contact your local vendor +representative. Alternatively, one may also consider using automatic +code generators such as ATLAS. +This tool automatically generates a complete and optimized BLAS +library for a large variety of modern systems.

+ +

Vector Signal Image Processing Library (VSIPL)

+ +VSIPL is an API defined by an open +standard comprised of embedded signal and image processing hardware and +software vendors, academia, users, and government labs. A lot of +"VSIPL-related" information can be found at this site. In particular, a +reference implementation is available. Machine-specific optimized VSIPL +libraries are available for a variety of computer systems. For further +details, please contact your local vendor representative.

+ +

TOP 500 List

+ +The TOP 500 +is an ordered list of the 500 most powerful computer systems worldwide. +Computers are ranked in this list by their performance on the + +LINPACK Benchmark.

+ +

Parallel Dense Linear Algebra Software Libraries

+ +Browse the Netlib software repository +or the National HPCC Software Exchange +to find a large collection of freely available linear algebra libraries. +

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/main.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/main.jpg new file mode 100755 index 000000000..df62edd33 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/main.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/mat2.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/mat2.jpg new file mode 100755 index 000000000..25afdc44c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/mat2.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/pfact.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/pfact.jpg new file mode 100755 index 000000000..33a7e55cb Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/pfact.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/references.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/references.html new file mode 100755 index 000000000..95c6db176 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/references.html @@ -0,0 +1,276 @@ + + +HPL References + + + + +

HPL References

+ + +The list of references below contains some relevant published material +to this work. This list is provided for illustrative purposes, and +should be regarded as an initial starting point for the interested +reader. This list is by no means meant to be exhaustive. +

+ +The references have been sorted in four categories and chronologically +listed within each category. The four categories are + +
+ +

Linpack Benchmark

+ +
    + + +
  • LINPACK Users Guide, J. Dongarra, J. Bunch, C. Moler and +G. W. Stewart, SIAM, Philadelphia, PA, 1979. + + +
  • Performance of Various Computers Using Standard Linear Equations +Software, J. Dongarra, Technical Report CS-89-85, University of +Tennessee, 1989. (An updated version of this report can be found at + +http://www.netlib.org/benchmark/performance.ps). + + +
  • Towards Peak Parallel LINPACK Performance on 400 Transputers, +R. Bisseling and L. Loyens, Supercomputer, Vol. 45, pp. 20-27, 1991. + +
  • Massively Parallel LINPACK Benchmark on the Intel Touchstone +DELTA and iPSC/860 Systems, R. van de Geijn, 1991 Annual Users +Conference Proceedings. Intel Supercomputer Users Group, Dallas, TX, +1991. + +
  • The LINPACK Benchmark on the AP 1000, R. Brent, Frontiers, +1992, pp. 128-135, McLean, VA, 1992. + + +
  • Implementation of BLAS Level 3 and LINPACK Benchmark on the +AP1000, R. Brent and P. Strazdins, Fujitsu Scientific and Technical +Journal, Vol. 5, No. 1, pp. 61-70, 1993. + + +
  • LU Factorization and the LINPACK Benchmark on the Intel +Paragon, D. Womble, D. Greenberg, D. Wheat and S. Riesen, Sandia +Technical Report, 1994. + + +
  • Massively Parallel Distributed Computing: Worlds First 281 +Gigaflop Supercomputer, J. Bolen, A. Davis, B. Dazey, S. Gupta, +G. Henry, D. Robboy, G. Schiffler, D. Scott, M. Stallcup, A. Taraghi, +S. Wheat from Intel SSD, L. Fisk, G. Istrail, C. Jong, R. Riesen, +L. Shuler, from Sandia National Laboratories, Proceedings of the Intel +Supercomputer Users Group 1995. + + +
  • High Performance Software on Intel Pentium Pro Processors or +Micro-Ops to TeraFLOPS, B. Greer and G. Henry, Proceedings of the +SuperComputing 1997 Conference, ACM SIGARCH - IEEE Computer Society +Press - ISBN: 0-89791-985-8, San Jose, CA, 1997. + +
+ +
+ +

Parallel LU Factorization

+ +
    + + +
  • Communication Complexity of the Gaussian Elimination Algorithm +on Multiprocessors, Y. Saad, Linear Algebra and Its Applications, +Vol. 77, pp. 315-340, 1986. + + +
  • LU Factorization Algorithms on Distributed-Memory Multiprocessor +Architectures, G. Geist and C. Romine, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, pp. 639-649, 1988. + + +
  • Parallel LU Decomposition on a Transputer Network, +R. Bisseling and J. van der Vorst, Lecture Notes in Computer Sciences, +Springer-Verlag, Eds. G. van Zee and J. van der Vorst, Vol. 384, +pp. 61-77, 1989. + + +
  • The Distributed Solution of Linear Systems Using the Torus-Wrap +Data Mapping, C. Ashcraft, ECA-TR-147, Boeing Computer Services, +Seattle, WA, 1990. + +
  • Experiments with Multicomputer LU-Decomposition, E. van de +Velde, Concurrency: Practice and Experience, Vol. 2, pp. 1-26, 1990. + + +
  • A Taxonomy of Distributed Dense LU Factorization Methods, +C. Ashcraft, ECA-TR-161, Boeing Computer Services, Seattle, WA, 1991. + + +
  • The Torus-Wrap Mapping for Dense Matrix Calculations on Massively +Parallel Computers, B. Hendrickson and D. Womble, SIAM Journal on +Scientific and Statistical Computing, Vol. 15, pp. 1201-1226, 1994. + +
  • Scalability Issues in the Design of a Library for Dense Linear +Algebra, J. Dongarra, R. van de Geijn and D. Walker, Journal of +Parallel and Distributed Computing, Vol. 22, No. 3, pp. 523-537, 1994. + + +
  • Matrix Factorization using Distributed Panels on the Fujitsu +AP1000, P. Strazdins, Proceedings of the IEEE First International +Conference on Algorithms And Architectures for Parallel Processing +ICA3PP-95, Brisbane, 1995. + + +
  • The Design and Implementation of the ScaLAPACK LU, QR, and +Cholesky Factorization Routines, J. Choi, J. Dongarra, S. Ostrouchov, +A. Petitet, D. Walker and R. C. Whaley, Scientific Programming, Vol. 5, +pp. 173-184, 1996. + +
+ +
+ +

Recursive LU Factorization

+ +
    + + +
  • Locality of Reference in LU Decomposition with partial +pivoting, S. Toledo, SIAM Journal on Matrix. Anal. Appl., Vol. 18, +No. 4, 1997. + +
  • Recursion Leads to Automatic Variable Blocking for Dense +Linear-Algebra Algorithms, F. Gustavson, IBM Journal of Research +and Development, Vol. 41, No. 6, pp. 737-755, 1997 + +
+ +
+ +

Parallel Matrix Multiply

+ +
    + + +
  • Matrix Algorithms on a Hypercube I: Matrix Multiplication, +G. Fox, S. Otto and A. Hey, Parallel Computing, Vol. 3, pp. 17-31, 1987. + + +
  • Basic Matrix Subprograms for Distributed-Memory Systems, +A. Elster, Proceedings of the Fifth Distributed-Memory Computing +Conference, Eds. D. Walker and Q. Stout, IEEE Press, pp. 311-316, 1990. + + +
  • The Parallelization of Level 2 and 3 BLAS Operations on +Distributed-Memory Machines, M. Aboelaze, N. Chrisochoides +and E. Houstis, CSD-TR-91-007, Purdue University, West Lafayette, +IN, 1991. + + +
  • The Multicomputer Toolbox Approach to Concurrent BLAS and LACS, +R. Falgout, A. Skjellum, S. Smith and C. Still, Proceedings of the +Scalable High Performance Computing Conference SHPCC-92, IEEE Computer +Society Press, 1992. + + +
  • A High Performance Matrix Multiplication Algorithm on a +Distributed-Memory Parallel Computer, Using Overlapped Communication, +R. Agarwal, F. Gustavson and M. Zubair, IBM Journal of Research and +Development, Vol. 38, No. 6, pp. 673-681, 1994. + +
  • PUMMA: Parallel Universal Matrix Multiplication Algorithms on +Distributed-Memory Concurrent Computers, J. Choi, J. Dongarra and +D. Walker, Concurrency: Practice and Experience, Vol. 6, No. 7, +pp. 543-570, 1994. + +
  • Matrix Multiplication on the Intel Touchstone DELTA, +S. Huss-Lederman, E. Jacobson, A. Tsao and G. Zhang, Concurrency: +Practice and Experience, Vol. 6, No. 7, pp. 571-594, 1994. + + +
  • A Three-Dimensional Approach to Parallel Matrix Multiplication, +R. Agarwal, S. Balle, F. Gustavson, M. Joshi and P. Palkar, IBM Journal +of Research and Development, Vol. 39, No. 5, pp. 575-582, 1995. + + +
  • A High Performance Parallel Strassen Implementation, +B. Grayson and R. van de Geijn, Parallel Processing Letters, Vol. 6, +No. 1, pp. 3-12, 1996. + + +
  • Parallel Implementation of BLAS: General Techniques for Level +3 BLAS, A. Chtchelkanova, J. Gunnels, G. Morrow, J. Overfelt and +R. van de Geijn, Concurrency: Practice and Experience, Vol. 9, No. 9, +pp. 837-857, 1997. + +
  • A Poly-Algorithm for Parallel Dense Matrix Multiplication on +Two-Dimensional Process Grid Topologies, J. Li, R. Falgout and +A. Skjellum, Concurrency: Practice and Experience, Vol. 9, No. 5, +pp. 345-389, 1997. + +
  • SUMMA: Scalable Universal Matrix Multiplication Algorithm, +R. van de Geijn and J. Watts, Concurrency: Practice and Experience, +Vol. 9, No. 4, pp. 255-274, 1997. + +
+ +
+ +

Parallel Triangular Solve

+ +
    + + +
  • Parallel Solution of Triangular Systems on Distributed-Memory +Multiprocessors, M. Heath and C. Romine, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, pp. 558-588, 1988. + +
  • A Parallel Triangular Solver for a Distributed-Memory +Multiprocessor, G. Li and T. Coleman, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, No. 3, pp. 485-502, 1988. + + +
  • A New Method for Solving Triangular Systems on Distributed-Memory +Message-Passing Multiprocessor, G. Li and T. Coleman, SIAM Journal +on Scientific and Statistical Computing, Vol. 10, No. 2, pp. 382-396, +1989. + + +
  • Parallel Triangular System Solving on a Mesh Network of +Transputers, R. Bisseling and J. van der Vorst, SIAM Journal +on Scientific and Statistical Computing, Vol. 12, pp. 787-799, 1991. + +
+ + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/results.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/results.html new file mode 100755 index 000000000..9a7d8b8af --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/results.html @@ -0,0 +1,243 @@ + + +HPL Results + + + + + + + +
+ + +

HPL Performance Results

+ + +The performance achieved by this software package on a few machine +configurations is shown below. These results are only provided for +illustrative purposes. By the time you read this, those systems +have changed, they may not even exist anymore and one can surely +not exactly reproduce the state in which these machines were when +those measurements have been obtained. To obtain accurate figures +on your system, it is absolutely necessary to +download the software and run it there. + +
+
+ + + +
+
+ +

4 AMD Athlon K7 500 Mhz (256 Mb) - (2x) 100 Mbs +Switched - 2 NICs per node (channel bonding)

+ +
+ + + + + + + +
OS Linux 6.2 RedHat (Kernel 2.2.14)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPIch 1.2.1
BLAS ATLAS (Version 3.0 beta)
Comments 09 / 00

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Performance (Gflops) w.r.t Problem size on 4 nodes. +
GRID 2000 5000 800010000
1 x 4 1.28 1.73 1.89 1.95
2 x 2 1.17 1.68 1.88 1.93
4 x 1 0.81 1.43 1.70 1.80

+

+ +
+

8 Duals Intel PIII 550 Mhz (512 Mb) - Myrinet

+ +
+ + + + + + + + + +
OS Linux 6.1 RedHat (Kernel 2.2.15)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPI GM (Version 1.2.3)
BLAS ATLAS (Version 3.0 beta)
Comments UTK / ICL - Torc cluster - 09 / 00

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Performance (Gflops) w.r.t Problem size on 8- and 16-processors grids. +
GRID 2000 5000 8000100001500020000
2 x 4 1.76 2.32 2.51 2.58 2.72 2.73
4 x 4 2.27 3.94 4.46 4.68 5.00 5.16

+

+ +
+

Compaq 64 nodes (4 ev67 667 Mhz processors per node) +AlphaServer SC

+ +
+ + + + + + + + +
OS Tru64 Version 5
C compiler cc Version 6.1
C flags -arch host -tune host -std -O5
MPI -lmpi -lelan
BLAS CXML
Comments ORNL / NCCS + - falcon - 09 / 00

+

+ +In the table below, each row corresponds to a given number of cpus (or +processors) and nodes. The first row for example is denoted by 1 / 1, +i.e., 1 cpu / 1 node. Rmax is given in Gflops, and the value of Nmax +in fact corresponds to 351 Mb per cpu for all machine configurations.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CPUS / NODES GRID N 1/2 Nmax Rmax (Gflops) Parallel Efficiency
1 / 1 1 x 1 150 6625 1.136 1.000
4 / 1 2 x 2 800 13250 4.360 0.960
16 / 4 4 x 4 2300 26500 17.00 0.935
64 / 16 8 x 8 5700 53000 67.50 0.928
256 / 64 16 x 16 14000 106000 263.6 0.906

+

+For Rmax shown in the table, the parallel efficiency per cpu has been +computed using the performance achieved by HPL on 1 cpu. That is fair, +since the CXML matrix multiply routine was achieving at best 1.24 Gflops +for large matrix operands on one cpu, it would have been difficult for a +sequential Linpack benchmark implementation to achieve much more than +1.136 Gflops on this same cpu. For constant load (as in the table 351 Mb +per cpu for Nmax), HPL scales almost linearly as it should. + +

+The authors acknowledge the use of the Oak Ridge National Laboratory +Compaq computer, funded by the Department of Energy's Office +of Science and Energy Efficiency programs.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/roll.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/roll.jpg new file mode 100755 index 000000000..88d2c56af Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/roll.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/rollM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/rollM.jpg new file mode 100755 index 000000000..0d7f076fd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/rollM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/scalability.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/scalability.html new file mode 100755 index 000000000..00bb1a27e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/scalability.html @@ -0,0 +1,200 @@ + + +HPL Scalability Analysis + + + + +

HPL Scalability Analysis

+ +The machine model used for the +analysis is first described. This crude model is then used to first +estimate the parallel running time of the various phases of the +algorithm namely + +Finally the parallel efficiency +of the entire algorithm is estimated according to this machine model. +We show that for a given set of parameters HPL is scalable +not only with respect to the amount of computation, but also with +respect to the communication volume.

+
+ +

The Machine Model

+ +Distributed-memory computers consist of processors that are connected +using a message passing interconnection network. Each processor has +its own memory called the local memory, which is accessible only to +that processor. As the time to access a remote memory is longer than +the time to access a local one, such computers are often referred to +as Non-Uniform Memory Access (NUMA) machines.

+ +The interconnection network of our machine model is static, meaning +that it consists of point-to-point communication links among +processors. This type of network is also referred to as a direct +network as opposed to dynamic networks. The latter are constructed +from switches and communication links. These links are dynamically +connected to one another by the switching elements to establish, at +run time, the paths between processors memories.

+ +The interconnection network of the two-dimensional machine model +considered here is a static, fully connected physical topology. It +is also assumed that processors can be treated equally in terms +of local performance and that the communication rate between two +processors depends on the processors considered.

+ +Our model assumes that a processor can send or receive data on only +one of its communication ports at a time (assuming it has more than +one). In the literature, this assumption is also referred to as the +one-port communication model.

+ +The time spent to communicate a message between two given processors +is called the communication time Tc. In our machine model, Tc is +approximated by a linear function of the number L of double +precision (64-bits) items communicated. Tc is the sum of the time to +prepare the message for transmission (alpha) and the time (beta * L) +taken by the message of length L to traverse the network to its +destination, i.e.,

+
+Tc = alpha + beta L.

+
+ +Finally, the model assumes that the communication links are +bi-directional, that is, the time for two processors to send each +other a message of length L is also Tc. A processor can send and/or +receive a message on only one of its communication links at a time. +In particular, a processor can send a message while receiving another +message from the processor it is sending to at the same time.

+ +Since this document is only concerned with regular local dense linear +algebra operations, the time taken to perform one floating point +operation is assumed to be summarized by three constants gam1, +gam2 and gam3. These quantities are flop rates approximations of the +vector-vector, matrix-vector and matrix-matrix operations for each +processor. This very crude approximation summarizes all the steps +performed by a processor to achieve such a computation. Obviously, +such a model neglects all the phenomena occurring in the processor +components, such as cache misses, pipeline startups, memory load or +store, floating point arithmetic and so on, that may influence the +value of these constants as a function of the problem size for +example.

+ +Similarly, the model does not make any assumption on the amount of +physical memory per node. It is assumed that if a process has been +spawned on a processor, one has ensured that enough memory was +available on that processor. In other words, swapping will not occur +during the modeled computation.

+ + +This machine model is a very crude approximation that is designed +specifically to illustrate the cost of the dominant factors of our +particular case.

+
+
+ +

Panel Factorization and Broadcast

+ +Let us consider an M-by-N panel distributed over a P-process column. +Because of the recursive formulation of the panel factorization, it +is reasonable to consider that the floating point operations will +be performed at matrix-matrix multiply "speed". For every column in +the panel a binary-exchange is performed on 2*N data items. When this +panel is broadcast, what matters is the time that the next process +column will spend in this communication operation. Assuming one +chooses the increasing-ring (modified) +variant, only one message needs to be taken into account. The +execution time of the panel factorization and broadcast can thus be +approximated by:

+
+Tpfact( M, N ) = (M/P - N/3) N^2 gam3 + N log(P)( alpha + beta 2 N ) + +alpha + beta M N / P.

+
+
+ +

Trailing Submatrix Update

+ +Let us consider the update phase of an N-by-N trailing submatrix +distributed on a P-by-Q process grid. From a computational point of +view one has to (triangular) solve N right-hand-sides and perform a +local rank-NB update of this trailing submatrix. Assuming one chooses +the long variant, the execution +time of the update operation can be approximated by:

+
+Tupdate( N, NB ) = gam3 ( N NB^2 / Q + 2 N^2 NB / ( P Q ) ) + +alpha ( log( P ) + P - 1 ) + 3 beta N NB / Q.

+
+The constant "3" in front of the "beta" term is obtained by counting +one for the (logarithmic) spread phase and two for the rolling phase; +In the case of bi-directional links this constant 3 should therefore +be only a 2.

+
+ +

Backward Substitution

+ +The number of floating point operations performed during the backward +substitution is given by N^2 / (P*Q). Because of the lookahead, the +communication cost can be approximated at each step by two messages +of length NB, i.e., the time to communicate the NB-piece of the +solution vector from one diagonal block of the matrix to another. It +follows that the execution time of the backward substitution can be +approximated by:

+
+Tbacks( N, NB ) = gam2 N^2 / (P Q) + N ( alpha / NB + 2 beta ).

+
+
+ +

Putting it All Together

+ +The total execution time of the algorithm described above is given by

+
+Sum(k=0,N,NB)[Tpfact( N-k, NB ) + Tupdate( N-k-NB, NB )] + +Tbacks( N, NB ).

+
+That is, by considering only the dominant terms in alpha, beta and +gam3:

+
+Thpl = 2 gam3 N^3 / ( 3 P Q ) + beta N^2 (3 P + Q) / ( 2 P Q ) + +alpha N ((NB + 1) log(P) + P) / NB.

+
+The serial execution time is given by Tser = 2 gam3 N^3 / 3. If we +define the parallel efficiency E as the ratio Tser / ( P Q Thpl ), we +obtain:

+
+E = 1 / ( 1 + 3 beta (3 P + Q) / ( 4 gam3 N ) + +3 alpha P Q ((NB + 1) log(P) + P) / (2 N^2 NB gam3) ).

+
+This last equality shows that when the memory usage per processor +N^2 / (P Q) is maintained constant, the parallel efficiency slowly +decreases only because of the alpha term. The communication volume +(the beta term) however remains constant. Due to these results, HPL +is said to be scalable not only with respect to the +amount of computation, but also with respect to the communication +volume.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/software.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/software.html new file mode 100755 index 000000000..34d82b2b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/software.html @@ -0,0 +1,109 @@ + + +HPL Software + + + + +

HPL Software

+ +

Download and Installation

+ +
    +
  1. Download the tar-gzipped file, +issue then "gunzip hpl-2.3.tar.gz; tar -xvf hpl-2.3.tar" and this +should create an hpl-2.3 directory containing the distribution. +We call this directory the top level directory. + +
  2. Create a file Make.<arch> in the top-level directory. +For this purpose, you may want to re-use one contained in the +setup directory. This Make.<arch> file essentially contains +the compilers, libraries, and their paths to be used on your system. + +
  3. Type "make arch=<arch>". This should create an executable +in the bin/<arch> directory called xhpl. For example, on our +Linux PII cluster, I create a file called Make.Linux_PII in the +top-level directory. Then, I type "make arch=Linux_PII". This +creates the executable file bin/Linux_PII/xhpl. + +
  4. Quick check: run a few tests (assuming you have 4 nodes for +interactive use) by issuing the following commands from the top +level directory: "cd bin/<arch> ; mpirun -np 4 xhpl". This +should produce quite a bit of meaningful output on the screen. + +
  5. Most of the performance parameters can be tuned, by modifying +the input file bin/<arch>/HPL.dat. See the +tuning page or the TUNING file in the +top-level directory. +
+
+ +

Compile Time Options

+ +At the end of the "model" Make.<arch>, the user is given +the opportunity to override some default compile options of this +software. The list of these options and their meaning is:

+ +
+ + + + + + + + + +
-DHPL_COPY_Lforce the copy of the panel L before bcast
-DHPL_CALL_CBLAScall the BLAS C interface
-DHPL_CALL_VSIPLcall the vsip library
-DHPL_DETAILED_TIMINGenable detailed timers

+

+ +The user must choose between either the BLAS Fortran 77 interface, +or the BLAS C interface, or the VSIPL library depending on which +computational kernels are available on his system. Only one of these +options should be selected. If you choose the BLAS Fortran 77 +interface, it is necessary to fill out the machine-specific C to +Fortran 77 interface section of the Make.<arch> file. To do +this, please refer to the Make.<arch> examples contained in +the setup directory.

+ +By default HPL will: +
    +
  • not copy L before broadcast, +
  • call the BLAS Fortran 77 interface, +
  • not display detailed timing information. +
+ +As an example, suppose one wants this software to copy the panel of +columns into a contiguous buffer before broadcasting. It should +be more efficient to let the software create the appropriate MPI +user-defined data type since this may avoid the data copy. So, it +is a strange idea, but one insists. To achieve this one would add +-DHPL_COPY_L to the definition of HPL_OPTS at the end of the file +Make.<arch>. Then issue a "make clean arch=<arch> ; +make build arch=<arch>" and the executable will be rebuilt +with that feature in.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spread.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spread.jpg new file mode 100755 index 000000000..56c255a3f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spread.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spreadM.jpg b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spreadM.jpg new file mode 100755 index 000000000..433e4c077 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/spreadM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/tuning.html b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/tuning.html new file mode 100755 index 000000000..fbbf17fb7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/dpcpp/hpl-2.3/www/tuning.html @@ -0,0 +1,476 @@ + + +HPL Tuning + + + + +

HPL Tuning

+ +After having built the executable hpl/bin/<arch>/xhpl, +one may want to modify the input data file HPL.dat. This file +should reside in the same directory as the executable +hpl/bin/<arch>/xhpl. An example HPL.dat file is +provided by default. This file contains information about the +problem sizes, machine configuration, and algorithm features +to be used by the executable. It is 31 lines long. All the +selected parameters will be printed in the output generated +by the executable.

+ +We first describe the meaning of each line of this input file +below. Finally, a few useful +experimental guide lines to set up the file are given at +the end of this page.

+
+ +

Description of the HPL.dat File

+ +Line 1: (unused) Typically one would use +this line for its own good. For example, it could be used +to summarize the content of the input file. By default this +line reads: +
+HPL Linpack benchmark input file
+
+ +
+Line 2: (unused) same as line 1. By default +this line reads: +
+Innovative Computing Laboratory, University of Tennessee
+
+ +
+Line 3: the user can choose where the +output should be redirected to. In the case of a file, a +name is necessary, and this is the line where one wants to +specify it. Only the first name on this line is significant. +By default, the line reads: +
+HPL.out  output file name (if any)
+
+ +This means that if one chooses to redirect the output to a +file, the file will be called "HPL.out". The rest of the line +is unused, and this space can be used to put some informative comment on +the meaning of this line.

+ +
+Line 4: This line specifies where the output +should go. The line is formatted: it must begin with a +positive integer, the rest is insignificant. 3 choices are +possible for the positive integer, 6 means that the output +will go to the standard output, 7 means that the output will +go to the standard error. Any other integer means that the +output should be redirected to a file, whose name has been +specified in the line above. This line by default reads: +
+6        device out (6=stdout,7=stderr,file)
+
+which means that the output generated by the executable +should be redirected to the standard output.

+ +
+Line 5: This line specifies the number of +problem sizes to be executed. This number should be less than +or equal to 20. The first integer is significant, the rest +is ignored. If the line reads: +
+3        # of problems sizes (N)
+
+this means that the user is willing to run 3 problem sizes +that will be specified in the next line.

+ +
+Line 6: This line specifies the problem sizes +one wants to run. Assuming the line above started with 3, +the 3 first positive integers are significant, the rest is +ignored. For example: +
+3000 6000 10000    Ns
+
+means that one wants xhpl to run 3 (specified in line 5) +problem sizes, namely 3000, 6000 and 10000.

+ +
+Line 7: This line specifies the number of +block sizes to be run. This number should be less than or +equal to 20. The first integer is significant, the rest is +ignored. If the line reads: +
+5        # of NBs
+
+this means that the user is willing to use 5 block sizes that +will be specified in the next line.

+ +
+Line 8: This line specifies the block sizes +one wants to run. Assuming the line above started with 5, +the 5 first positive integers are significant, the rest is +ignored. For example: +
+80 100 120 140 160 NBs
+
+means that one wants xhpl to use 5 (specified in line 7) +block sizes, namely 80, 100, 120, 140 and 160.

+ +
+Line 9: This line specifies how the MPI +processes should be mapped onto the nodes of your platform. +There are currently two possible mappings, namely row- and +column-major. This feature is mainly useful when these nodes +are themselves multi-processor computers. A row-major mapping +is recommended.

+ +
+Line 10: This line specifies the number of +process grids to be run. This number should be less than +or equal to 20. The first integer is significant, the rest is +ignored. If the line reads: +
+2        # of process grids (P x Q)
+
+this means that you are willing to try 2 process grid sizes +that will be specified in the next line.

+ +
+Line 11-12: These two lines specify the +number of process rows and columns of each grid you want to +run on. Assuming the line above (10) started with 2, the 2 +first positive integers of those two lines are significant, +the rest is ignored. For example: +
+1 2          Ps
+6 8          Qs
+
+means that one wants to run xhpl on 2 process grids (line +10), namely 1-by-6 and 2-by-8. Note: In this example, it is +required then to start xhpl on at least 16 nodes (max +of Pi-by-Qi). The runs on the two grids will be consecutive. +If one was starting xhpl on more than 16 nodes, say 52, only +6 would be used for the first grid (1x6) and then 16 (2x8) +would be used for the second grid. The fact that you started +the MPI job on 52 nodes, will not make HPL use all of them. +In this example, only 16 would be used. If one wants to run +xhpl with 52 processes one needs to specify a grid of 52 +processes, for example the following lines would do the job: +
+4  2         Ps
+13 8         Qs
+
+ +
+Line 13: This line specifies the threshold +to which the residuals should be compared. The residuals +should be of order 1, but are in practice slightly less than +this, typically 0.001. This line is made of a real number, +the rest is not significant. For example: +
+16.0         threshold
+
+In practice, a value of 16.0 will cover most cases. For +various reasons, it is possible that some of the residuals +become slightly larger, say for example 35.6. xhpl will flag +those runs as failed, however they can be considered as +correct. A run should be considered as failed if the residual +is a few orders of magnitude bigger than 1 for example 10^6 or +more. Note: if one was to specify a threshold of 0.0, all +tests would be flagged as failed, even though the answer is +likely to be correct. It is allowed to specify a negative +value for this threshold, in which case the checks will be +by-passed, no matter what the threshold value is, as soon as +it is negative. This feature allows one to save time when +performing a lot of experiments, say for instance during the +tuning phase. Example: +
+-16.0        threshold
+
+ +
+The remaining lines allow one to specify algorithmic features. +xhpl will run all possible combinations of those for each +problem size, block size, process grid combination. This is +handy when one looks for an "optimal" set of parameters. To +understand a little bit better, let us first say a few words +about the algorithm implemented in HPL. Basically this is a +right-looking version with row-partial pivoting. The panel +factorization is matrix-matrix operation based and recursive, +dividing the panel into NDIV subpanels at each step. This +part of the panel factorization is denoted below by +"recursive panel fact. (RFACT)". The recursion stops when +the current panel is made of less than or equal to NBMIN +columns. At that point, xhpl uses a matrix-vector operation +based factorization denoted below by "PFACTs". Classic +recursion would then use NDIV=2, NBMIN=1. There are +essentially 3 numerically equivalent LU factorization +algorithm variants (left-looking, Crout and right-looking). +In HPL, one can choose every one of those for the RFACT, as +well as the PFACT. The following lines of HPL.dat allow you +to set those parameters.

+Lines 14-21: (Example 1) +
+3       # of panel fact
+0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+4       # of recursive stopping criterium
+1 2 4 8 NBMINs (>= 1)
+3       # of panels in recursion
+2 3 4   NDIVs
+3       # of recursive panel fact.
+0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+
+ +This example would try all variants of PFACT, 4 values for +NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 +and 4, and all variants for RFACT.

+Lines 14-21: (Example 2) +
+2       # of panel fact
+2 0     PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+
+This example would try 2 variants of PFACT namely right +looking and left looking, 2 values for NBMIN, namely 4 and 8, +1 value for NDIV namely 2, and one variant for RFACT.

+ +
+In the main loop of the algorithm, the current panel of +columns is broadcast in process rows using a virtual ring +topology. HPL offers various choices and one most likely wants +to use the increasing ring modified encoded as 1. 3 and 4 are +also good choices.

+Lines 22-23: (Example 1) +
+1       # of broadcast
+1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+This will cause HPL to broadcast the current panel using the +increasing ring modified topology.

+Lines 22-23: (Example 2) +
+2       # of broadcast
+0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+This will cause HPL to broadcast the current panel using the +increasing ring virtual topology and the long message +algorithm.

+ +
+Lines 24-25 allow to specify the look-ahead +depth used by HPL. A depth of 0 means that the next panel +is factorized after the update by the current panel is +completely finished. A depth of 1 means that the next +panel is immediately factorized after being updated. The +update by the current panel is then finished. A depth of k +means that the k next panels are factorized immediately after +being updated. The update by the current panel is then +finished. It turns out that a depth of 1 seems to give the +best results, but may need a large problem size before one +can see the performance gain. So use 1, if you do not know +better, otherwise you may want to try 0. Look-ahead of +depths 3 and larger will probably not give you better +results.

+Lines 24-25: (Example 1): +
+1       # of lookahead depth
+1       DEPTHs (>=0)
+
+This will cause HPL to use a look-ahead of depth 1.

+Lines 24-25: (Example 2): +
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+
+This will cause HPL to use a look-ahead of depths 0 and 1.

+ +
+Lines 26-27 allow to specify the swapping +algorithm used by HPL for all tests. There are currently +two swapping algorithms available, one based on "binary +exchange" and the other one based on a "spread-roll" +procedure (also called "long" below). For large problem +sizes, this last one is likely to be more efficient. The user +can also choose to mix both variants, that is "binary-exchange" +for a number of columns less than a threshold value, and then +the "spread-roll" algorithm. This threshold value is then +specified on Line 27.

+Lines 26-27: (Example 1): +
+1       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+
+This will cause HPL to use the "long" or "spread-roll" +swapping algorithm. Note that a threshold is specified in +that example but not used by HPL.

+Lines 26-27: (Example 2): +
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+
+This will cause HPL to use the "long" or "spread-roll" +swapping algorithm as soon as there are more than 60 columns +in the row panel. Otherwise, the "binary-exchange" algorithm +will be used instead.

+ +
+Line 28 allows to specify whether the upper +triangle of the panel of columns should be stored in +no-transposed or transposed form. Example: +
+0            L1 in (0=transposed,1=no-transposed) form
+
+ +
+Line 29 allows to specify whether the panel +of rows U should be stored in no-transposed or transposed +form. Example: +
+0            U  in (0=transposed,1=no-transposed) form
+
+ +
+Line 30 enables / disables the equilibration +phase. This option will not be used unless you selected 1 or +2 in Line 26. Example: +
+1            Equilibration (0=no,1=yes)
+
+ +
+Line 31 allows to specify the alignment in +memory for the memory space allocated by HPL. On modern +machines, one probably wants to use 4, 8 or 16. This may +result in a tiny amount of memory wasted. Example: +
+8       memory alignment in double (> 0)
+
+ +
+

Guide Lines

+ +
    +
  1. Figure out a good block size for the matrix multiply +routine. The best method is to try a few out. If you happen +to know the block size used by the matrix-matrix multiply +routine, a small multiple of that block size will do fine. +This particular topic is discussed in the +FAQs section.

    + +
  2. The process mapping should not matter if the nodes of +your platform are single processor computers. If these nodes +are multi-processors, a row-major mapping is recommended.

    + +
  3. HPL likes "square" or slightly flat process grids. Unless +you are using a very small process grid, stay away from the +1-by-Q and P-by-1 process grids. This particular topic is also +discussed in the FAQs section.

    + +
  4. Panel factorization parameters: a good start are the +following for the lines 14-21: +
    +1       # of panel fact
    +1       PFACTs (0=left, 1=Crout, 2=Right)
    +2       # of recursive stopping criterium
    +4 8     NBMINs (>= 1)
    +1       # of panels in recursion
    +2       NDIVs
    +1       # of recursive panel fact.
    +2       RFACTs (0=left, 1=Crout, 2=Right)
    +
    + +
  5. Broadcast parameters: at this time it is far from obvious +to me what the best setting is, so I would probably try them +all. If I had to guess I would probably start with the +following for the lines 22-23: +
    +2       # of broadcast
    +1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
    +
    +The best broadcast depends on your problem size and hardware +performance. My take is that 4 or 5 may be competitive for +machines featuring very fast nodes compared to the +network.

    + +
  6. Look-ahead depth: as mentioned above 0 or 1 are likely to +be the best choices. This also depends on the problem size +and machine configuration, so I would try "no look-ahead (0)" +and "look-ahead of depth 1 (1)". That is for lines 24-25: +
    +2       # of lookahead depth
    +0 1     DEPTHs (>=0)
    +
    + +
  7. Swapping: one can select only one of the three algorithms +in the input file. Theoretically, mix (2) should win, however +long (1) might just be good enough. The difference should be +small between those two assuming a swapping threshold of the +order of the block size (NB) selected. If this threshold is +very large, HPL will use bin_exch (0) most of the time and if +it is very small (< NB) long (1) will always be used. In +short and assuming the block size (NB) used is say 60, I +would choose for the lines 26-27: +
    +2       SWAP (0=bin-exch,1=long,2=mix)
    +60      swapping threshold 
    +
    +I would also try the long variant. For a very small number +of processes in every column of the process grid (say < 4), +very little performance difference should be observable.

    + +
  8. Local storage: I do not think Line 28 matters. Pick 0 in +doubt. Line 29 is more important. It controls how the panel +of rows should be stored. No doubt 0 is better. The caveat is +that in that case the matrix-multiply function is called with +( Notrans, Trans, ... ), that is C := C - A B^T. Unless the +computational kernel you are using has a very poor (with +respect to performance) implementation of that case, and is +much more efficient with ( Notrans, Notrans, ... ) just pick +0 as well. So, my choice: +
    +0       L1 in (0=transposed,1=no-transposed) form
    +0       U  in (0=transposed,1=no-transposed) form
    +
    + +
  9. Equilibration: It is hard to tell whether equilibration +should always be performed or not. Not knowing much about the +random matrix generated and because the overhead is so small +compared to the possible gain, I turn it on all the time. +
    +1       Equilibration (0=no,1=yes)
    +
    + +
  10. For alignment, 4 should be plenty, but just to be safe, +one may want to pick 8 instead. +
    +8       memory alignment in double (> 0)
    +
    +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/AUTHORS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/AUTHORS new file mode 100644 index 000000000..b08e25180 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/AUTHORS @@ -0,0 +1,6 @@ +Antoine Petitet +Clint Whaley rcwhaley@lsu.edu +Jack Dongarra dongarra@icl.utk.edu +Andy Cleary +Piotr Luszczek luszczek@icl.utk.edu +Julien Langou Julien.Langou@ucdenver.edu diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/BUGS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/BUGS new file mode 100644 index 000000000..08d694014 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/BUGS @@ -0,0 +1,9 @@ +============================================================== + List of the known problems with the HPL software + + Current as of release HPL - 2.3 - December 2, 2018 +============================================================== + +============================================================== + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYING b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYING new file mode 100644 index 000000000..08465d618 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYING @@ -0,0 +1,45 @@ +====================================================================== + -- High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 + Antoine P. Petitet + University of Tennessee, Knoxville + Innovative Computing Laboratory + (C) Copyright 2000-2008 All Rights Reserved + + -- Copyright notice and Licensing terms: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed at the University of + Tennessee, Knoxville, Innovative Computing Laboratory. + + 4. The name of the University, the name of the Laboratory, or the + names of its contributors may not be used to endorse or promote + products derived from this software without specific written + permission. + + -- Disclaimer: + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+====================================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYRIGHT b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYRIGHT new file mode 100644 index 000000000..08465d618 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/COPYRIGHT @@ -0,0 +1,45 @@ +====================================================================== + -- High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 + Antoine P. Petitet + University of Tennessee, Knoxville + Innovative Computing Laboratory + (C) Copyright 2000-2008 All Rights Reserved + + -- Copyright notice and Licensing terms: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed at the University of + Tennessee, Knoxville, Innovative Computing Laboratory. + + 4. The name of the University, the name of the Laboratory, or the + names of its contributors may not be used to endorse or promote + products derived from this software without specific written + permission. + + -- Disclaimer: + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +====================================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/ChangeLog b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/ChangeLog new file mode 100644 index 000000000..1c2b36778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/ChangeLog @@ -0,0 +1,16 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + Done list in version 1.0b, December 15th, 2004 + - Fixed problem with 32-bit integer overflow. + Thanks to John Baron. + + Done list in version 1.0a, January 1st, 2004 + - Added Row- or Column-major process mapping in data file + - Fixed compilation error for gcc 3.3 in walltime. + - Fixed building problems on the T3E; + Thanks to Edward Anderson. 
+ +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/HISTORY b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/HISTORY new file mode 100644 index 000000000..d6d59ee45 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/HISTORY @@ -0,0 +1,103 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + History + + - 09/09/00 Public release of Version 1.0 + + - 09/27/00 A couple of mistakes in the VSIPL port have been + corrected. The tar file as well as the web site were updated + on September 27th, 2000. Note that these problems were not + affecting the BLAS version of the software in any way. + + - 01/01/04 Version 1.0a + The MPI process grid numbering scheme is now an run-time + option. + The inlined assembly timer routine that caused the compila- + tion to fail when using gcc version 3.3 and above has been + removed from the package. + Various building problems on the T3E have been fixed; Thanks + to Edward Anderson. + + - 15/12/04 Version 1.0b + Weakness of the pseudo-random matrix generator found for pro- + blem sizes being power of twos and larger than 2^15; Thanks + to Gregory Bauer. This problem has not been fixed. It is thus + currently recommended to HPL users willing to test matrices + of size larger than 2^15 to not use power twos. + + When the matrix size is such that one needs > 16 GB per MPI + rank, the intermediate calculation (mat.ld+1) * mat.nq in + HPL_pdtest.c ends up overflowing because it is done using + 32-bit arithmetic. This issue has been fixed by typecasting + to size_t; Thanks to John Baron. 
+ + - 09/10/08 Version 2.0 + + Piotr Luszczek changed to 64-bit RNG, modified files: + -- [M] include/hpl_matgen.h + -- [M] testing/matgen/HPL_ladd.c + -- [M] testing/matgen/HPL_lmul.c + -- [M] testing/matgen/HPL_rand.c + -- [M] testing/ptest/HPL_pdinfo.c + + For a motivation for the change, see: + Dongarra and Langou, ``The Problem with the Linpack + Benchmark Matrix Generator'', LAWN 206, June 2008. + + -- [M] testing/ptest/HPL_pdtest.c -- + + Julien Langou changed the test for correctness from + ||Ax-b||_oo / ( eps * ||A||_1 * N ) + ||Ax-b||_oo / ( eps * ||A||_1 * ||x||_1 ) + ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N ) + to the normwise backward error + || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) + See: + Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'', + Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, + Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002. + + Note that in our case || b ||_oo is almost for sure + 1/2, we compute it anyway. + + - 10/26/2012 Version 2.1 + + Piotr Luszczek introduced exact time stamping for HPL_pdgesv(): + -- [M] dist/include/hpl_misc.h + -- [M] dist/testing/ptest/HPL_pdtest.c + + Piotr Luszczek fixed out-of-bounds access in data spreading functions + and exact time stamping for HPL_pdgesv(): + -- [M] dist/src/pgesv/HPL_spreadN.c + -- [M] dist/src/pgesv/HPL_spreadT.c + Thanks to Stephen Whalen from Cray. + + - 02/24/2016 Version 2.2 + + Piotr Luszczek added continuous reporting of factorization progress + submitted by Intel and make scripts that uses Intel software tools and + libraries and their Apple's Mac OS X equivalents. 
+ + - 12/02/2018 Version 2.3 + + Piotr Luszczek removed deprecated MPI functions that are no longer + supported in some MPI implementations (for example Open MPI 4.0) and + replaced them with + modern equivalents in HPL_packL(): + -- [M] src/comm/HPL_packL.c + + Piotr Luszczek added one digit to the display of performance result + and changed display of scaled residual to scientific notation with + extra digits in HPL_pdtest(): + -- [M] testing/ptest/HPL_pdtest.c + + Piotr Luszczek added support for Autotools configuration packages + autoconf and automake: + -- [A] Makefile.am + -- [A] configure.ac + -- [A] acinclude.m4 + -- [A] src/Makefile.am + -- [A] testing/Makefile.am diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/INSTALL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/INSTALL new file mode 100644 index 000000000..fec266c49 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/INSTALL @@ -0,0 +1,81 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + 1) Retrieve the tar file, then + + gunzip hpl.tgz; tar -xvf hpl.tar + + this will create an hpl directory, that we call below the + top-level directory. + + 2) Create a file Make.<arch> in the top-level directory. For + this purpose, you may want to re-use one contained in the + setup directory. This file essentially contains the compilers + and libraries with their paths to be used. + + 3) Type "make arch=<arch>". This should create an executable + in the bin/<arch> directory called xhpl. + + For example, on our Linux PII cluster, I create a file called + Make.Linux_PII in the top-level directory. Then, I type + "make arch=Linux_PII" + This creates the executable file bin/Linux_PII/xhpl.
+ + 4) Quick check: run a few tests: + + cd bin/<arch> + mpirun -np 4 xhpl + + 5) Tuning: Most of the performance parameters can be tuned, + by modifying the input file bin/HPL.dat. See the file TUNING + in the top-level directory. + +============================================================== + + Compile time options: At the end of the "model" Make.<arch>, + --------------------- the user is given the opportunity to + compile the software with some specific compile options. The + list of these options and their meanings is: + + -DHPL_COPY_L + force the copy of the panel L before bcast; + + -DHPL_CALL_CBLAS + call the cblas interface; + + -DHPL_CALL_VSIPL + call the vsip library; + + -DHPL_DETAILED_TIMING + enables detailed timers; + + The user must choose between either the BLAS Fortran 77 + interface, or the BLAS C interface, or the VSIPL library + depending on which computational kernels are available on his + system. Only one of these options should be selected. If you + choose the BLAS Fortran 77 interface, it is necessary to fill + out the machine-specific C to Fortran 77 interface section of + the Make.<arch> file. To do this, please refer to the + Make.<arch> examples contained in the setup directory. + + By default HPL will: + *) not copy L before broadcast, + *) call the BLAS Fortran 77 interface, + *) not display detailed timing information. + + As an example, suppose one wants HPL to copy the panel of + columns into a contiguous buffer before broadcasting. In + theory, it would be more efficient to let HPL create the + appropriate MPI user-defined data type since this may avoid + the data copy. So, it is a strange idea, but one insists. To + achieve this one would add -DHPL_COPY_L to the definition of + HPL_OPTS at the end of the file Make.<arch>. Issue then a + "make clean arch=<arch>; make build arch=<arch>" and the xhpl + executable will be rebuilt with that feature in.
+============================================================== + + Check out the website www.netlib.org/benchmark/hpl for the + latest information. +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.intel64 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.intel64 new file mode 100644 index 000000000..15d4ed82a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.intel64 @@ -0,0 +1,236 @@ + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright (C) 2023 Intel Corporation​ + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. + # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = intel64 +export ARCH = intel64 +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +# Set TOPdir to the location of where this is being built +TOPdir = $(CURDIR) +INCdir = $(TOPdir)/include +BINdir =$(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a + +# +# ---------------------------------------------------------------------- +# - 
Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +OneAPIdir = $(ONEAPI_ROOT) +MPdir = $(OneAPIdir)/mpi/latest/ +MPinc = -I$(MPdir)/include/ +MPlib = -lmpi #$(MPdir)/lib/release/libmpi.so +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(OneAPIdir)/mkl/latest/lib/intel64/ +LAinc = -I$(OneAPIdir)/mkl/latest/include/intel64/ +LAlib = -L$(TOPdir)/src/cuda/ -ldgemm -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lpthread -liomp5 -lm -lstdc++ -L/opt/rocm/hipblas/lib/ -lhipblas -I$(TOPdir)/src/cuda/ +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. 
**One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) #$(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# -DASYOUGO enable timing information as you go (nonintrusive) +# -DASYOUGO2 slightly intrusive timing information +# -DASYOUGO2_DISPLAY display detailed DGEMM information +# -DENDEARLY end the problem early +# -DFASTSWAP insert to use DLASWP instead of HPL code +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -g +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- +MAKE = make VERBOSE=1 arch=$(ARCH) TOPdir=$(TOPdir) diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.top b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.top new file mode 100644 index 000000000..57e2d3fa9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Make.top @@ -0,0 +1,238 @@ + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright (C) 2023 Intel Corporation​ + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. + # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +arch = UNKNOWN +# +include Make.$(arch) +# +## build ############################################################### +# +build_src : + ( $(CD) src/auxil/$(arch); $(MAKE) ) + ( $(CD) src/blas/$(arch); $(MAKE) ) + ( $(CD) src/comm/$(arch); $(MAKE) ) + ( $(CD) src/grid/$(arch); $(MAKE) ) + ( $(CD) src/panel/$(arch); $(MAKE) ) + ( $(CD) src/pauxil/$(arch); $(MAKE) ) + ( $(CD) src/pfact/$(arch); $(MAKE) ) + ( $(CD) src/pgesv/$(arch); $(MAKE) ) + ( $(CD) src/cuda/; $(MAKE) ) +# +build_tst : + ( $(CD) testing/matgen/$(arch); $(MAKE) ) + ( $(CD) testing/timer/$(arch); $(MAKE) ) + ( $(CD) testing/pmatgen/$(arch); $(MAKE) ) + ( $(CD) testing/ptimer/$(arch); $(MAKE) ) + ( $(CD) testing/ptest/$(arch); $(MAKE) ) +#( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' ) +# +## startup ############################################################# +# +startup_dir : + - $(MKDIR) include/$(arch) + - $(MKDIR) lib + - $(MKDIR) lib/$(arch) + - $(MKDIR) bin + - $(MKDIR) bin/$(arch) +# +startup_src : + - $(MAKE) -f Make.top leaf le=src/auxil arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/blas arch=$(arch) + - 
$(MAKE) -f Make.top leaf le=src/comm arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/grid arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/panel arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pauxil arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pfact arch=$(arch) + - $(MAKE) -f Make.top leaf le=src/pgesv arch=$(arch) +# +startup_tst : + - $(MAKE) -f Make.top leaf le=testing/matgen arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/timer arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/pmatgen arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/ptimer arch=$(arch) + - $(MAKE) -f Make.top leaf le=testing/ptest arch=$(arch) +#- SPMS_make_make`' -f Make.top leaf le=testing/test arch=$(arch) +# +## refresh ############################################################# +# +refresh_src : + - $(CP) makes/Make.auxil src/auxil/$(arch)/Makefile + - $(CP) makes/Make.blas src/blas/$(arch)/Makefile + - $(CP) makes/Make.comm src/comm/$(arch)/Makefile + - $(CP) makes/Make.grid src/grid/$(arch)/Makefile + - $(CP) makes/Make.panel src/panel/$(arch)/Makefile + - $(CP) makes/Make.pauxil src/pauxil/$(arch)/Makefile + - $(CP) makes/Make.pfact src/pfact/$(arch)/Makefile + - $(CP) makes/Make.pgesv src/pgesv/$(arch)/Makefile +# +refresh_tst : + - $(CP) makes/Make.matgen testing/matgen/$(arch)/Makefile + - $(CP) makes/Make.timer testing/timer/$(arch)/Makefile + - $(CP) makes/Make.pmatgen testing/pmatgen/$(arch)/Makefile + - $(CP) makes/Make.ptimer testing/ptimer/$(arch)/Makefile + - $(CP) makes/Make.ptest testing/ptest/$(arch)/Makefile +#- SPMS_make_cp`' makes/Make.test testing/test/$(arch)/Makefile +# +## clean ############################################################### +# +clean_src : + - ( $(CD) src/auxil/$(arch); $(MAKE) clean ) + - ( $(CD) src/blas/$(arch); $(MAKE) clean ) + - ( $(CD) src/comm/$(arch); $(MAKE) clean ) + - ( $(CD) src/grid/$(arch); $(MAKE) clean ) + - ( $(CD) src/panel/$(arch); $(MAKE) clean ) + - ( $(CD) src/pauxil/$(arch); $(MAKE) clean ) + - ( $(CD) 
src/pfact/$(arch); $(MAKE) clean ) + - ( $(CD) src/pgesv/$(arch); $(MAKE) clean ) + - ( $(CD) src/cuda/; $(MAKE) clean) +# +clean_tst : + - ( $(CD) testing/matgen/$(arch); $(MAKE) clean ) + - ( $(CD) testing/timer/$(arch); $(MAKE) clean ) + - ( $(CD) testing/pmatgen/$(arch); $(MAKE) clean ) + - ( $(CD) testing/ptimer/$(arch); $(MAKE) clean ) + - ( $(CD) testing/ptest/$(arch); $(MAKE) clean ) +#- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' clean ) +# +## clean_arch ########################################################## +# +clean_arch_src : + - $(RM) -r src/auxil/$(arch) + - $(RM) -r src/blas/$(arch) + - $(RM) -r src/comm/$(arch) + - $(RM) -r src/grid/$(arch) + - $(RM) -r src/panel/$(arch) + - $(RM) -r src/pauxil/$(arch) + - $(RM) -r src/pfact/$(arch) + - $(RM) -r src/pgesv/$(arch) + - ( $(CD) src/cuda; $(MAKE) clean) +# +clean_arch_tst : + - $(RM) -r testing/matgen/$(arch) + - $(RM) -r testing/timer/$(arch) + - $(RM) -r testing/pmatgen/$(arch) + - $(RM) -r testing/ptimer/$(arch) + - $(RM) -r testing/ptest/$(arch) +#- SPMS_make_rm`' -r testing/test/$(arch) +# +## clean_arch_all ###################################################### +# +clean_arch_all : + - $(MAKE) -f Make.top clean_arch_src arch=$(arch) + - $(MAKE) -f Make.top clean_arch_tst arch=$(arch) + - $(RM) -r bin/$(arch) include/$(arch) lib/$(arch) +# +## clean_guard ######################################################### +# +clean_guard_src : + - ( $(CD) src/auxil/$(arch); $(RM) *.grd ) + - ( $(CD) src/blas/$(arch); $(RM) *.grd ) + - ( $(CD) src/comm/$(arch); $(RM) *.grd ) + - ( $(CD) src/grid/$(arch); $(RM) *.grd ) + - ( $(CD) src/panel/$(arch); $(RM) *.grd ) + - ( $(CD) src/pauxil/$(arch); $(RM) *.grd ) + - ( $(CD) src/pfact/$(arch); $(RM) *.grd ) + - ( $(CD) src/pgesv/$(arch); $(RM) *.grd ) +# +clean_guard_tst : + - ( $(CD) testing/matgen/$(arch); $(RM) *.grd ) + - ( $(CD) testing/timer/$(arch); $(RM) *.grd ) + - ( $(CD) testing/pmatgen/$(arch); $(RM) *.grd ) + - ( $(CD) 
testing/ptimer/$(arch); $(RM) *.grd ) + - ( $(CD) testing/ptest/$(arch); $(RM) *.grd ) +#- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_rm`' *.grd ) +# +## misc ################################################################ +# +leaf : + - ( $(CD) $(le) ; $(MKDIR) $(arch) ) + - ( $(CD) $(le)/$(arch) ; \ + $(LN_S) $(TOPdir)/Make.$(arch) Make.inc ) +# +######################################################################## diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile new file mode 100644 index 000000000..7ab3d9c54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile @@ -0,0 +1,134 @@ + # -- High Performance Computing Linpack Benchmark (HPL) + # Modifications Copyright (C) 2023 Intel Corporation​ + # + # -- Copyright notice and Licensing terms: + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions, and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # 3. All advertising materials mentioning features or use of this + # software must display the following acknowledgement: + # This product includes software developed at the University of + # Tennessee, Knoxville, Innovative Computing Laboratory. + # + # 4. The name of the University, the name of the Laboratory, or the + # names of its contributors may not be used to endorse or promote + # products derived from this software without specific written + # permission. 
+ # + # -- Disclaimer: + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # --------------------------------------------------------------------- + # + #SPDX-License-Identifier: BSD-4-Clause + +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. 
The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# +SHELL = /bin/sh +# +arch = intel64 +make = 'make VERBOSE=1' +# +## Targets ############################################################# +# +all : install +# +# ###################################################################### +# +install : startup refresh build +# +startup : + $(MAKE) -f Make.top startup_dir arch=$(arch) + $(MAKE) -f Make.top startup_src arch=$(arch) + $(MAKE) -f Make.top startup_tst arch=$(arch) + $(MAKE) -f Make.top refresh_src arch=$(arch) + $(MAKE) -f Make.top refresh_tst arch=$(arch) +# +refresh : + $(MAKE) -f Make.top refresh_src arch=$(arch) + $(MAKE) -f Make.top refresh_tst arch=$(arch) +# +build : + $(MAKE) -f Make.top build_src arch=$(arch) + $(MAKE) -f Make.top build_tst arch=$(arch) +# +clean : + $(MAKE) -f Make.top clean_src arch=$(arch) + $(MAKE) -f Make.top clean_tst arch=$(arch) +# +clean_arch : + $(MAKE) -f Make.top clean_arch_src arch=$(arch) + $(MAKE) -f Make.top clean_arch_tst arch=$(arch) +# +clean_arch_all : + $(MAKE) -f Make.top clean_arch_all arch=$(arch) +# +clean_guard : + $(MAKE) -f Make.top clean_guard_src arch=$(arch) + $(MAKE) -f Make.top clean_guard_tst arch=$(arch) +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.am new file mode 100644 index 000000000..1ad8c1b17 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src testing + +AM_CPPFLAGS = -I$(top_srcdir)/include diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.in new file mode 100644 index 000000000..76f0e2dd6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/Makefile.in @@ -0,0 +1,772 @@ +# Makefile.in 
generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; 
$(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = . +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ + $(am__configure_deps) $(am__DIST_COMMON) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive 
+am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + cscope distdir distdir-am dist dist-all distcheck +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +CSCOPE = cscope +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/include/hplconfig.h.in AUTHORS COPYING ChangeLog \ + INSTALL NEWS README THANKS TODO compile config.guess \ + config.sub depcomp install-sh missing +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! 
-perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ 
+program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = src testing +AM_CPPFLAGS = -I$(top_srcdir)/include +all: all-recursive + +.SUFFIXES: +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +include/hplconfig.h: include/stamp-h1 + @test -f $@ || rm -f include/stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) include/stamp-h1 + +include/stamp-h1: $(top_srcdir)/include/hplconfig.h.in $(top_builddir)/config.status + @rm -f include/stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status include/hplconfig.h +$(top_srcdir)/include/hplconfig.h.in: $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f include/stamp-h1 + touch $@ + +distclean-hdr: + 
-rm -f include/hplconfig.h include/stamp-h1 + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case 
$$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. 
+distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build/sub \ + && ../../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=../.. --prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-hdr distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--refresh check check-am clean clean-cscope clean-generic \ + cscope cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \ + dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \ + distcheck distclean distclean-generic distclean-hdr \ + distclean-tags distcleancheck distdir distuninstallcheck dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + 
+.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/NEWS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/NEWS new file mode 100644 index 000000000..d6d59ee45 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/NEWS @@ -0,0 +1,103 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + History + + - 09/09/00 Public release of Version 1.0 + + - 09/27/00 A couple of mistakes in the VSIPL port have been + corrected. The tar file as well as the web site were updated + on September 27th, 2000. Note that these problems were not + affecting the BLAS version of the software in any way. + + - 01/01/04 Version 1.0a + The MPI process grid numbering scheme is now an run-time + option. + The inlined assembly timer routine that caused the compila- + tion to fail when using gcc version 3.3 and above has been + removed from the package. + Various building problems on the T3E have been fixed; Thanks + to Edward Anderson. + + - 15/12/04 Version 1.0b + Weakness of the pseudo-random matrix generator found for pro- + blem sizes being power of twos and larger than 2^15; Thanks + to Gregory Bauer. This problem has not been fixed. It is thus + currently recommended to HPL users willing to test matrices + of size larger than 2^15 to not use power twos. + + When the matrix size is such that one needs > 16 GB per MPI + rank, the intermediate calculation (mat.ld+1) * mat.nq in + HPL_pdtest.c ends up overflowing because it is done using + 32-bit arithmetic. This issue has been fixed by typecasting + to size_t; Thanks to John Baron. 
+ + - 09/10/08 Version 2.0 + + Piotr Luszczek changed to 64-bit RNG, modified files: + -- [M] include/hpl_matgen.h + -- [M] testing/matgen/HPL_ladd.c + -- [M] testing/matgen/HPL_lmul.c + -- [M] testing/matgen/HPL_rand.c + -- [M] testing/ptest/HPL_pdinfo.c + + For a motivation for the change, see: + Dongarra and Langou, ``The Problem with the Linpack + Benchmark Matrix Generator'', LAWN 206, June 2008. + + -- [M] testing/ptest/HPL_pdtest.c -- + + Julien Langou changed the test for correctness from + ||Ax-b||_oo / ( eps * ||A||_1 * N ) + ||Ax-b||_oo / ( eps * ||A||_1 * ||x||_1 ) + ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N ) + to the normwise backward error + || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) + See: + Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'', + Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, + Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002. + + Note that in our case || b ||_oo is almost for sure + 1/2, we compute it anyway. + + - 10/26/2012 Version 2.1 + + Piotr Luszczek introduced exact time stamping for HPL_pdgesv(): + -- [M] dist/include/hpl_misc.h + -- [M] dist/testing/ptest/HPL_pdtest.c + + Piotr Luszczek fixed out-of-bounds access in data spreading functions + and exact time stamping for HPL_pdgesv(): + -- [M] dist/src/pgesv/HPL_spreadN.c + -- [M] dist/src/pgesv/HPL_spreadT.c + Thanks to Stephen Whalen from Cray. + + - 02/24/2016 Version 2.2 + + Piotr Luszczek added continuous reporting of factorization progress + submitted by Intel and make scripts that uses Intel software tools and + libraries and their Apple's Mac OS X equivalents. 
+ + - 12/02/2018 Version 2.3 + + Piotr Luszczek removed deprecated MPI functions that are no longer + supported in some MPI implementations (for example Open MPI 4.0) and + replaced them with + modern equivalents in HPL_packL(): + -- [M] src/comm/HPL_packL.c + + Piotr Luszczek added one digit to the display of performance result + and changed display of scaled residual to scientific notation with + extra digits in HPL_pdtest(): + -- [M] testing/ptest/HPL_pdtest.c + + Piotr Luszczek added support for Autotools configuration packages + autoconf and automake: + -- [A] Makefile.am + -- [A] configure.ac + -- [A] acinclude.m4 + -- [A] src/Makefile.am + -- [A] testing/Makefile.am diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/THANKS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/THANKS new file mode 100644 index 000000000..1c5641ce4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/THANKS @@ -0,0 +1 @@ +This software was improved with contribution of external developers. diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TODO b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TODO new file mode 100644 index 000000000..1c2b36778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TODO @@ -0,0 +1,16 @@ +============================================================== + High Performance Computing Linpack Benchmark (HPL) + HPL - 2.3 - December 2, 2018 +============================================================== + + Done list in version 1.0b, December 15th, 2004 + - Fixed problem with 32-bit integer overflow. + Thanks to John Baron. + + Done list in version 1.0a, January 1st, 2004 + - Added Row- or Column-major process mapping in data file + - Fixed compilation error for gcc 3.3 in walltime. + - Fixed building problems on the T3E; + Thanks to Edward Anderson. 
+ +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TUNING b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TUNING new file mode 100644 index 000000000..24707f1fc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/TUNING @@ -0,0 +1,419 @@ +============================================================== + Performance Tuning and setting up the input data file HPL.dat + + Current as of release HPL - 2.3 - December 2, 2018 +============================================================== + Check out the website www.netlib.org/benchmark/hpl for the + latest information. + + After having built the executable hpl/bin//xhpl, one + may want to modify the input data file HPL.dat. This file + should reside in the same directory as the executable + hpl/bin//xhpl. An example HPL.dat file is provided by + default. This file contains information about the problem + sizes, machine configuration, and algorithm features to be + used by the executable. It is 30 lines long. All the selected + parameters will be printed in the output generated by the + executable. + + At the end of this file, there is a couple of experimental + guide lines that you may find useful. + +============================================================== + File HPL.dat (description): + + Line 1: (unused) Typically one would use this line for its + own good. For example, it could be used to summarize the con- + tent of the input file. By default this line reads: + + HPL Linpack benchmark input file + + Line 2: (unused) same as line 1. By default this line reads: + + Innovative Computing Laboratory, University of Tennessee + + Line 3: the user can choose where the output should be re- + directed to. In the case of a file, a name is necessary, and + this is the line where one wants to specify it. Only the + first name on this line is significative. 
By default, the li- +ne reads: + + HPL.out output file name (if any) + + This means that if one chooses to redirect the output to a + file, the file will be called "HPL.out". The rest of the line + is unused, and is a space to put some informative comment on + the meaning of this line. + + Line 4: This line specifies where the output should go. The + line is formatted, it must be a positive integer, the rest is + insignificant. 3 choices are possible for the positive inte- + ger, 6 means that the output will go to the standard output, 7 + means that the output will go to the standard error. Any o- + ther integer means that the output should be redirected + to a file, whose name has been specified in the line above. + This line by default reads: + + 6 device out (6=stdout,7=stderr,file) + + which means that the output generated by the executable + should be redirected to the standard output. + + Line 5: This line specifies the number of problem sizes to be + executed. This number should be less than or equal to 20. The + first integer is significant, the rest is ignored. If the + line reads: + + 3 # of problems sizes (N) + + this means that the user is willing to run 3 problem sizes + that will be specified in the next line. + + Line 6: This line specifies the problem sizes one wants to + run. Assuming the line above started with 3, the first 3 + positive integers are significant, the rest is ignored. For + example: + + 3000 6000 10000 Ns + + means that one wants xhpl to run 3 (specified in line 5) pro- + blem sizes, namely 3000, 6000 and 10000. + + Line 7: This line specifies the number of block sizes to be + run. This number should be less than or equal to 20. + The first integer is significant, the rest is ignored. If the + line reads: + + 5 # of NBs + + this means that the user is willing to use 5 block sizes that + will be specified in the next line. + + Line 8: This line specifies the block sizes one wants to run. 
+ Assuming the line above started with 5, the 5 first positive + integers are significant, the rest is ignored. For example: + + 80 100 120 140 160 NBs + + means that one wants xhpl to use 5 (specified in line 7) + block sizes, namely 80, 100, 120, 140 and 160. + + Line 9 specifies how the MPI processes should be mapped onto + the nodes of your platform. There are currently two possible + mappings, namely row- and column-major. This feature is main- + ly useful when these nodes are themselves multi-processor + computers. A row-major mapping is recommended. + + Line 10: This line specifies the number of process grid to + be runned. This number should be less than or equal to 20. + The first integer is significant, the rest is ignored. If the + line reads: + + 2 # of process grids (P x Q) + + this means that you are willing to try 2 process grid sizes + that will be specified in the next line. + + Line 11-12: These two lines specify the number of process + rows and columns of each grid you want to run on. Assuming + the line above (10) started with 2, the 2 first positive in- + tegers of those two lines are significant, the rest is igno- + red. For example: + + 1 2 Ps + 6 8 Qs + + means that one wants to run xhpl on 2 process grids (line + 10), namely 1 by 6 and 2 by 8. Note: In this example, it is + required then to start xhpl on at least 16 nodes (max of P_i + xQ_i). The runs on the two grids will be consecutive. If one + was starting xhpl on more than 16 nodes, say 52, only 6 would + be used for the first grid (1x6) and then 16 (2x8) would be + used for the second grid. The fact that you started the MPI + job on 52 nodes, will not make HPL use all of them. In this + example, only 16 would be used. If one wants to run xhpl with + 52 processes one needs to specify a grid of 52 processes, for + example the following lines would do the job: + + 4 2 Ps + 13 8 Qs + + Line 13: This line specifies the threshold the residuals + should be compared to. 
The residuals should be of order 1, + but are in practice slightly less than this, typically 0.001. + This line is made of a real number, the rest is insignifi- + cant. For example: + + 16.0 threshold + + In practice, a value of 16.0 will cover most cases. For va- + rious reasons, it is possible that some of the residuals be- + come slightly larger, say for example 35.6. xhpl will flag + those runs as failed, however they can be considered as cor- + rect. A run can be considered as failed if the residual is a + few orders of magnitude bigger than 1 for example 10^6 or mo- + re. Note: if one was to specify a threshold of 0.0, all tests + would be flagged as failed, even though the answer is likely + to be correct. It is allowed to specify a negative value for + this threshold, in which case the checks will be by-passed, + no matter what the value is, as soon as it is negative. This + feature allows one to save time when performing a lot of experi- + ments, say for instance during the tuning phase. Example: + + -16.0 threshold + + The remaining lines allow one to specify algorithmic features. + xhpl will run all possible combinations of those for each + problem size, block size, process grid combination. This is + handy when one looks for an "optimal" set of parameters. To + understand a little bit better, let us first say a few words + about the algorithm implemented in HPL. Basically this is a + right-looking version with row-partial pivoting. The panel + factorization is matrix-matrix operation based and recursive, + dividing the panel into NDIV subpanels at each step. This + part of the panel factorization is denoted below by + "recursive panel fact. (RFACT)". The recursion stops when the + current panel is made of less than or equal to NBMIN columns. + At that point, xhpl uses a matrix-vector operation based + factorization denoted below by "PFACTs". Classic recursion + would then use NDIV=2, NBMIN=1. 
There are essentially 3 + numerically equivalent LU factorization algorithm variants + (left-looking, Crout and right-looking). In HPL, one can + choose every one of those for the RFACT, as well as the + PFACT. The following lines of HPL.dat allow you to set those + parameters. + + Lines 14-21: (Example 1) + 3 # of panel fact + 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + 4 # of recursive stopping criterium + 1 2 4 8 NBMINs (>= 1) + 3 # of panels in recursion + 2 3 4 NDIVs + 3 # of recursive panel fact. + 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + + This example would try all variants of PFACT, 4 values for + NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 + and 4, and all variants for RFACT. Lines 14-21: (Example 2) + + 2 # of panel fact + 2 0 PFACTs (0=left, 1=Crout, 2=Right) + 2 # of recursive stopping criterium + 4 8 NBMINs (>= 1) + 1 # of panels in recursion + 2 NDIVs + 1 # of recursive panel fact. + 2 RFACTs (0=left, 1=Crout, 2=Right) + + This example would try 2 variants of PFACT namely right loo- + king and left looking, 2 values for NBMIN, namely 4 and 8, 1 + value for NDIV namely 2, and one variant for RFACT. + + In the main loop of the algorithm, the current panel of co- + lumn is broadcast in process rows using a virtual ring to- + pology. HPL offers various choices, and one most likely wants + to use the increasing ring modified encoded as 1. 4 is also + a good choice. Lines 22-23: (Example 1): + + 1 # of broadcast + 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + This will cause HPL to broadcast the current panel using the + increasing ring modified topology. Lines 22-23: (Example 2): + + 2 # of broadcast + 0 4 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + This will cause HPL to broadcast the current panel using the + increasing ring virtual topology and the long message algori- + thm. + + Lines 24-25 allow one to specify the look-ahead depth used by + HPL. 
A depth of 0 means that the next panel is factorized af- + ter the update by the current panel is completely finished. A + depth of 1 means that the next panel is factorized immediate- + ly after being updated. The update by the current panel is + then finished. A depth of k means that the k next panels are + factorized immediately after being updated. The update by the + current panel is then finished. It turns out that a depth of + 1 seems to give the best results, but may need a large pro- + blem size before one can see the performance gain. So use 1, + if you do not know better, otherwise you may want to try 0. + Look-ahead of depths 2 and larger will probably not give you + better results. Lines 24-25: (Example 1): + + 1 # of lookahead depth + 1 DEPTHs (>=0) + + This will cause HPL to use a look-ahead of depth 1. + Lines 24-25: (Example 2): + + 2 # of lookahead depth + 0 1 DEPTHs (>=0) + + This will cause HPL to use a look-ahead of depths 0 and 1. + + Lines 26-27 allow one to specify the swapping algorithm used by + HPL for all tests. There are currently two swapping algo- + rithms available, one based on "binary exchange" and the + other one based on a "spread-roll" procedure (also called + "long" below). For large problem sizes, this last one is like- + ly to be more efficient. The user can also choose to mix both + variants, that is "binary-exchange" for a number of columns + less than a threshold value, and then the "spread-roll" al- + gorithm. This threshold value is then specified on Line 27. + Lines 26-27: (Example 1): + + 1 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + This will cause HPL to use the "long" or "spread-roll" swap- + ping algorithm. Note that a threshold is specified in that + example but not used by HPL. Lines 26-27: (Example 2): + + 2 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + This will cause HPL to use the "long" or "spread-roll" swap- + ping algorithm as soon as there are more than 60 columns in + the row panel. 
Otherwise, the "binary-exchange" algorithm + will be used instead. + + Line 28 allows to specify whether the upper triangle of the + panel of columns should be stored in no-transposed or + transposed form. Example: + + 0 L1 in (0=transposed,1=no-transposed) form + + Line 29 allows to specify whether the panel of rows U should + be stored in no-transposed or transposed form. Example: + + 0 U in (0=transposed,1=no-transposed) form + + Line 30 enables/disables the equilibration phase. This option + will not be used unless you selected 1 or 2 in Line 26. Ex: + + 1 Equilibration (0=no,1=yes) + + + Line 31 allows to specify the alignment in memory for the + memory space allocated by HPL. On modern machines, one proba- + bly wants to use 4, 8 or 16. This may result in a tiny amount + of memory wasted. Example: + + 4 memory alignment in double (> 0) + +============================================================== + Guide lines: + + 1) Figure out a good block size for the matrix-matrix + multiply routine. The best method is to try a few out. If you + happen to know the block size used by the matrix-matrix + multiply routine, a small multiple of that block size will do + fine. + + HPL uses the block size NB for the data distribution as well + as for the computational granularity. From a data + distribution point of view, the smallest NB, the better the + load balance. You definitely want to stay away from very + large values of NB. From a computation point of view, a too + small value of NB may limit the computational performance by + a large factor because almost no data reuse will occur in the + highest level of the memory hierarchy. The number of messages + will also increase. Efficient matrix-multiply routines are + often internally blocked. Small multiples of this blocking + factor are likely to be good block sizes for HPL. The bottom + line is that "good" block sizes are almost always in the + [32..256] interval. 
The best values depend on the computation + / communication performance ratio of your system. To a much + lesser extent, the problem size matters as well. Say for + example, you empirically found that 44 was a good block size + with respect to performance. 88 or 132 are likely to give + slightly better results for large problem sizes because of a + slightly higher flop rate. + + 2) The process mapping should not matter if the nodes of + your platform are single processor computers. If these nodes + are multi-processors, a row-major mapping is recommended. + + 3) HPL likes "square" or slightly flat process grids. Unless + you are using a very small process grid, stay away from the + 1-by-Q and P-by-1 process grids. + + 4) Panel factorization parameters: a good start are the fol- + lowing for the lines 14-21: + + 1 # of panel fact + 1 PFACTs (0=left, 1=Crout, 2=Right) + 2 # of recursive stopping criterium + 4 8 NBMINs (>= 1) + 1 # of panels in recursion + 2 NDIVs + 1 # of recursive panel fact. + 2 RFACTs (0=left, 1=Crout, 2=Right) + + 5) Broadcast parameters: at this time, it is far from obvious + to me what the best setting is, so I would probably try them + all. If I had to guess I would probably start with the follo- + wing for the lines 22-23: + + 2 # of broadcast + 1 3 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + + The best broadcast depends on your problem size and hardware + performance. My take is that 4 or 5 may be competitive for + machines featuring very fast nodes comparatively to the + network. + + 6) Look-ahead depth: as mentioned above 0 or 1 are likely to + be the best choices. This also depends on the problem size + and machine configuration, so I would try "no look-ahead (0)" + and "look-ahead of depth 1 (1)". That is for lines 24-25: + + 2 # of lookahead depth + 0 1 DEPTHs (>=0) + + 7) Swapping: one can select only one of the three algorithms + in the input file. Theoretically, mix (2) should win, however + long (1) might just be good enough. 
The difference should be + small between those two assuming a swapping threshold of the + order of the block size (NB) selected. If this threshold is + very large, HPL will use bin_exch (0) most of the time and if + it is very small (< NB) long (1) will always be used. In + short and assuming the block size (NB) used is say 60, I + would choose for the lines 26-27: + + 2 SWAP (0=bin-exch,1=long,2=mix) + 60 swapping threshold + + I would also try the long variant. For a very small number + of processes in every column of the process grid (say < 4), + very little performance difference should be observable. + + 8) Local storage: I do not think Line 28 matters. Pick 0 in + doubt. Line 29 is more important. It controls how the panel + of rows should be stored. No doubt 0 is better. The caveat is + that in that case the matrix-multiply function is called with + ( Notrans, Trans, ... ), that is C := C - A B^T. Unless the + computational kernel you are using has a very poor (with + respect to performance) implementation of that case, and is + much more efficient with ( Notrans, Notrans, ... ) just pick + 0 as well. So, my choice: + + 0 L1 in (0=transposed,1=no-transposed) form + 0 U in (0=transposed,1=no-transposed) form + + 9) Equilibration: It is hard to tell whether equilibration + should always be performed or not. Not knowing much about the + random matrix generated and because the overhead is so small + compared to the possible gain, I turn it on all the time. + + 1 Equilibration (0=no,1=yes) + + 10) For alignment, 4 should be plenty, but just to be safe, + one may want to pick 8 instead. 
+ + 8 memory alignment in double (> 0) + +============================================================== diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/acinclude.m4 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/acinclude.m4 new file mode 100644 index 000000000..4072a950f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/acinclude.m4 @@ -0,0 +1,90 @@ + +AC_DEFUN([HPL_BLAS], [ + +AC_PREREQ(2.69) + +hpl_blas_ok=no + +dnl FIXME: add --with-blas="" + +current_LIBS="$LIBS" + +cat < hplvars.txt +name1=OpenBLAS +rout1=dgemm_ +libs1=-lopenblas -lm + +name2=Atlas Fortran BLAS +rout2=dgemm_ +libs2=-lf77blas -latlas + +name3=Sequential Intel MKL LP64 (group) +rout3=dgemm_ +libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread + +name4=Sequential Intel MKL LP64 +rout4=dgemm_ +libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +name5=AMD's ACML +rout5=dgemm_ +libs5=-lacml -lm + +name6=Accelerate +rout6=dgemm_ +libs6=-framework Accelerate + +name7=Apple VecLib +rout7=dgemm_ +libs7=-framework vecLib + +name8=IBM ESSL +rout8=dgemm_ +libs8=-lessl + +name9=NVIDIA nvblas +rout9=dgemm_ +libs9=-lnvblas + +name10=Generic BLAS +rout10=dgemm_ +libs10=-lblas + +HPLEOF +for hpl_i in 1 2 3 4 5 6 7 8 9 10; +do +if test x$hpl_blas_ok = xno; then + name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`" + rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`" + libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`" + AC_MSG_CHECKING([for [$]rout in [$]name]) + + LIBS="[$]libs" + AC_TRY_LINK_FUNC([$]rout, [hpl_blas_ok=yes;BLAS_LIBS="[$]libs"]) + LIBS="$current_LIBS" + + AC_MSG_RESULT($hpl_blas_ok) +fi +done +rm hplvars.txt + +if test x$hpl_blas_ok = xno; then +dnl +AC_MSG_CHECKING([for dgemm_ in OpenBLAS]) +AC_CHECK_LIB(openblas, dgemm_, [hpl_blas_ok=yes;BLAS_LIBS="-lopenblas"]) +AC_MSG_RESULT($hpl_blas_ok) +dnl +fi + +AC_SUBST(BLAS_LIBS) + +# If 
present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$hpl_blas_ok" = xyes; then + ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1]) + : +else + hpl_blas_ok=no + $2 +fi + +])dnl HPL_BLAS diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/aclocal.m4 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/aclocal.m4 new file mode 100644 index 000000000..56c6bd753 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/aclocal.m4 @@ -0,0 +1,1308 @@ +# generated automatically by aclocal 1.16.1 -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. 
+To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_prog_cc_mpi.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PROG_CC_MPI([MPI-WANTED-TEST[, ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]]) +# +# DESCRIPTION +# +# This macro tries to find out how to compile C programs that use MPI +# (Message Passing Interface), a standard API for parallel process +# communication (see http://www-unix.mcs.anl.gov/mpi/). The macro has to +# be used instead of the standard macro AC_PROG_CC and will replace the +# standard variable CC with the found compiler. +# +# MPI-WANTED-TEST is used to test whether MPI is actually wanted by the +# user. If MPI-WANTED_TEST is omitted or if it succeeds, the macro will +# try to find out how to use MPI, if it fails, the macro will call +# AC_PROG_CC to find a standard C compiler instead. +# +# When MPI is found, ACTION-IF-FOUND will be executed, if MPI is not found +# (or MPI-WANTED-TEST fails) ACTION-IF-NOT-FOUND is executed. If +# ACTION-IF-FOUND is not set, the macro will define HAVE_MPI. +# +# The following example demonstrates usage of the macro: +# +# # If --with-mpi=auto is used, try to find MPI, but use standard C compiler if it is not found. +# # If --with-mpi=yes is used, try to find MPI and fail if it isn't found. +# # If --with-mpi=no is used, use a standard C compiler instead. +# AC_ARG_WITH(mpi, [AS_HELP_STRING([--with-mpi], +# [compile with MPI (parallelization) support. If none is found, +# MPI is not used. 
Default: auto]) +# ],,[with_mpi=auto]) +# # +# AX_PROG_CC_MPI([test x"$with_mpi" != xno],[use_mpi=yes],[ +# use_mpi=no +# if test x"$with_mpi" = xyes; then +# AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.]) +# else +# AC_MSG_WARN([No MPI compiler found, won't use MPI.]) +# fi +# ]) +# +# LICENSE +# +# Copyright (c) 2010,2011 Olaf Lenz +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 2 + +AC_DEFUN([AX_PROG_CC_MPI], [ +AC_PREREQ(2.50) + +# Check for compiler +# Needs to be split off into an extra macro to ensure right expansion +# order. 
+AC_REQUIRE([_AX_PROG_CC_MPI],[_AX_PROG_CC_MPI([$1])]) + +AS_IF([test x"$_ax_prog_cc_mpi_mpi_wanted" = xno], + [ _ax_prog_cc_mpi_mpi_found=no ], + [ + AC_LANG_PUSH([C]) + # test whether MPI_Init is available + # We do not use AC_SEARCH_LIBS here, as it caches its outcome and + # thus disallows corresponding calls in the other AX_PROG_*_MPI + # macros. + for lib in NONE mpi mpich; do + save_LIBS=$LIBS + if test x"$lib" = xNONE; then + AC_MSG_CHECKING([for function MPI_Init]) + else + AC_MSG_CHECKING([for function MPI_Init in -l$lib]) + LIBS="-l$lib $LIBS" + fi + AC_LINK_IFELSE([AC_LANG_CALL([],[MPI_Init])], + [ _ax_prog_cc_mpi_mpi_found=yes ], + [ _ax_prog_cc_mpi_mpi_found=no ]) + AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_found) + if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then + break; + fi + LIBS=$save_LIBS + done + + # Check for header + AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [ + AC_MSG_CHECKING([for mpi.h]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include ])], + [ AC_MSG_RESULT(yes)], + [ AC_MSG_RESULT(no) + _ax_prog_cc_mpi_mpi_found=no + ]) + ]) + AC_LANG_POP([C]) +]) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +AS_IF([test x"$_ax_prog_cc_mpi_mpi_found" = xyes], [ + ifelse([$2],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$2]) + : +],[ + $3 + : +]) + +])dnl AX_PROG_CC_MPI + +dnl _AX_PROG_CC_MPI is an internal macro required by AX_PROG_CC_MPI. +dnl To ensure the right expansion order, the main function AX_PROG_CC_MPI +dnl has to be split into two parts. 
+dnl +dnl Known MPI C compilers: +dnl mpicc +dnl mpixlc_r +dnl mpixlc +dnl hcc +dnl mpxlc_r +dnl mpxlc +dnl sxmpicc NEC SX +dnl mpifcc Fujitsu +dnl mpgcc +dnl mpcc +dnl cmpicc +dnl cc +dnl +AC_DEFUN([_AX_PROG_CC_MPI], [ + AC_ARG_VAR(MPICC,[MPI C compiler command]) + ifelse([$1],,[_ax_prog_cc_mpi_mpi_wanted=yes],[ + AC_MSG_CHECKING([whether to compile using MPI]) + if $1; then + _ax_prog_cc_mpi_mpi_wanted=yes + else + _ax_prog_cc_mpi_mpi_wanted=no + fi + AC_MSG_RESULT($_ax_prog_cc_mpi_mpi_wanted) + ]) + if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then + if test -z "$CC" && test -n "$MPICC"; then + CC="$MPICC" + else + AC_CHECK_TOOLS([CC], [mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc]) + fi + fi + AC_PROG_CC +])dnl _AX_PROG_CC_MPI + +# Copyright (C) 2002-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.1], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. 
+m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.1])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. 
+# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. 
+Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all it's +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. 
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. 
It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. 
+ AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. 
+dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. 
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542> + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: <https://www.gnu.org/software/coreutils/>. + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. 
+_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAKE_INCLUDE() +# ----------------- +# Check whether make has an 'include' directive that can support all +# the idioms we need for our automatic dependency tracking code. 
+AC_DEFUN([AM_MAKE_INCLUDE], +[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) + AS_CASE([$?:`cat confinc.out 2>/dev/null`], + ['0:this is the am__doit target'], + [AS_CASE([$s], + [BSD], [am__include='.include' am__quote='"'], + [am__include='include' am__quote=''])]) + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +AC_MSG_RESULT([${_am_result}]) +AC_SUBST([am__include])]) +AC_SUBST([am__quote])]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. 
+AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. 
+AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! 
+Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. 
+am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works. 
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar <conftest.tar]) + AM_RUN_LOG([cat conftest.dir/file]) + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([acinclude.m4]) diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/compile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/compile new file mode 100755 index 000000000..99e50524b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/compile @@ -0,0 +1,348 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# Written by Tom Tromey <tromey@cygnus.com>. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to <bug-automake@gnu.org> or send patches to +# <automake-patches@gnu.org>. + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. 
+func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f "$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. 
+ eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) + func_cl_wrapper "$@" # Doesn't return... 
+ ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? 
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.guess b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.guess new file mode 100755 index 000000000..256083a70 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.guess @@ -0,0 +1,1476 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-03-08' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). 
+# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. +# +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# +# Please send patches to . + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2018 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > "$dummy.c" ; + for c in cc gcc c89 c99 ; do + if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "$UNAME_SYSTEM" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. 
+ LIBC=gnu + + eval "$set_cc_for_build" + cat <<-EOF > "$dummy.c" + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`" + + # If ldd exists, use it to detect musl libc. + if command -v ldd >/dev/null && \ + ldd --version 2>&1 | grep -q ^musl + then + LIBC=musl + fi + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)` + case "$UNAME_MACHINE_ARCH" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine="${arch}${endian}"-unknown + ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. 
+ case "$UNAME_MACHINE_ARCH" in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval "$set_cc_for_build" + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. + case "$UNAME_MACHINE_ARCH" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "$UNAME_VERSION" in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "$machine-${os}${release}${abi}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" + exit ;; + *:ekkoBSD:*:*) + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" + exit ;; + *:SolidBSD:*:*) + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:MirBSD:*:*) + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:Sortix:*:*) + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`" + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? 
+ trap '' 0 + exit $exitcode ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix"$UNAME_RELEASE" + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux"$UNAME_RELEASE" + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval "$set_cc_for_build" + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`" + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos"$UNAME_RELEASE" + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos"$UNAME_RELEASE" + ;; + sun4) + echo sparc-sun-sunos"$UNAME_RELEASE" + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos"$UNAME_RELEASE" + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint"$UNAME_RELEASE" + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint"$UNAME_RELEASE" + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint"$UNAME_RELEASE" + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten"$UNAME_RELEASE" + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten"$UNAME_RELEASE" + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix"$UNAME_RELEASE" + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix"$UNAME_RELEASE" + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix"$UNAME_RELEASE" + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos"$UNAME_RELEASE" + exit ;; + Motorola:PowerMAX_OS:*:*) + 
echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ] + then + if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \ + [ "$TARGET_BINARY_INTERFACE"x = x ] + then + echo m88k-dg-dgux"$UNAME_RELEASE" + else + echo m88k-dg-dguxbcs"$UNAME_RELEASE" + fi + else + echo i586-dg-dgux"$UNAME_RELEASE" + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`" + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) 
HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "$sc_cpu_version" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "$sc_kernel_bits" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "$HP_ARCH" = "" ]; then + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ "$HP_ARCH" = hppa2.0w ] + then + eval "$set_cc_for_build" + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux"$HPUX_REV" + exit ;; + 3050*:HI-UX:*:*) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo "$UNAME_MACHINE"-unknown-osf1mk + else + echo "$UNAME_MACHINE"-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + 
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:BSD/OS:*:*) + echo 
"$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case "$UNAME_PROCESSOR" in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + i*:CYGWIN*:*) + echo "$UNAME_MACHINE"-pc-cygwin + exit ;; + *:MINGW64*:*) + echo "$UNAME_MACHINE"-pc-mingw64 + exit ;; + *:MINGW*:*) + echo "$UNAME_MACHINE"-pc-mingw32 + exit ;; + *:MSYS*:*) + echo "$UNAME_MACHINE"-pc-msys + exit ;; + i*:PW*:*) + echo "$UNAME_MACHINE"-pc-pw32 + exit ;; + *:Interix*:*) + case "$UNAME_MACHINE" in + x86) + echo i586-pc-interix"$UNAME_RELEASE" + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix"$UNAME_RELEASE" + exit ;; + IA64) + echo ia64-unknown-interix"$UNAME_RELEASE" + exit ;; + esac ;; + i*:UWIN*:*) + echo "$UNAME_MACHINE"-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + *:GNU:*:*) + # the GNU system + echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`" + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC" + exit ;; + i*86:Minix:*:*) + echo "$UNAME_MACHINE"-pc-minix + exit ;; + aarch64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + 
EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arm*:Linux:*:*) + eval "$set_cc_for_build" + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi + else + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + cris:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + crisv32:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + e2k:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + frv:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + hexagon:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + ia64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + k1om:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m32r*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m68*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E 
"$dummy.c" 2>/dev/null | grep '^CPU'`" + test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; } + ;; + mips64el:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + openrisc*:Linux:*:*) + echo or1k-unknown-linux-"$LIBC" + exit ;; + or32:Linux:*:* | or1k*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-"$LIBC" + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-"$LIBC" + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-"$LIBC" + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-"$LIBC" + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-"$LIBC" + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-"$LIBC" + exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" + exit ;; + sh64*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sh*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + tile*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + vax:Linux:*:*) + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" + exit ;; + x86_64:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + xtensa*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. 
+ echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo "$UNAME_MACHINE"-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo "$UNAME_MACHINE"-unknown-stop + exit ;; + i*86:atheos:*:*) + echo "$UNAME_MACHINE"-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo "$UNAME_MACHINE"-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos"$UNAME_RELEASE" + exit ;; + i*86:*DOS:*:*) + echo "$UNAME_MACHINE"-pc-msdosdjgpp + exit ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. 
+        case `/bin/uname -X | grep "^Machine"` in
+            *486*)           UNAME_MACHINE=i486 ;;
+            *Pentium)        UNAME_MACHINE=i586 ;;
+            *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+        esac
+        echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" # release, system and version are all expanded; no literal braces in the triplet
+        exit ;;
+    i*86:*:3.2:*)
+        if test -f /usr/options/cb.name; then # release is taken from the "Version ..." text in cb.name
+                UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+                echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
+        elif /bin/uname -X 2>/dev/null >/dev/null ; then # uname -X is usable: derive release and CPU class from its output
+                UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+                (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+                (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+                        && UNAME_MACHINE=i586
+                (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+                        && UNAME_MACHINE=i686
+                (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+                        && UNAME_MACHINE=i686
+                echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
+        else # neither source of release information is available
+                echo "$UNAME_MACHINE"-pc-sysv32
+        fi
+        exit ;;
+    pc:*:*:*)
+        # Left here for compatibility:
+        # uname -m prints for DJGPP always 'pc', but it prints nothing about
+        # the processor, so we play safe by assuming i586.
+        # Note: whatever this is, it MUST be the same as what config.sub
+        # prints for the "djgpp" host, or else GDB configure will decide that
+        # this is a cross-build.
+        echo i586-pc-msdosdjgpp
+        exit ;;
+    Intel:Mach:3*:*)
+        echo i386-pc-mach3
+        exit ;;
+    paragon:*:*:*)
+        echo i860-intel-osf1
+        exit ;;
+    i860:*:4.*:*) # i860-SVR4
+        if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+          echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
+        else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos"$UNAME_RELEASE" + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos"$UNAME_RELEASE" + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv"$UNAME_RELEASE" + exit ;; + 
RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo "$UNAME_MACHINE"-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo "$UNAME_MACHINE"-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux"$UNAME_RELEASE" + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv"$UNAME_RELEASE" + else + echo mips-unknown-sysv"$UNAME_RELEASE" + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux"$UNAME_RELEASE" + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux"$UNAME_RELEASE" + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux"$UNAME_RELEASE" + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux"$UNAME_RELEASE" + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux"$UNAME_RELEASE" + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Rhapsody:*:*) + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + eval "$set_cc_for_build" + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. 
+ UNAME_PROCESSOR=x86_64 + fi + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = 386; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo "$UNAME_MACHINE"-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux"$UNAME_RELEASE" + exit ;; + *:DragonFly:*:*) + echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "$UNAME_MACHINE" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`" + exit ;; + i*86:rdos:*:*) + echo "$UNAME_MACHINE"-pc-rdos + exit ;; + i*86:AROS:*:*) + echo "$UNAME_MACHINE"-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo "$UNAME_MACHINE"-unknown-esx + exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; +esac + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.sub b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.sub new file mode 100755 index 000000000..9ccf09a7a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/config.sub @@ -0,0 +1,1801 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-03-08' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . 
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that
+# program. This Exception is an additional permission under section 7
+# of the GNU General Public License, version 3 ("GPLv3").
+
+
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support. The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'` # script name with any leading directory stripped; used to prefix diagnostics
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
+
+Canonicalize a configuration name.
+
+Options:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright 1992-2018 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line: informational options print and exit; anything else ends option processing.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2 # diagnostics go to stderr; stdout carries only the canonical name
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo "$1"
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in # exactly one configuration name must remain
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
+  kopensolaris*-gnu* | cloudabi*-eabi* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+    ;;
+  android-linux)
+    os=-linux-android
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
+  *)
+    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
+    if [ "$basic_machine" != "$1" ]
+    then os=`echo "$1" | sed 's/.*-/-/'`
+    else os=; fi
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.
We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze*) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*178) + os=-lynxos178 + ;; + -lynx*5) + os=-lynxos5 + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arceb \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | ba \ + | be32 | be64 \ + | bfin \ + | c4x | c8051 | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i860 | i960 | ia16 | ia64 \ + | ip2k | iq2000 \ + | k1om \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 
| mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 | nios2eb | nios2el \ + | ns16k | ns32k \ + | open8 | or1k | or1knd | or32 \ + | pdp10 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ + | pyramid \ + | riscv32 | riscv64 \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | visium \ + | wasm32 \ + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + leon|leon[3-9]) + basic_machine=sparc-$basic_machine + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65) + ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. 
+ i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | ba-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | c8051-* | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | e2k-* | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ + | i*86-* | i860-* | i960-* | ia16-* | ia64-* \ + | ip2k-* | iq2000-* \ + | k1om-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa32r6-* | mipsisa32r6el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64r6-* | mipsisa64r6el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* | 
mipsr5900el-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | or1k*-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ + | pyramid-* \ + | riscv32-* | riscv64-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ + | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ + | visium-* \ + | wasm32-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-pc + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + asmjs) + basic_machine=asmjs-unknown + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd 
+ ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2*) + basic_machine=m68k-bull + os=-sysv3 + ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=$os"spe" + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + 
hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; + i*86v32) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'` + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze*) + basic_machine=microblaze-xilinx + ;; + 
mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; + mingw32) + basic_machine=i686-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i686-pc + os=-msys + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + nsv-tandem) + 
basic_machine=nsv-tandem + ;; + nsx-tandem) + basic_machine=nsx-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc | ppcbe) basic_machine=powerpc-unknown + ;; + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppc64) 
basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'` + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + 
basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + x64) + basic_machine=x86_64-pc + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'` + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases that might get confused + # with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # es1800 is here to avoid being matched by es* (a different OS) + -es1800*) + os=-ose + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. + # -sysv* is not here because it comes later, after sysvr4. 
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* | -plan9* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \ + | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \ + | -midnightbsd*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -xray | -os68k* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo "$os" | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo "$os" | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo "$os" | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4*) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -pikeos*) + # Until real need of OS specific support for + # particular features comes up, bare metal + # configurations are quite functional. + case $basic_machine in + arm*) + os=-eabi + ;; + *) + os=-elf + ;; + esac + ;; + -nacl*) + ;; + -ios) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. 
+ os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + c8051-*) + os=-elf + ;; + hexagon-*) + os=-elf + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + pru-*) + os=-elf + ;; + *-be) + os=-beos + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"` + ;; +esac + +echo "$basic_machine$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure new file mode 100755 index 000000000..ed0b4faa0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure @@ -0,0 +1,6161 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for hpl 2.3. +# +# Report bugs to <hpl@icl.utk.edu>. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. 
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and hpl@icl.utk.edu +$0: about your system, including any error possibly output +$0: before this message. Then install a modern shell, or +$0: manually run the script under such a shell if you do +$0: have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='hpl' +PACKAGE_TARNAME='hpl' +PACKAGE_VERSION='2.3' +PACKAGE_STRING='hpl 2.3' +PACKAGE_BUGREPORT='hpl@icl.utk.edu' +PACKAGE_URL='' + +ac_unique_file="include/hpl.h" +# Factoring default headers for most tests. 
+# stdio.h is included unconditionally; every other header below is pulled in
+# only when its HAVE_* (or STDC_HEADERS) macro is defined.  The header names
+# had been stripped from this text, which left bare "#include" directives
+# that cannot be preprocessed.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#  include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+# NOTE(review): presumably the names substituted into generated output files;
+# the consumer of this list is not visible in this part of the script.
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+LIBOBJS
+EGREP
+GREP
+CPP
+BLAS_LIBS
+AM_BACKSLASH
+AM_DEFAULT_VERBOSITY
+AM_DEFAULT_V
+AM_V
+am__fastdepCC_FALSE
+am__fastdepCC_TRUE
+CCDEPMODE
+am__nodep
+AMDEPBACKSLASH
+AMDEP_FALSE
+AMDEP_TRUE
+am__include
+DEPDIR
+am__untar
+am__tar
+AMTAR
+am__leading_dot
+SET_MAKE
+AWK
+mkdir_p
+MKDIR_P
+INSTALL_STRIP_PROGRAM
+STRIP
+install_sh
+MAKEINFO
+AUTOHEADER
+AUTOMAKE
+AUTOCONF
+ACLOCAL
+VERSION
+PACKAGE
+CYGPATH_W
+am__isrc
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+RANLIB
+OBJEXT
+EXEEXT
+CPPFLAGS
+LDFLAGS
+CFLAGS
+ac_ct_CC
+CC
+MPICC
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL
+am__quote'
+ac_subst_files=''
+# Newline-delimited list of recognised --enable-*/--disable-* variable names;
+# the option parser matches "enable_<name>" against it to detect unknown
+# options.
+ac_user_opts='
+enable_option_checking
+enable_dependency_tracking
+enable_silent_rules
+'
+# Variables whose incoming environment values are recorded into
+# ac_env_<var>_* and ac_cv_env_<var>_* once option parsing is done.
+ ac_precious_vars='build_alias
+host_alias
+target_alias
+MPICC
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP'
+
+
+# Initialize some variables set by options.
+ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures hpl 2.3 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/hpl] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of hpl 2.3:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make 
V=0") + +Some influential environment variables: + MPICC MPI C compiler command + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. 
+ if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +hpl configure 2.3 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. 
+ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## ------------------------------ ## +## Report this to hpl@icl.utk.edu ## +## ------------------------------ ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. 
+ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by hpl $as_me 2.3, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. 
+# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers include/hplconfig.h" + + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. 
+ + +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` + + + + _ax_prog_cc_mpi_mpi_wanted=yes + if test x"$_ax_prog_cc_mpi_mpi_wanted" = xyes; then + if test -z "$CC" && test -n "$MPICC"; then + CC="$MPICC" + else + if test -n "$ac_tool_prefix"; then + for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in mpicc mpixlc_r mpixlc hcc mpxlc_r mpxlc sxmpicc mpifcc mpgcc mpcc cmpicc cc gcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + fi + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + +# Check for compiler +# Needs to be split off into an extra macro to ensure right expansion +# order. 
+ + +if test x"$_ax_prog_cc_mpi_mpi_wanted" = xno; then : + _ax_prog_cc_mpi_mpi_found=no +else + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + # test whether MPI_Init is available + # We do not use AC_SEARCH_LIBS here, as it caches its outcome and + # thus disallows corresponding calls in the other AX_PROG_*_MPI + # macros. + for lib in NONE mpi mpich; do + save_LIBS=$LIBS + if test x"$lib" = xNONE; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init" >&5 +$as_echo_n "checking for function MPI_Init... " >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function MPI_Init in -l$lib" >&5 +$as_echo_n "checking for function MPI_Init in -l$lib... " >&6; } + LIBS="-l$lib $LIBS" + fi + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char MPI_Init (); +int +main () +{ +return MPI_Init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + _ax_prog_cc_mpi_mpi_found=yes +else + _ax_prog_cc_mpi_mpi_found=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_ax_prog_cc_mpi_mpi_found" >&5 +$as_echo "$_ax_prog_cc_mpi_mpi_found" >&6; } + if test "x$_ax_prog_cc_mpi_mpi_found" = "xyes"; then + break; + fi + LIBS=$save_LIBS + done + + # Check for header + if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5 +$as_echo_n "checking for mpi.h... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + _ax_prog_cc_mpi_mpi_found=no + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$_ax_prog_cc_mpi_mpi_found" = xyes; then : + + +$as_echo "#define HAVE_MPI 1" >>confdefs.h + + : + +else + + + : + +fi + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... 
" >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + + +am__api_version='1.16' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. 
This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. 
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +$as_echo "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... 
" >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='hpl' + VERSION='2.3' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. 
For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. 
+ am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. 
+# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." 
"$LINENO" 5 + fi +fi + + + + + + + + + +hpl_blas_ok=no + + +current_LIBS="$LIBS" + +cat < hplvars.txt +name1=OpenBLAS +rout1=dgemm_ +libs1=-lopenblas -lm + +name2=Atlas Fortran BLAS +rout2=dgemm_ +libs2=-lf77blas -latlas + +name3=Sequential Intel MKL LP64 (group) +rout3=dgemm_ +libs3=-Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lpthread + +name4=Sequential Intel MKL LP64 +rout4=dgemm_ +libs4=-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +name5=AMD's ACML +rout5=dgemm_ +libs5=-lacml -lm + +name6=Accelerate +rout6=dgemm_ +libs6=-framework Accelerate + +name7=Apple VecLib +rout7=dgemm_ +libs7=-framework vecLib + +name8=IBM ESSL +rout8=dgemm_ +libs8=-lessl + +name9=NVIDIA nvblas +rout9=dgemm_ +libs9=-lnvblas + +name10=Generic BLAS +rout10=dgemm_ +libs10=-lblas + +HPLEOF +for hpl_i in 1 2 3 4 5 6 7 8 9 10; +do +if test x$hpl_blas_ok = xno; then + name="`grep ^name${hpl_i}= hplvars.txt | sed s/^name${hpl_i}=//`" + rout="`grep ^rout${hpl_i}= hplvars.txt | sed s/^rout${hpl_i}=//`" + libs="`grep ^libs${hpl_i}= hplvars.txt | sed s/^libs${hpl_i}=//`" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $rout in $name" >&5 +$as_echo_n "checking for $rout in $name... " >&6; } + + LIBS="$libs" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char $rout (); +int +main () +{ +return $rout (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + hpl_blas_ok=yes;BLAS_LIBS="$libs" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS="$current_LIBS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5 +$as_echo "$hpl_blas_ok" >&6; } +fi +done +rm hplvars.txt + +if test x$hpl_blas_ok = xno; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in OpenBLAS" >&5 +$as_echo_n "checking for dgemm_ in OpenBLAS... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dgemm_ in -lopenblas" >&5 +$as_echo_n "checking for dgemm_ in -lopenblas... " >&6; } +if ${ac_cv_lib_openblas_dgemm_+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lopenblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dgemm_ (); +int +main () +{ +return dgemm_ (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_openblas_dgemm_=yes +else + ac_cv_lib_openblas_dgemm_=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_dgemm_" >&5 +$as_echo "$ac_cv_lib_openblas_dgemm_" >&6; } +if test "x$ac_cv_lib_openblas_dgemm_" = xyes; then : + hpl_blas_ok=yes;BLAS_LIBS="-lopenblas" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hpl_blas_ok" >&5 +$as_echo "$hpl_blas_ok" >&6; } +fi + + + +# If present, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$hpl_blas_ok" = xyes; then + LIBS="$BLAS_LIBS $LIBS" + : +else + hpl_blas_ok=no + as_fn_error $? "BLAS not found" "$LINENO" 5 +fi + + + + +for ac_func in dgemm_ +do : + ac_fn_c_check_func "$LINENO" "dgemm_" "ac_cv_func_dgemm_" +if test "x$ac_cv_func_dgemm_" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_DGEMM_ 1 +_ACEOF + +fi +done + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. 
+ # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. 
Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in mpi.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "mpi.h" "ac_cv_header_mpi_h" "$ac_includes_default" +if test "x$ac_cv_header_mpi_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MPI_H 1 +_ACEOF + +fi + +done + + +ac_config_files="$ac_config_files Makefile src/Makefile testing/Makefile" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. 
+# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! 
-f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. 
Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
+ xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by hpl $as_me 2.3, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. 
Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +hpl config.status 2.3 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. 
## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "include/hplconfig.h") CONFIG_HEADERS="$CONFIG_HEADERS include/hplconfig.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; + "testing/Makefile") CONFIG_FILES="$CONFIG_FILES testing/Makefile" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? + done + if test $am_rc -ne 0; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). +See \`config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? 
"write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure.ac b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure.ac new file mode 100644 index 000000000..eb91dc590 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/configure.ac @@ -0,0 +1,34 @@ +AC_PREREQ([2.69]) + +AC_INIT(hpl, 2.3, hpl@icl.utk.edu) +AC_CONFIG_SRCDIR([include/hpl.h]) +AC_CONFIG_HEADERS([include/hplconfig.h]) + +AX_PROG_CC_MPI + +AC_PROG_RANLIB + +AC_PROG_INSTALL + +AM_INIT_AUTOMAKE([subdir-objects]) + +AM_PROG_CC_C_O + +dnl +dnl AX_BLAS requires Fortran compiler and detects fortran libraries in $FLIBS +dnl +dnl AX_BLAS(LIBS="$BLAS_LIBS $LIBS $FLIBS") +dnl + 
+HPL_BLAS(LIBS="$BLAS_LIBS $LIBS",AC_MSG_ERROR([BLAS not found])) + +dnl FIXME: test for CBLAS: Atlas, MKL, OpenBLAS, ESSL, ... +dnl FIXME: test for GSL CBLAS + +AC_CHECK_FUNCS([dgemm_]) + +AC_CHECK_HEADERS([mpi.h]) + +AC_CONFIG_FILES([Makefile src/Makefile testing/Makefile]) + +AC_OUTPUT diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/depcomp b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/depcomp new file mode 100755 index 000000000..65cbf7093 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/depcomp @@ -0,0 +1,791 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. 
+ +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. 
+nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. 
+ gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsosete pre-3.x GCC compilers. +## but also to in-use compilers like IMB xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. 
+ sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. 
+ tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). 
+ "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. 
+ "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. + while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? 
+ if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. + sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. 
+ aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? + grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. 
+ IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. + "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. 
+ sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. 
+ if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl.h new file mode 100644 index 000000000..6d131963f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl.h @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_H +#define HPL_H +/* + * --------------------------------------------------------------------- + * HPL default compile options that can overridden in the Make. + * --------------------------------------------------------------------- + */ +#ifndef HPL_NO_MPI_DATATYPE /* Use MPI user-defined data type */ +#define HPL_USE_MPI_DATATYPE +#endif + +#ifndef HPL_COPY_L /* do not copy L, use MPI user-defined data types */ +#define HPL_NO_COPY_L +#endif + +#ifndef HPL_DETAILED_TIMING /* Do not enable detailed timings */ +#define HPL_NO_DETAILED_TIMING +#endif + +#ifndef HPL_CALL_VSIPL /* Call the Fortran 77 BLAS interface */ +#ifndef HPL_CALL_CBLAS /* there can be only one */ +#define HPL_CALL_FBLAS +#endif +#endif +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +#include "hpl_pgesv.h" + +#include "hpl_timer.h" +#include "hpl_matgen.h" +#include "hpl_test.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +#include "hpl_ptest.h" + +#endif +/* + * End of hpl.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_auxil.h 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_auxil.h new file mode 100644 index 000000000..861caf380 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_auxil.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_AUXIL_H +#define HPL_AUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM; + +typedef enum +{ + HPL_MACH_EPS = 900, /* relative machine precision */ + HPL_MACH_SFMIN = 901, /* safe minimum st 1/sfmin does not overflow */ + HPL_MACH_BASE = 902, /* base = base of the machine */ + HPL_MACH_PREC = 903, /* prec = eps*base */ + HPL_MACH_MLEN = 904, /* number of (base) digits in the mantissa */ + HPL_MACH_RND = 905, /* 1.0 if rounding occurs in addition */ + HPL_MACH_EMIN = 906, /* min exponent before (gradual) underflow */ + HPL_MACH_RMIN = 907, /* underflow threshold base**(emin-1) */ + HPL_MACH_EMAX = 908, /* largest exponent before overflow */ + HPL_MACH_RMAX = 909 /* overflow threshold - (base**emax)*(1-eps) */ + +} HPL_T_MACH; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_fprintf +STDC_ARGS( ( + FILE *, + const char *, + ... 
+) ); +void HPL_warn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_abort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_dlacpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlatcpy +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dlaprnt +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_dlange +STDC_ARGS( ( + const HPL_T_NORM, + const int, + const int, + const double *, + const int +) ); +double HPL_dlamch +STDC_ARGS( ( + const HPL_T_MACH +) ); + +#endif +/* + * End of hpl_auxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_blas.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_blas.h new file mode 100644 index 000000000..2a510471a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_blas.h @@ -0,0 +1,630 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_BLAS_H +#define HPL_BLAS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" + + +/* + * --------------------------------------------------------------------- + * typedef definitions + * --------------------------------------------------------------------- + */ +enum HPL_ORDER +{ HplRowMajor = 101, HplColumnMajor = 102 }; +enum HPL_TRANS +{ HplNoTrans = 111, HplTrans = 112, HplConjTrans = 113 }; +enum HPL_UPLO +{ HplUpper = 121, HplLower = 122 }; +enum HPL_DIAG +{ HplNonUnit = 131, HplUnit = 132 }; +enum HPL_SIDE +{ HplLeft = 141, HplRight = 142 }; + + +#ifdef HPL_CALL_CBLAS + + +/* + * --------------------------------------------------------------------- + * The C interface of the BLAS is available ... + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define CBLAS_INDEX int + +#define CBLAS_ORDER HPL_ORDER +#define CblasRowMajor HplRowMajor +#define CblasColMajor HplColMajor + +#define CBLAS_TRANSPOSE HPL_TRANS +#define CblasNoTrans HplNoTrans +#define CblasTrans HplTrans +#define CblasConjTrans HplConjTrans + +#define CBLAS_UPLO HPL_UPLO +#define CblasUpper HplUpper +#define CblasLower HplLower + +#define CBLAS_DIAG HPL_DIAG +#define CblasNonUnit HplNonUnit +#define CblasUnit HplUnit + +#define CBLAS_SIDE HPL_SIDE +#define CblasLeft HplLeft +#define CblasRight HplRight +/* + * --------------------------------------------------------------------- + * CBLAS Function prototypes + * --------------------------------------------------------------------- + */ +CBLAS_INDEX cblas_idamax +STDC_ARGS( +( const int, const double *, const int ) ); +void cblas_dswap +STDC_ARGS( +( const int, double *, const int, double *, + const int ) ); +void cblas_dcopy +STDC_ARGS( +( const int, 
const double *, const int, double *, + const int ) ); +void cblas_daxpy +STDC_ARGS( +( const int, const double, const double *, const int, + double *, const int ) ); +void cblas_dscal +STDC_ARGS( +( const int, const double, double *, const int ) ); + +void cblas_dgemv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const int, const int, const double, const double *, + const int, const double *, const int, const double, + double *, const int ) ); + +void cblas_dger +STDC_ARGS( +( const enum CBLAS_ORDER, const int, const int, + const double, const double *, const int, const double *, + const int, double *, const int ) ); +void cblas_dtrsv +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_UPLO, + const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, + const int, const double *, const int, double *, + const int ) ); + +void cblas_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void cblas_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +void dpcpp_dgemm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, + const enum CBLAS_TRANSPOSE, const int, const int, + const int, const double, const double *, const int, + const double *, const int, const double, double *, + const int ) ); + +void dpcpp_dtrsm +STDC_ARGS( +( const enum CBLAS_ORDER, const enum CBLAS_SIDE, + const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const enum CBLAS_DIAG, const int, const int, + const double, const double *, const int, double *, + const int ) ); +/* + * --------------------------------------------------------------------- + * HPL C BLAS macro definition + * 
--------------------------------------------------------------------- + */ +#define HPL_dswap cblas_dswap +#define HPL_dcopy cblas_dcopy +#define HPL_daxpy cblas_daxpy +#define HPL_dscal cblas_dscal +#define HPL_idamax cblas_idamax + +#define HPL_dgemv cblas_dgemv +#define HPL_dtrsv cblas_dtrsv +#define HPL_dger cblas_dger + +//#define HPL_dgemm cblas_dgemm +//#define HPL_dtrsm cblas_dtrsm +#define HPL_dgemm dpcpp_dgemm +#define HPL_dtrsm dpcpp_dtrsm + +#endif + +//#define HPL_hello sss_gemm + +#ifdef HPL_CALL_FBLAS +/* + * --------------------------------------------------------------------- + * Use the Fortran 77 interface of the BLAS ... + * --------------------------------------------------------------------- + * Defaults: Add_, F77_INTEGER=int, StringSunStyle + * --------------------------------------------------------------------- + */ +#ifndef NoChange +#ifndef UpCase +#ifndef Add__ +#ifndef Add_ + +#define Add_ + +#endif +#endif +#endif +#endif + +#ifndef F77_INTEGER +#define F77_INTEGER int +#else +#define HPL_USE_F77_INTEGER_DEF +#endif + +#ifndef StringCrayStyle +#ifndef StringStructVal +#ifndef StringStructPtr +#ifndef StringSunStyle + +#define StringSunStyle + +#endif +#endif +#endif +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 <-> C interface + * --------------------------------------------------------------------- + * + * These macros identifies how Fortran routines will be called. + * + * Add_ : the Fortran compiler expects the name of C functions to be + * in all lower case and to have an underscore postfixed it (Suns, Intel + * compilers expect this). + * + * NoChange : the Fortran compiler expects the name of C functions to be + * in all lower case (IBM RS6K compilers do this). + * + * UpCase : the Fortran compiler expects the name of C functions to be + * in all upcase. (Cray compilers expect this). + * + * Add__ : the Fortran compiler in use is f2c, a Fortran to C conver- + * ter. 
+ */ +#ifdef NoChange +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm(...) + */ +#define F77dswap dswap +#define F77dscal dscal +#define F77dcopy dcopy +#define F77daxpy daxpy +#define F77idamax idamax + +#define F77dgemv dgemv +#define F77dtrsv dtrsv +#define F77dger dger + +#define F77dgemm dgemm +#define F77dtrsm dtrsm + +#endif + +#ifdef UpCase +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) DGEMM(...) + */ +#ifdef CRAY_BLAS + +#define F77dswap SSWAP +#define F77dscal SSCAL +#define F77dcopy SCOPY +#define F77daxpy SAXPY +#define F77idamax ISAMAX + +#define F77dgemv SGEMV +#define F77dtrsv STRSV +#define F77dger SGER + +#define F77dgemm SGEMM +#define F77dtrsm STRSM + +#else + +#define F77dswap DSWAP +#define F77dscal DSCAL +#define F77dcopy DCOPY +#define F77daxpy DAXPY +#define F77idamax IDAMAX + +#define F77dgemv DGEMV +#define F77dtrsv DTRSV +#define F77dger DGER + +#define F77dgemm DGEMM +#define F77dtrsm DTRSM + +#endif + +#endif + +#ifdef Add_ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) 
+ */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ + +#endif + +#ifdef Add__ +/* + * These defines set up the naming scheme required to have a FORTRAN + * routine called by a C routine with the following FORTRAN to C inter- + * face: + * + * FORTRAN DECLARATION C CALL + * SUBROUTINE DGEMM(...) dgemm_(...) + */ +#define F77dswap dswap_ +#define F77dscal dscal_ +#define F77dcopy dcopy_ +#define F77daxpy daxpy_ +#define F77idamax idamax_ + +#define F77dgemv dgemv_ +#define F77dtrsv dtrsv_ +#define F77dger dger_ + +#define F77dgemm dgemm_ +#define F77dtrsm dtrsm_ +//#define F77hello sss_gemm + +#endif +//#define F77hello sss_gemm +/* + * --------------------------------------------------------------------- + * Typedef definitions and conversion utilities + * --------------------------------------------------------------------- + */ +#ifdef StringCrayStyle + +#include + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR _fcd + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(_fcdtocp(c) )) +#define HPL_C2F_CHAR(c) (_cptofcd(&(c), 1)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructVal + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c.cp)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringStructPtr + /* Type of character argument in a FORTRAN call */ +typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c->cp)) + +#define 
F77_CHAR_DECL F77_CHAR * /* input CHARACTER*1 */ + +#endif +/* ------------------------------------------------------------------ */ +#ifdef StringSunStyle + /* Type of character argument in a FORTRAN call */ +#define F77_CHAR char * + /* Character conversion utilities */ +#define HPL_F2C_CHAR(c) (*(c)) +#define HPL_C2F_CHAR(c) (&(c)) + +#define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ +#define F77_1_CHAR , F77_INTEGER +#define F77_2_CHAR F77_1_CHAR F77_1_CHAR +#define F77_3_CHAR F77_2_CHAR F77_1_CHAR +#define F77_4_CHAR F77_3_CHAR F77_1_CHAR + +#endif +/* ------------------------------------------------------------------ */ + +#ifndef F77_1_CHAR +#define F77_1_CHAR +#define F77_2_CHAR +#define F77_3_CHAR +#define F77_4_CHAR +#endif + +#define F77_INT_DECL const F77_INTEGER * /* input integer */ +#define F77_SIN_DECL const double * /* input scalar */ +#define F77_VIN_DECL const double * /* input vector */ +#define F77_VINOUT_DECL double * /* input/output matrix */ +#define F77_MIN_DECL const double * /* input matrix */ +#define F77_MINOUT_DECL double * /* input/output matrix */ + +#ifdef CRAY_PVP_ENV /* Type of FORTRAN functions */ +#define F77_VOID_FUN extern fortran void /* subroutine */ +#define F77_INT_FUN extern fortran int /* integer function */ +#else +#define F77_VOID_FUN extern void /* subroutine */ +#define F77_INT_FUN extern int /* integer function */ +#endif +/* + * --------------------------------------------------------------------- + * Fortran 77 BLAS function prototypes + * --------------------------------------------------------------------- + */ +F77_VOID_FUN F77dswap +STDC_ARGS( +( F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dscal +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_VOID_FUN F77dcopy +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77daxpy +STDC_ARGS( +( F77_INT_DECL, F77_SIN_DECL, 
F77_VIN_DECL, F77_INT_DECL, + F77_VINOUT_DECL, F77_INT_DECL ) ); +F77_INT_FUN F77idamax +STDC_ARGS( +( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL ) ); + +F77_VOID_FUN F77dgemv +STDC_ARGS( +( F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, + F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL F77_1_CHAR ) ); +F77_VOID_FUN F77dger +STDC_ARGS( +( F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_VIN_DECL, + F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_MINOUT_DECL, + F77_INT_DECL ) ); +F77_VOID_FUN F77dtrsv +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL + F77_3_CHAR ) ); + +F77_VOID_FUN F77dgemm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, + F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, + F77_MIN_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MINOUT_DECL, + F77_INT_DECL F77_2_CHAR ) ); +F77_VOID_FUN F77dtrsm +STDC_ARGS( +( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, + F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, + F77_INT_DECL, F77_MINOUT_DECL, F77_INT_DECL F77_4_CHAR ) ); + +#endif +/* + * --------------------------------------------------------------------- + * HPL BLAS Function prototypes + * --------------------------------------------------------------------- + */ +#ifndef HPL_CALL_CBLAS + +int HPL_idamax +STDC_ARGS( ( + const int, + const double *, + const int +) ); +void HPL_daxpy +STDC_ARGS( ( + const int, + const double, + const double *, + const int, + double *, + const int +) ); +void HPL_dcopy +STDC_ARGS( ( + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dscal +STDC_ARGS( ( + const int, + const double, + double *, + const int +) ); +void HPL_dswap +STDC_ARGS( ( + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dgemv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const int, + const 
int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_dger +STDC_ARGS( ( + const enum HPL_ORDER, + const int, + const int, + const double, + const double *, + const int, + double *, + const int, + double *, + const int +) ); +void HPL_dtrsv +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const double *, + const int, + double *, + const int +) ); +void HPL_dgemm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_TRANS, + const enum HPL_TRANS, + const int, + const int, + const int, + const double, + const double *, + const int, + const double *, + const int, + const double, + double *, + const int +) ); +void HPL_hello +STDC_ARGS( ( +) ); +#endif +void HPL_dtrsm +STDC_ARGS( ( + const enum HPL_ORDER, + const enum HPL_SIDE, + const enum HPL_UPLO, + const enum HPL_TRANS, + const enum HPL_DIAG, + const int, + const int, + const double, + const double *, + const int, + double *, + const int +) ); + +//#endif + +#endif +/* + * hpl_blas.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_comm.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_comm.h new file mode 100644 index 000000000..e3ba51a57 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_comm.h @@ -0,0 +1,161 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_COMM_H +#define HPL_COMM_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_1RING = 401, /* Increasing ring */ + HPL_1RING_M = 402, /* Increasing ring (modified) */ + HPL_2RING = 403, /* Increasing 2-ring */ + HPL_2RING_M = 404, /* Increasing 2-ring (modified) */ + HPL_BLONG = 405, /* long broadcast */ + HPL_BLONG_M = 406 /* long broadcast (modified) */ +} HPL_T_TOP; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_FAILURE 0 +#define HPL_SUCCESS 1 +#define HPL_KEEP_TESTING 2 +/* + * --------------------------------------------------------------------- + * comm function prototypes + * --------------------------------------------------------------------- + */ +int HPL_send +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_recv +STDC_ARGS( ( + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_sdrv +STDC_ARGS( ( + double *, + int, + int, + double *, + int, + int, + int, + MPI_Comm +) ); +int HPL_binit +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_bcast +STDC_ARGS( ( + HPL_T_panel *, + int * +) ); +int HPL_bwait +STDC_ARGS( ( + HPL_T_panel * +) ); +int HPL_packL +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int +) ); +void HPL_copyL +STDC_ARGS( ( + HPL_T_panel * +) ); + +int HPL_binit_1ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_1ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_1rinM STDC_ARGS( ( 
HPL_T_panel * ) ); +int HPL_bcast_1rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_1rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2ring STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2ring STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2ring STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_2rinM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_2rinM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_2rinM STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blong STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blong STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blong STDC_ARGS( ( HPL_T_panel * ) ); + +int HPL_binit_blonM STDC_ARGS( ( HPL_T_panel * ) ); +int HPL_bcast_blonM STDC_ARGS( ( HPL_T_panel *, int * ) ); +int HPL_bwait_blonM STDC_ARGS( ( HPL_T_panel * ) ); + +#endif +/* + * End of hpl_comm.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_gesv.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_gesv.h new file mode 100644 index 000000000..ce671cf2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_gesv.h @@ -0,0 +1,87 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_GESV_H +#define HPL_GESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_LEFT_LOOKING = 301, /* Left looking lu fact variant */ + HPL_CROUT = 302, /* Crout lu fact variant */ + HPL_RIGHT_LOOKING = 303 /* Right looking lu fact variant */ +} HPL_T_FACT; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dgesv +STDC_ARGS( +( const int, const int, const int, const HPL_T_FACT, + const HPL_T_FACT, const int, double *, + const int, int * ) ); +void HPL_ipid +STDC_ARGS( +( const int, double *, int *, int *, + int *, int *, int *, int *, + const int, const int, const int, const int, + const int ) ); + +#endif +/* + * End of hpl_gesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_grid.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_grid.h new file mode 100644 index 000000000..1895a5ed4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_grid.h @@ -0,0 +1,212 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_GRID_H +#define HPL_GRID_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum { HPL_INT = 100, HPL_DOUBLE = 101 } HPL_T_TYPE; + +typedef enum +{ + HPL_ROW_MAJOR = 201, + HPL_COLUMN_MAJOR = 202 +} HPL_T_ORDER; + +typedef struct HPL_S_grid +{ + MPI_Comm all_comm; /* grid communicator */ + MPI_Comm row_comm; /* row communicator */ + MPI_Comm col_comm; /* column communicator */ + HPL_T_ORDER order; /* ordering of the procs in the grid */ + int iam; /* my rank in the grid */ + int myrow; /* my row number in the grid */ + int mycol; /* my column number in the grid */ + int nprow; /* the total # of rows in the grid */ + int npcol; /* the total # of columns in the grid */ + int nprocs; /* the total # of procs in the grid */ + int row_ip2; /* largest power of two <= nprow */ + int row_hdim; /* row_ip2 procs hypercube dimension */ + int row_ip2m1; /* largest power of two <= nprow-1 */ + int row_mask; /* row_ip2m1 procs hypercube mask */ + int col_ip2; /* largest power of two <= npcol */ + int col_hdim; /* col_ip2 procs hypercube dimension */ + int col_ip2m1; /* largest power of two <= 
npcol-1 */ + int col_mask; /* col_ip2m1 procs hypercube mask */ +} HPL_T_grid; + +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_OP) +( const int, const void *, void *, const HPL_T_TYPE ); +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define HPL_2_MPI_TYPE( typ ) \ + ( ( typ == HPL_INT ? MPI_INT : MPI_DOUBLE ) ) +/* + * The following macros perform common modulo operations; All functions + * except MPosMod assume arguments are < d (i.e., arguments are themsel- + * ves within modulo range). + */ + /* increment with mod */ +#define MModInc(I, d) if(++(I) == (d)) (I) = 0 + /* decrement with mod */ +#define MModDec(I, d) if(--(I) == -1) (I) = (d)-1 + /* positive modulo */ +#define MPosMod(I, d) ( (I) - ((I)/(d))*(d) ) + /* add two numbers */ +#define MModAdd(I1, I2, d) \ + ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) ) + /* add 1 to # */ +#define MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 ) + /* subtract two numbers */ +#define MModSub(I1, I2, d) \ + ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) ) + /* sub 1 from # */ +#define MModSub1(I, d) ( ((I)!=0) ? 
(I)-1 : (d)-1 ) +/* + * --------------------------------------------------------------------- + * grid function prototypes + * --------------------------------------------------------------------- + */ +int HPL_grid_init +STDC_ARGS( ( + MPI_Comm, + const HPL_T_ORDER, + const int, + const int, + HPL_T_grid * +) ); +int HPL_grid_exit +STDC_ARGS( ( + HPL_T_grid * +) ); + +int HPL_grid_info +STDC_ARGS( ( + const HPL_T_grid *, + int *, + int *, + int *, + int * +) ); +int HPL_pnum +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int +) ); + +int HPL_barrier +STDC_ARGS( ( + MPI_Comm +) ); +int HPL_broadcast +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const int, + MPI_Comm +) ); +int HPL_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + const int, + MPI_Comm +) ); +int HPL_all_reduce +STDC_ARGS( ( + void *, + const int, + const HPL_T_TYPE, + const HPL_T_OP , + MPI_Comm +) ); + +void HPL_max +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_min +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); +void HPL_sum +STDC_ARGS( ( + const int, + const void *, + void *, + const HPL_T_TYPE +) ); + +#endif +/* + * End of hpl_grid.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_matgen.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_matgen.h new file mode 100644 index 000000000..de6503eea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_matgen.h @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MATGEN_H +#define HPL_MATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MULT0 1284865837 +#define HPL_MULT1 1481765933 +#define HPL_IADD0 1 +#define HPL_IADD1 0 +#define HPL_DIVFAC 2147483648.0 +#define HPL_POW16 65536.0 +#define HPL_HALF 0.5 +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dmatgen +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int +) ); +void HPL_lmul +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_ladd +STDC_ARGS( ( + int *, + int *, + int * +) ); +void HPL_xjumpm +STDC_ARGS( ( + const int, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_setran +STDC_ARGS( ( + const int, + int * +) ); +void HPL_jumpit +STDC_ARGS( ( + int *, + int *, + int *, + int * +) ); +double HPL_rand STDC_ARGS( ( void ) ); + +#endif +/* + * End of hpl_matgen.h + */ diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_misc.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_misc.h new file mode 100644 index 000000000..ea421a403 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_misc.h @@ -0,0 +1,110 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_MISC_H +#define HPL_MISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + * NOTE(review): the header names below were restored after the angle- + * bracket operands had been stripped from this text; they follow the + * upstream HPL 2.3 hpl_misc.h -- confirm against the original file. + * STDC_ARGS(p) keeps the prototype parameter list p when STDC_HEADERS is + * defined and replaces it by an empty list otherwise. + */ +#ifdef __STDC__ +#define STDC_HEADERS +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#ifdef STDC_HEADERS +#include <stdarg.h> +#define STDC_ARGS(p) p +#else +#include <varargs.h> +#define STDC_ARGS(p) () +#endif + +#ifdef HPL_CALL_VSIPL +#include <vsip.h> +#endif +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_rone 1.0 +#define HPL_rtwo 2.0 +#define HPL_rzero 0.0 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + * All of these are function-like macros: an argument may be evaluated + * more than once, so do not pass expressions with side effects. + * Mabs, Mmin, Mmax : absolute value, minimum, maximum. + * Mfloor(a,b) : a/b when a > 0, else -(((-a)+b-1)/b); with integer + * operands and b > 0 this rounds a/b towards minus + * infinity. + * Mceil(a,b) : (a+b-1)/b; with integer operands, a >= 0 and b > 0 + * this rounds a/b up. + * Miceil(a,b) : like Mceil for a > 0, and -((-a)/b) otherwise. + * Mupcase(C) : clears bit 0x20 when 96 < C < 123 (ASCII 'a'..'z'). + * Mlowcase(C) : sets bit 0x20 when 64 < C < 91 (ASCII 'A'..'Z'). + */ +#define Mabs( a_ ) ( ( (a_) < 0 ) ? -(a_) : (a_) ) +#define Mmin( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) ) +#define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) ) + +#define Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b)))) +#define Mceil(a,b) ( ( (a)+(b)-1 ) / (b) ) +#define Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b)))) + +#define Mupcase(C) (((C)>96 && (C)<123) ? (C) & 0xDF : (C)) +#define Mlowcase(C) (((C)>64 && (C)< 91) ? 
(C) | 32 : (C)) +/* + * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and + * also less silly errors ... + * The element offset is i_ + j_*lda_, with each term converted to size_t + * before the multiplication and addition. + */ +#define Mptr( a_, i_, j_, lda_ ) \ + ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) ) +/* + * Align pointer: converts ptr_ to size_t and rounds it up to the next + * multiple of al_. The result is an integer value, not a pointer, so + * the caller has to cast it back. + */ +#define HPL_PTR( ptr_, al_ ) \ + ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) +#endif +/* + * End of hpl_misc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_panel.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_panel.h new file mode 100644 index 000000000..d5ba2939c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_panel.h @@ -0,0 +1,147 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PANEL_H +#define HPL_PANEL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + * struct HPL_S_panel (typedef HPL_T_panel) groups everything kept for + * one panel: pointers to the grid, algorithm and local-matrix records, + * the data and workspace pointers (A, WORK, L2, L1, DPIV, DINFO, U, + * IWORK), the bookkeeping used for the panel broadcast (buffers, counts, + * dtypes, request, status, msgid, len) and the integer sizes and global + * or local indices of the trailing part of A. The four vsip_block_d + * members exist only when HPL_CALL_VSIPL is defined. + */ +typedef struct HPL_S_panel +{ + struct HPL_S_grid * grid; /* ptr to the process grid */ + struct HPL_S_palg * algo; /* ptr to the algo parameters */ + struct HPL_S_pmat * pmat; /* ptr to the local array info */ + double * A; /* ptr to trailing part of A */ + double * WORK; /* work space */ + double * L2; /* ptr to L */ + double * L1; /* ptr to jb x jb upper block of A */ + double * DPIV; /* ptr to replicated jb pivot array */ + double * DINFO; /* ptr to replicated scalar info */ + double * U; /* ptr to U */ + int * IWORK; /* integer workspace for swapping */ + void * * * buffers[2]; /* buffers for panel bcast */ + int counts [2]; /* counts for panel bcast */ + MPI_Datatype dtypes [2]; /* data types for panel bcast */ + 
MPI_Request request[1]; /* requests for panel bcast */ + MPI_Status status [1]; /* status for panel bcast */ + int nb; /* distribution blocking factor */ + int jb; /* panel width */ + int m; /* global # of rows of trailing part of A */ + int n; /* global # of cols of trailing part of A */ + int ia; /* global row index of trailing part of A */ + int ja; /* global col index of trailing part of A */ + int mp; /* local # of rows of trailing part of A */ + int nq; /* local # of cols of trailing part of A */ + int ii; /* local row index of trailing part of A */ + int jj; /* local col index of trailing part of A */ + int lda; /* local leading dim of array A */ + int prow; /* proc. row owning 1st row of trail. A */ + int pcol; /* proc. col owning 1st col of trail. A */ + int msgid; /* message id for panel bcast */ + int ldl2; /* local leading dim of array L2 */ + int len; /* length of the buffer to broadcast */ +#ifdef HPL_CALL_VSIPL + vsip_block_d * Ablock; /* A block */ + vsip_block_d * L1block; /* L1 block */ + vsip_block_d * L2block; /* L2 block */ + vsip_block_d * Ublock; /* U block */ +#endif +} HPL_T_panel; + +/* + * --------------------------------------------------------------------- + * panel function prototypes + * --------------------------------------------------------------------- + * hpl_pgesv.h is included here, after HPL_T_panel has been declared. + * NOTE(review): presumably because that header refers to HPL_T_panel -- + * confirm before moving the include. + */ +#include "hpl_pgesv.h" + +void HPL_pdpanel_new +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * * +) ); +void HPL_pdpanel_init +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + const int, + const int, + const int, + HPL_T_pmat *, + const int, + const int, + const int, + HPL_T_panel * +) ); +int HPL_pdpanel_disp +STDC_ARGS( ( + HPL_T_panel * * +) ); +int HPL_pdpanel_free +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_panel.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pauxil.h 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pauxil.h new file mode 100644 index 000000000..1fd0ee457 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pauxil.h @@ -0,0 +1,505 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PAUXIL_H +#define HPL_PAUXIL_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + * Every macro in this section expands to a brace-enclosed block that + * assigns to its output argument(s) (proc_, il_, ig_ or np_), so those + * arguments must be modifiable lvalues. The other arguments can be + * evaluated more than once. + */ +/* + * Mindxg2p returns the process coordinate owning the entry globally in- + * dexed by ig_. When ig_ < inb_, src_ < 0 or nprocs_ <= 1, proc_ is + * simply set to src_. + * + * Mindxg2l (defined next) stores in il_ the local index computed from + * the global index ig_; il_ = ig_ when ig_ < inb_, src_ == -1 or + * nprocs_ == 1. Its proc_ argument is not referenced by the expansion. + * + * Mindxg2lp (defined after Mindxg2l) sets both il_ and proc_ in a single + * expansion, with il_ = ig_ and proc_ = src_ in the same three cases. + */ +#define Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) && \ + ( (nprocs_) > 1 ) ) \ + { \ + proc_ = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_); \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + else \ + { \ + proc_ = (src_); \ + } \ + } + +#define Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) { il_ = (ig_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*( j__ - i__ ) + \ + ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? 
\ + (ig_) - (inb_) : (ig_) ); \ + } \ + } + +#define Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ ) \ + { \ + if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ + ( (nprocs_) == 1 ) ) \ + { il_ = (ig_); proc_ = (src_); } \ + else \ + { \ + int i__, j__; \ + j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ + il_ = (nb_)*(j__-i__) + \ + ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ + (ig_) - (inb_) : (ig_) ); \ + proc_ = (src_) + 1 + i__; \ + proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ + } \ + } +/* + * Mindxl2g computes the global index ig_ corresponding to the local + * index il_ in process proc_. When src_ < 0 or nprocs_ <= 1 it sets + * ig_ = il_; otherwise the formula depends on whether proc_ is equal + * to, less than or greater than src_. + */ +#define Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + if( (proc_) == (src_) ) \ + { \ + if( (il_) < (inb_) ) ig_ = (il_); \ + else ig_ = (il_) + \ + (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \ + } \ + else if( (proc_) < (src_) ) \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1+(nprocs_) ); \ + } \ + else \ + { \ + ig_ = (il_) + (inb_) + \ + (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ + (proc_)-(src_)-1 ); \ + } \ + } \ + else \ + { \ + ig_ = (il_); \ + } \ + } +/* + * MnumrocI computes the # of local indexes np_ residing in the process + * of coordinate proc_ corresponding to the interval of global indexes + * i_:i_+n_-1 assuming that the global index 0 resides in the process + * src_, and that the indexes are distributed from src_ using the para- + * meters inb_, nb_ and nprocs_. 
+ */ +#define MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ ) \ + { \ + if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ + { \ + int inb__, mydist__, n__, nblk__, quot__, src__; \ + if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ + { \ + nblk__ = (-inb__) / (nb_) + 1; \ + src__ = (src_) + nblk__; \ + src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ + inb__ += nblk__*(nb_); \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == src__ ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - src__ ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != src__ ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != src__ ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + else \ + { \ + if( ( n__ = (n_) - inb__ ) <= 0 ) \ + { \ + if( (proc_) == (src_) ) np_ = (n_); \ + else np_ = 0; \ + } \ + else \ + { \ + if( ( mydist__ = (proc_) - (src_) ) < 0 ) \ + mydist__ += (nprocs_); \ + nblk__ = n__ / (nb_) + 1; \ + mydist__ -= nblk__ - \ + ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \ + if( mydist__ < 0 ) \ + { \ + if( (proc_) != (src_) ) \ + np_ = (nb_) + (nb_) * quot__; \ + else \ + np_ = inb__ + (nb_) * quot__; \ + } \ + else if( mydist__ > 0 ) \ + { \ + np_ = (nb_) * quot__; \ + } \ + else \ + { \ + if( (proc_) != (src_) ) \ + np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ + else \ + np_ = (n_)+ (nb_)*(quot__ - nblk__); \ + } \ + } \ + } \ + } \ + else \ + { \ + np_ = (n_); \ + } \ + } +/* Mnumroc is MnumrocI with the first global index i_ fixed to 0. MnumrocI assigns np_ = n_ whenever src_ < 0 or nprocs_ <= 1; otherwise it first re-bases inb_ and src_ when i_ >= inb_ and then picks np_ from the sign of the block distance mydist__. */ +#define Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ ) \ + MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) +/* + * --------------------------------------------------------------------- + * Function prototypes + * 
--------------------------------------------------------------------- + */ +void HPL_indxg2lp +STDC_ARGS( ( + int *, + int *, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2l +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxg2p +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int +) ); +int HPL_indxl2g +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +void HPL_infog2l +STDC_ARGS( ( + int, + int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + int *, + int *, + int *, + int * +) ); +int HPL_numroc +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int +) ); +int HPL_numrocI +STDC_ARGS( ( + const int, + const int, + const int, + const int, + const int, + const int, + const int +) ); + +void HPL_dlaswp00N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp10N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp01N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp01T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp02N +STDC_ARGS( ( + const int, + const int, + const double *, + const int, + double *, + double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp03N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp03T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const double *, + const int +) ); +void HPL_dlaswp04N +STDC_ARGS( ( + const int, + const int, + const int, + double *, + 
const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp04T +STDC_ARGS( ( + const int, + const int, + const int, + double *, + const int, + double *, + const int, + const double *, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp05T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + const double *, + const int, + const int *, + const int * +) ); +void HPL_dlaswp06N +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); +void HPL_dlaswp06T +STDC_ARGS( ( + const int, + const int, + double *, + const int, + double *, + const int, + const int * +) ); + +void HPL_pabort +STDC_ARGS( ( + int, + const char *, + const char *, + ... +) ); +void HPL_pwarn +STDC_ARGS( ( + FILE *, + int, + const char *, + const char *, + ... +) ); +void HPL_pdlaprnt +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int, + const int, + const char * +) ); +double HPL_pdlamch +STDC_ARGS( ( + MPI_Comm, + const HPL_T_MACH +) ); +double HPL_pdlange +STDC_ARGS( ( + const HPL_T_grid *, + const HPL_T_NORM, + const int, + const int, + const int, + const double *, + const int +) ); + +#endif +/* + * End of hpl_pauxil.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pfact.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pfact.h new file mode 100644 index 000000000..09eee79ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pfact.h @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PFACT_H +#define HPL_PFACT_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef void (*HPL_T_PFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_RFA_FUN) +( HPL_T_panel *, const int, const int, const int, + double * ); +typedef void (*HPL_T_UPD_FUN) +( HPL_T_panel *, int *, HPL_T_panel *, const int ); +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dlocmax +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_dlocswpN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_dlocswpT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + double * +) ); +void HPL_pdmxswp +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdpancrN +STDC_ARGS( ( + HPL_T_panel *, 
+ const int, + const int, + const int, + double * +) ); +void HPL_pdpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdrpancrN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpancrT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanllT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlN +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); +void HPL_pdrpanrlT +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int, + const int, + double * +) ); + +void HPL_pdfact +STDC_ARGS( ( + HPL_T_panel * +) ); + +#endif +/* + * End of hpl_pfact.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pgesv.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pgesv.h new file mode 100644 index 000000000..3ca576c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pgesv.h @@ -0,0 +1,346 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PGESV_H +#define HPL_PGESV_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" + +#include "hpl_pmisc.h" +#include "hpl_grid.h" +#include "hpl_comm.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pfact.h" +/* + * --------------------------------------------------------------------- + * #typedefs and data structures + * --------------------------------------------------------------------- + */ +typedef enum +{ + HPL_SWAP00 = 451, /* Use HPL_pdlaswp00 */ + HPL_SWAP01 = 452, /* Use HPL_pdlaswp01 */ + HPL_SW_MIX = 453, /* Use HPL_pdlaswp00_ for small number of */ + /* columns, and HPL_pdlaswp01_ otherwise. 
*/ + HPL_NO_SWP = 499 +} HPL_T_SWAP; + +typedef struct HPL_S_palg +{ + HPL_T_TOP btopo; /* row broadcast topology */ + int depth; /* look-ahead depth */ + int nbdiv; /* recursive division factor */ + int nbmin; /* recursion stopping criterium */ + HPL_T_FACT pfact; /* panel fact variant */ + HPL_T_FACT rfact; /* recursive fact variant */ + HPL_T_PFA_FUN pffun; /* panel fact function ptr */ + HPL_T_RFA_FUN rffun; /* recursive fact function ptr */ + HPL_T_UPD_FUN upfun; /* update function */ + HPL_T_SWAP fswap; /* Swapping algorithm */ + int fsthr; /* Swapping threshold */ + int equil; /* Equilibration */ + int align; /* data alignment constant */ +} HPL_T_palg; + +typedef struct HPL_S_pmat +{ +#ifdef HPL_CALL_VSIPL + vsip_block_d * block; +#endif + double * A; /* pointer to local piece of A */ + double * X; /* pointer to solution vector */ + int n; /* global problem size */ + int nb; /* blocking factor */ + int ld; /* local leading dimension */ + int mp; /* local number of rows */ + int nq; /* local number of columns */ + int info; /* computational flag */ +} HPL_T_pmat; +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define MSGID_BEGIN_PFACT 1001 /* message id ranges */ +#define MSGID_END_PFACT 2000 +#define MSGID_BEGIN_FACT 2001 +#define MSGID_END_FACT 3000 +#define MSGID_BEGIN_PTRSV 3001 +#define MSGID_END_PTRSV 4000 + +#define MSGID_BEGIN_COLL 9001 +#define MSGID_END_COLL 10000 +/* + * --------------------------------------------------------------------- + * #define macros definitions + * --------------------------------------------------------------------- + */ +#define MNxtMgid( id_, beg_, end_ ) \ + (( (id_)+1 > (end_) ? 
(beg_) : (id_)+1 )) +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pipid +STDC_ARGS( ( + HPL_T_panel *, + int *, + int * +) ); +void HPL_plindx0 +STDC_ARGS( ( + HPL_T_panel *, + const int, + int *, + int *, + int *, + int * +) ); +void HPL_pdlaswp00N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdlaswp00T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_perm +STDC_ARGS( ( + const int, + int *, + int *, + int * +) ); +void HPL_logsort +STDC_ARGS( ( + const int, + const int, + int *, + int *, + int * +) ); +void HPL_plindx10 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int * +) ); +void HPL_plindx1 +STDC_ARGS( ( + HPL_T_panel *, + const int, + const int *, + int *, + int *, + int *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_spreadN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_spreadT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_SIDE, + const int, + double *, + const int, + const int, + const int *, + const int *, + const int * +) ); +void HPL_equil +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const enum HPL_TRANS, + const int, + double *, + const int, + int *, + const int *, + const int *, + int * +) ); +void HPL_rollN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_rollT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int, + double *, + const int, + const int *, + const int *, + const int * +) ); +void HPL_pdlaswp01N +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); 
+void HPL_pdlaswp01T +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdupdateNN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateNT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTN +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); +void HPL_pdupdateTT +STDC_ARGS( ( + HPL_T_panel *, + int *, + HPL_T_panel *, + const int +) ); + +void HPL_pdgesv0 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK1 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesvK2 +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); +void HPL_pdgesv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_palg *, + HPL_T_pmat * +) ); + +void HPL_pdtrsv +STDC_ARGS( ( + HPL_T_grid *, + HPL_T_pmat * +) ); + +#endif +/* + * End of hpl_pgesv.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmatgen.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmatgen.h new file mode 100644 index 000000000..1091b0f60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmatgen.h @@ -0,0 +1,77 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PMATGEN_H +#define HPL_PMATGEN_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_matgen.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdmatgen +STDC_ARGS( ( + const HPL_T_grid *, + const int, + const int, + const int, + double *, + const int, + const int +) ); + +#endif +/* + * End of hpl_pmatgen.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmisc.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmisc.h new file mode 100644 index 000000000..23550d47b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_pmisc.h @@ -0,0 +1,59 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef HPL_PMISC_H +#define HPL_PMISC_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "mpi.h" + +#endif +/* + * End of hpl_pmisc.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptest.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptest.h new file mode 100644 index 000000000..5777bd536 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptest.h @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PTEST_H +#define HPL_PTEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +#include "hpl_panel.h" +#include "hpl_pgesv.h" + +#include "hpl_ptimer.h" +#include "hpl_pmatgen.h" +/* + * --------------------------------------------------------------------- + * Data Structures + * --------------------------------------------------------------------- + */ +typedef struct HPL_S_test +{ + double epsil; /* epsilon machine */ + double thrsh; /* threshold */ + FILE * outfp; /* output stream (only in proc 0) */ + int kfail; /* # of tests failed */ + int kpass; /* # of tests passed */ + int kskip; /* # of tests skipped */ + int ktest; /* total number of tests */ +} HPL_T_test; + +/* + * --------------------------------------------------------------------- + * #define macro constants for testing only + * --------------------------------------------------------------------- + */ +#define 
HPL_LINE_MAX 256 +#define HPL_MAX_PARAM 20 +#define HPL_ISEED 100 +/* + * --------------------------------------------------------------------- + * global timers for timing analysis only + * --------------------------------------------------------------------- + */ +#ifdef HPL_DETAILED_TIMING +#define HPL_TIMING_BEG 11 /* timer 0 reserved, used by main */ +#define HPL_TIMING_N 6 /* number of timers defined below */ +#define HPL_TIMING_RPFACT 11 /* starting from here, contiguous */ +#define HPL_TIMING_PFACT 12 +#define HPL_TIMING_MXSWP 13 +#define HPL_TIMING_UPDATE 14 +#define HPL_TIMING_LASWP 15 +#define HPL_TIMING_PTRSV 16 +#endif +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_pdinfo +STDC_ARGS( ( + HPL_T_test *, + int *, + int *, + int *, + int *, + HPL_T_ORDER *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + int *, + int *, + int *, + int *, + HPL_T_FACT *, + int *, + HPL_T_TOP *, + int *, + int *, + HPL_T_SWAP *, + int *, + int *, + int *, + int *, + int * +) ); +void HPL_pdtest +STDC_ARGS( ( + HPL_T_test *, + HPL_T_grid *, + HPL_T_palg *, + const int, + const int +) ); + +#endif +/* + * End of hpl_ptest.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptimer.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptimer.h new file mode 100644 index 000000000..43c8fe33a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_ptimer.h @@ -0,0 +1,96 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_PTIMER_H +#define HPL_PTIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NPTIMER 64 +#define HPL_PTIMER_STARTFLAG 5.0 +#define HPL_PTIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_PTIME = 101, HPL_CPU_PTIME = 102 } HPL_T_PTIME; + +typedef enum +{ HPL_AMAX_PTIME = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME = 203 } +HPL_T_PTIME_OP; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_ptimer_cputime STDC_ARGS( ( void ) ); +double HPL_ptimer_walltime STDC_ARGS( ( void ) ); + +void HPL_ptimer STDC_ARGS( ( const int ) ); +void HPL_ptimer_boot STDC_ARGS( ( void ) ); +void HPL_ptimer_combine +STDC_ARGS( +( MPI_Comm comm, const HPL_T_PTIME_OP, const HPL_T_PTIME, + const int, const int, double * ) ); +void HPL_ptimer_disable STDC_ARGS( ( void ) ); +void HPL_ptimer_enable 
STDC_ARGS( ( void ) ); +double HPL_ptimer_inquire +STDC_ARGS( +( const HPL_T_PTIME, const int ) ); + +#endif +/* + * End of hpl_ptimer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_test.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_test.h new file mode 100644 index 000000000..1eedc97e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_test.h @@ -0,0 +1,80 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TEST_H +#define HPL_TEST_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +#include "hpl_blas.h" +#include "hpl_auxil.h" +#include "hpl_gesv.h" + +#include "hpl_matgen.h" +#include "hpl_timer.h" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_dinfo +STDC_ARGS( +( FILE * *, int *, int *, int *, + HPL_T_FACT *, int *, int *, int *, + int *, int *, HPL_T_FACT *, int *, + double *, double * ) ); +void HPL_dtest +STDC_ARGS( +( FILE *, const int, const int, const int, + HPL_T_FACT, HPL_T_FACT, const int, const double, + const double, int *, int *, int * ) ); + +#endif +/* + * End of hpl_test.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_timer.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_timer.h new file mode 100644 index 000000000..4c91700ef --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_timer.h @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_TIMER_H +#define HPL_TIMER_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_misc.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_NTIMER 64 +#define HPL_TIMER_STARTFLAG 5.0 +#define HPL_TIMER_ERROR -1.0 +/* + * --------------------------------------------------------------------- + * type definitions + * --------------------------------------------------------------------- + */ +typedef enum +{ HPL_WALL_TIME = 101, HPL_CPU_TIME = 102 } HPL_T_TIME; +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +double HPL_timer_cputime STDC_ARGS( ( void ) ); +double HPL_timer_walltime STDC_ARGS( ( void ) ); + +void HPL_timer STDC_ARGS( ( const int ) ); +void HPL_timer_boot STDC_ARGS( ( void ) ); +void HPL_timer_enable STDC_ARGS( ( void ) ); +void HPL_timer_disable STDC_ARGS( ( void ) ); +double HPL_timer_inquire +STDC_ARGS( +( const HPL_T_TIME, const int ) ); + +#endif +/* + * End of hpl_timer.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_units.h 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_units.h new file mode 100644 index 000000000..a96956497 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hpl_units.h @@ -0,0 +1,135 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HPL_UNITS_H +#define HPL_UNITS_H +/* + * --------------------------------------------------------------------- + * Include files + * --------------------------------------------------------------------- + */ +#include "hpl_pmisc.h" +#include "hpl_pauxil.h" +/* + * --------------------------------------------------------------------- + * #define macro constants + * --------------------------------------------------------------------- + */ +#define HPL_MAXROUT 50 +#define HPL_MAXRNAME 15 + +#define HPL_TRUE 'T' +#define HPL_FALSE 'F' + +#define HPL_INDXG2P_ROUT "HPL_indxg2p" +#define HPL_INDXG2L_ROUT "HPL_indxg2l" +#define HPL_INDXL2G_ROUT "HPL_indxl2g" +#define HPL_NUMROC_ROUT "HPL_numroc" +#define HPL_NUMROCI_ROUT "HPL_numrocI" +/* + * --------------------------------------------------------------------- + * Function prototypes + * --------------------------------------------------------------------- + */ +void HPL_unit_info +STDC_ARGS( +( FILE * *, int *, int *, int *, + int *, int *, int *, int *, + int *, int *, int *, char [][HPL_MAXRNAME], + int [] ) ); + +void HPL_unit_indxg2l +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxg2l +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_indxl2g +STDC_ARGS( +( FILE *, const 
int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxl2g +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_indxg2p +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +int HPL_chek_indxg2p +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); + +void HPL_unit_numroc +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, long *, long * ) ); +void HPL_unit_numrocI +STDC_ARGS( +( FILE *, const int, const int, const int, + const int, const int, const int, const int, + const int, const int, long *, long * ) ); +int HPL_chek_numrocI +STDC_ARGS( +( FILE *, const char *, const int, const int, + const int, const int, const int, const int, + const int, const int, long *, long * ) ); + +#endif +/* + * End of hpl_units.h + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hplconfig.h.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hplconfig.h.in new file mode 100644 index 000000000..b4b3b9a35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/include/hplconfig.h.in @@ -0,0 +1,67 @@ +/* include/hplconfig.h.in. Generated from configure.ac by autoheader. */ + +/* Define if you have a BLAS library. */ +#undef HAVE_BLAS + +/* Define to 1 if you have the `dgemm_' function. */ +#undef HAVE_DGEMM_ + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define if you have the MPI library. */ +#undef HAVE_MPI + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_MPI_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/install-sh b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/install-sh new file mode 100755 index 000000000..8175c640f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/install-sh @@ -0,0 +1,518 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2018-03-11.20; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. 
+# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. 
+ +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. 
+ case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. 
+ *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. 
+ *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + # Note that $RANDOM variable is not portable (e.g. dash); Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p' feature. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. 
+ rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. 
+ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.auxil b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.auxil new file mode 100644 index 000000000..e92d18b80 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.auxil @@ -0,0 +1,100 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h +# +## Object files ######################################################## +# +HPL_au0obj = \ + HPL_dlacpy.o HPL_dlatcpy.o HPL_fprintf.o \ + HPL_warn.o HPL_abort.o HPL_dlaprnt.o \ + HPL_dlange.o +HPL_au1obj = \ + HPL_dlamch.o +HPL_auxobj = \ + $(HPL_au0obj) $(HPL_au1obj) +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_auxobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlacpy.o : ../HPL_dlacpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlacpy.c +HPL_dlatcpy.o : ../HPL_dlatcpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlatcpy.c +HPL_fprintf.o : ../HPL_fprintf.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_fprintf.c +HPL_warn.o : ../HPL_warn.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_warn.c +HPL_abort.o 
: ../HPL_abort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_abort.c +HPL_dlaprnt.o : ../HPL_dlaprnt.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaprnt.c +HPL_dlange.o : ../HPL_dlange.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlange.c +HPL_dlamch.o : ../HPL_dlamch.c $(INCdep) + $(CC) -o $@ -c $(CCNOOPT) ../HPL_dlamch.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.blas b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.blas new file mode 100644 index 000000000..ed9f3d0e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.blas @@ -0,0 +1,98 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. 
The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h +# +## Object files ######################################################## +# +HPL_blaobj = \ + HPL_dcopy.o HPL_daxpy.o HPL_dscal.o \ + HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o \ + HPL_dger.o HPL_dgemm.o HPL_dtrsm.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_blaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dcopy.o : ../HPL_dcopy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dcopy.c +HPL_daxpy.o : ../HPL_daxpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_daxpy.c +HPL_dscal.o : ../HPL_dscal.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dscal.c +HPL_idamax.o : ../HPL_idamax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_idamax.c +HPL_dgemv.o : ../HPL_dgemv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemv.c +HPL_dtrsv.o : ../HPL_dtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsv.c +HPL_dger.o : ../HPL_dger.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dger.c +HPL_dgemm.o : ../HPL_dgemm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemm.c +HPL_dtrsm.o : ../HPL_dtrsm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsm.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.comm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.comm new file mode 100644 index 000000000..529fe9aea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.comm @@ -0,0 
+1,111 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_comobj = \ + HPL_1ring.o HPL_1rinM.o HPL_2ring.o \ + HPL_2rinM.o HPL_blong.o HPL_blonM.o \ + HPL_packL.o HPL_copyL.o HPL_binit.o \ + HPL_bcast.o HPL_bwait.o HPL_send.o \ + HPL_recv.o HPL_sdrv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_comobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_1ring.o : ../HPL_1ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1ring.c +HPL_1rinM.o : ../HPL_1rinM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_1rinM.c +HPL_2ring.o : ../HPL_2ring.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_2ring.c +HPL_2rinM.o : ../HPL_2rinM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_2rinM.c +HPL_blong.o : ../HPL_blong.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blong.c +HPL_blonM.o : ../HPL_blonM.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_blonM.c +HPL_packL.o : ../HPL_packL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_packL.c 
+HPL_copyL.o : ../HPL_copyL.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_copyL.c +HPL_binit.o : ../HPL_binit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_binit.c +HPL_bcast.o : ../HPL_bcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bcast.c +HPL_bwait.o : ../HPL_bwait.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_bwait.c +HPL_send.o : ../HPL_send.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_send.c +HPL_recv.o : ../HPL_recv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_recv.c +HPL_sdrv.o : ../HPL_sdrv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sdrv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.gesv b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.gesv new file mode 100644 index 000000000..2a8722559 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.gesv @@ -0,0 +1,83 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h +# +## Object files ######################################################## +# +HPL_gesobj = \ + HPL_dgesv.o HPL_ipid.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_gesobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_gesobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dgesv.o : ../HPL_dgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgesv.c +HPL_ipid.o : ../HPL_ipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ipid.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.grid b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.grid new file mode 100644 index 000000000..51549d817 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.grid @@ -0,0 +1,103 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h +# +## Object files ######################################################## +# +HPL_griobj = \ + HPL_grid_init.o HPL_pnum.o HPL_grid_info.o \ + HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o \ + HPL_all_reduce.o HPL_barrier.o HPL_min.o \ + HPL_max.o HPL_sum.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_griobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_grid_init.o : ../HPL_grid_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_init.c +HPL_pnum.o : ../HPL_pnum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pnum.c +HPL_grid_info.o : ../HPL_grid_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_info.c +HPL_grid_exit.o : ../HPL_grid_exit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_exit.c +HPL_broadcast.o : ../HPL_broadcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_broadcast.c +HPL_reduce.o : ../HPL_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_reduce.c +HPL_all_reduce.o : ../HPL_all_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_all_reduce.c +HPL_barrier.o : ../HPL_barrier.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_barrier.c +HPL_min.o : ../HPL_min.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_min.c +HPL_max.o : ../HPL_max.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_max.c +HPL_sum.o : ../HPL_sum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sum.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.matgen b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.matgen new file mode 100644 index 000000000..f027fbc06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.matgen @@ -0,0 +1,95 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_matgen.h +# +## Object files ######################################################## +# +HPL_matobj = \ + HPL_dmatgen.o HPL_ladd.o HPL_lmul.o \ + HPL_xjumpm.o HPL_jumpit.o HPL_rand.o \ + HPL_setran.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_matobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c +HPL_ladd.o : ../HPL_ladd.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c +HPL_lmul.o : ../HPL_lmul.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c +HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c +HPL_jumpit.o : ../HPL_jumpit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c +HPL_rand.o : ../HPL_rand.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c +HPL_setran.o : ../HPL_setran.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c +# +# ###################################################################### +# +clean : + $(RM) *.o 
*.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.panel b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.panel new file mode 100644 index 000000000..804749cc2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.panel @@ -0,0 +1,90 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_panobj = \ + HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o \ + HPL_pdpanel_free.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_panobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_new.c +HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_init.c +HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_disp.c +HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_free.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# 
###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pauxil b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pauxil new file mode 100644 index 000000000..ea93cd150 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pauxil @@ -0,0 +1,137 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_pauxil.h +# +## Object files ######################################################## +# +HPL_pauobj = \ + HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o \ + HPL_indxl2g.o HPL_infog2l.o HPL_numroc.o \ + HPL_numrocI.o HPL_dlaswp00N.o HPL_dlaswp10N.o \ + HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o \ + HPL_dlaswp03N.o HPL_dlaswp03T.o HPL_dlaswp04N.o \ + HPL_dlaswp04T.o HPL_dlaswp05N.o HPL_dlaswp05T.o \ + HPL_dlaswp06N.o HPL_dlaswp06T.o HPL_pwarn.o \ + HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o \ + HPL_pdlange.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pauobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2l.c +HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2lp.c +HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2p.c +HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep) + $(CC) -o $@ -c 
$(CCFLAGS) ../HPL_indxl2g.c +HPL_infog2l.o : ../HPL_infog2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_infog2l.c +HPL_numroc.o : ../HPL_numroc.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numroc.c +HPL_numrocI.o : ../HPL_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_numrocI.c +HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp00N.c +HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp10N.c +HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01N.c +HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01T.c +HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp02N.c +HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03N.c +HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03T.c +HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04N.c +HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04T.c +HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05N.c +HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05T.c +HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06N.c +HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06T.c +HPL_pwarn.o : ../HPL_pwarn.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pwarn.c +HPL_pabort.o : ../HPL_pabort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pabort.c +HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaprnt.c +HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlamch.c +HPL_pdlange.o : ../HPL_pdlange.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlange.c +# +# 
###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pfact b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pfact new file mode 100644 index 000000000..bf4634d31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pfact @@ -0,0 +1,118 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h +# +## Object files ######################################################## +# +HPL_pfaobj = \ + HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o \ + HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o \ + HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o \ + HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o \ + HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o \ + HPL_pdrpanrlT.o HPL_pdfact.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pfaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocmax.c +HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpN.c 
+HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpT.c +HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmxswp.c +HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrN.c +HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrT.c +HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllN.c +HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllT.c +HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlN.c +HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlT.c +HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllN.c +HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllT.c +HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrN.c +HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrT.c +HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlN.c +HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlT.c +HPL_pdfact.o : ../HPL_pdfact.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdfact.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pgesv b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pgesv new file mode 100644 index 000000000..7898665f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pgesv @@ -0,0 +1,136 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# 
Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_pgeobj = \ + HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o \ + HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o \ + HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o \ + HPL_spreadT.o HPL_rollN.o HPL_rollT.o \ + HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o \ + HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o \ + HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o \ + HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pgeobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pipid.o : ../HPL_pipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pipid.c +HPL_plindx0.o : ../HPL_plindx0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx0.c +HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00N.c +HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00T.c +HPL_perm.o : ../HPL_perm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_perm.c +HPL_logsort.o : ../HPL_logsort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_logsort.c +HPL_plindx10.o : ../HPL_plindx10.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx10.c +HPL_plindx1.o : ../HPL_plindx1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx1.c +HPL_spreadN.o : ../HPL_spreadN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) 
../HPL_spreadN.c +HPL_spreadT.o : ../HPL_spreadT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadT.c +HPL_rollN.o : ../HPL_rollN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollN.c +HPL_rollT.o : ../HPL_rollT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollT.c +HPL_equil.o : ../HPL_equil.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_equil.c +HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01N.c +HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01T.c +HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNN.c +HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNT.c +HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTN.c +HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTT.c +HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtrsv.c +HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv0.c +HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK1.c +HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK2.c +HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pmatgen b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pmatgen new file mode 100644 index 000000000..bf33fcd7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.pmatgen @@ -0,0 +1,81 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# 
Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h +# +## Object files ######################################################## +# +HPL_pmaobj = \ + HPL_pdmatgen.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pmaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmatgen.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptest b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptest new file mode 100644 index 000000000..cfc96e667 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptest @@ -0,0 +1,94 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \ + $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h +# +## Executable names #################################################### +# +xhpl = $(BINdir)/xhpl +# +## Object files ######################################################## +# +HPL_pteobj = \ + HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/HPL.dat : ../HPL.dat + ( $(CP) ../HPL.dat $(BINdir) ) +# +dexe.grd: $(HPL_pteobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/HPL.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_pddriver.o : ../HPL_pddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pddriver.c +HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdinfo.c +HPL_pdtest.o : ../HPL_pdtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtest.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptimer b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptimer new file mode 100644 index 000000000..971500764 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.ptimer @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_ptimer.h +# +## Object files ######################################################## +# +HPL_ptiobj = \ + HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_ptiobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_ptimer.o : ../HPL_ptimer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer.c +HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_cputime.c +HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.test b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.test new file mode 100644 index 000000000..514d445b8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.test @@ -0,0 +1,93 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_timer.h \ + $(INCdir)/hpl_test.h +# +## Executable names #################################################### +# +xlinpack = $(BINdir)/xlinpack +# +## Object files ######################################################## +# +HPL_tesobj = \ + HPL_ddriver.o HPL_dinfo.o HPL_dtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/LINPACK.dat : ../LINPACK.dat + ( $(CP) ../LINPACK.dat $(BINdir) ) +# Link xlinpack against $(HPL_LIBS), stage LINPACK.dat, then touch the dexe.grd stamp. +dexe.grd: $(HPL_tesobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xlinpack) $(HPL_tesobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/LINPACK.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_ddriver.o : ../HPL_ddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ddriver.c +HPL_dinfo.o : ../HPL_dinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dinfo.c +HPL_dtest.o : ../HPL_dtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtest.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.timer b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.timer new file mode 100644 index 000000000..b8009e88a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.timer @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h +# +## Object files ######################################################## +# +HPL_timobj = \ + HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_timobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_timer.o : ../HPL_timer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer.c +HPL_timer_cputime.o : ../HPL_timer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_cputime.c +HPL_timer_walltime.o : ../HPL_timer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.units b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.units new file mode 100644 index 000000000..1c447f204 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/makes/Make.units @@ -0,0 +1,112 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +## Header dependencies ################################################# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_auxil.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_units.h +# +## Executable names #################################################### +# +xunits = $(BINdir)/xunits +# +## Object files ######################################################## +# +HPL_uniobj = \ + HPL_unit_driver.o HPL_unit_info.o HPL_unit_indxg2l.o \ + HPL_chek_indxg2l.o HPL_unit_indxg2p.o HPL_chek_indxg2p.o \ + HPL_unit_indxl2g.o HPL_chek_indxl2g.o HPL_unit_numroc.o \ + HPL_unit_numrocI.o HPL_chek_numrocI.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/UNITS.dat : ../UNITS.dat + ( $(CP) ../UNITS.dat $(BINdir) ) +# Link xunits against $(HPL_LIBS), stage UNITS.dat, then touch the dexe.grd stamp. +dexe.grd : $(HPL_uniobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xunits) $(HPL_uniobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/UNITS.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_unit_driver.o : ../HPL_unit_driver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_driver.c +HPL_unit_info.o : ../HPL_unit_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_info.c +HPL_unit_indxg2l.o : ../HPL_unit_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2l.c +HPL_chek_indxg2l.o : ../HPL_chek_indxg2l.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2l.c +HPL_unit_indxg2p.o : ../HPL_unit_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2p.c +HPL_chek_indxg2p.o : ../HPL_chek_indxg2p.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2p.c +HPL_unit_indxl2g.o : ../HPL_unit_indxl2g.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxl2g.c +HPL_chek_indxl2g.o : ../HPL_chek_indxl2g.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxl2g.c +HPL_unit_numroc.o
: ../HPL_unit_numroc.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_numroc.c +HPL_unit_numrocI.o : ../HPL_unit_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_numrocI.c +HPL_chek_numrocI.o : ../HPL_chek_numrocI.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_numrocI.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_abort.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_abort.3 new file mode 100644 index 000000000..c6a2c7a70 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_abort.3 @@ -0,0 +1,52 @@ +.TH HPL_abort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_abort \- halts execution. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_abort(\fR +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_abort\fR +displays an error message on stderr and halts execution. +.SH ARGUMENTS +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string.
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_abort( __LINE__, __FILE__, "Halt.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_fprintf \ (3), +.BR HPL_warn \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_all_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_all_reduce.3 new file mode 100644 index 000000000..70ec6c4ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_all_reduce.3 @@ -0,0 +1,49 @@ +.TH HPL_all_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_all_reduce \- All reduce operation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_all_reduce(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const HPL_T_OP \fR +\fI\&OP\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_all_reduce\fR +performs a global reduce operation across all +processes of a group leaving the results on all processes. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/global output) void * +On entry, BUFFER points to the buffer to be combined. On +exit, this array contains the combined data and is identical +on all processes in the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.TP 8 +OP (global input) const HPL_T_OP +On entry, OP is a pointer to the local combine function. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_barrier.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_barrier.3 new file mode 100644 index 000000000..ffee7f291 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_barrier.3 @@ -0,0 +1,27 @@ +.TH HPL_barrier 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_barrier \- Barrier operation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_barrier(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_barrier\fR +blocks the caller until all process members have called it. +The call returns at any process only after all group members have +entered the call. +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bcast.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bcast.3 new file mode 100644 index 000000000..54eb54b25 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bcast.3 @@ -0,0 +1,31 @@ +.TH HPL_bcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_bcast \- Perform the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_bcast(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&int *\fR +\fI\&IFLAG\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_bcast\fR +broadcasts the current panel. Successful completion is +indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to +HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was +not completed, in which case this function should be called again.
+.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.TP 8 +IFLAG (output) int * +On exit, IFLAG indicates whether or not the broadcast has +occurred. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_binit.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_binit.3 new file mode 100644 index 000000000..083776ab6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_binit.3 @@ -0,0 +1,23 @@ +.TH HPL_binit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_binit \- Initialize the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_binit(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_binit\fR +initializes a row broadcast. Successful completion is +indicated by the returned error code HPL_SUCCESS. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_broadcast.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_broadcast.3 new file mode 100644 index 000000000..317d374cf --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_broadcast.3 @@ -0,0 +1,49 @@ +.TH HPL_broadcast 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_broadcast \- Broadcast operation.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_broadcast(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const int\fR +\fI\&ROOT\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_broadcast\fR +broadcasts a message from the process with rank ROOT to +all processes in the group. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/output) void * +On entry, BUFFER points to the buffer to be broadcast. On +exit, this array contains the broadcast data and is identical +on all processes in the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.TP 8 +ROOT (global input) const int +On entry, ROOT is the coordinate of the source process. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bwait.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bwait.3 new file mode 100644 index 000000000..0dac6fe58 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_bwait.3 @@ -0,0 +1,24 @@ +.TH HPL_bwait 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_bwait \- Finalize the row broadcast. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_bwait(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_bwait\fR +waits for the row broadcast of the current panel to +terminate. Successful completion is indicated by the returned error +code HPL_SUCCESS. 
+.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_copyL.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_copyL.3 new file mode 100644 index 000000000..d60619a06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_copyL.3 @@ -0,0 +1,28 @@ +.TH HPL_copyL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_copyL \- Copy the current panel into a contiguous workspace. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_copyL(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_copyL\fR +copies the panel of columns, the L1 replicated submatrix, +the pivot array and the info scalar into a contiguous workspace for +later broadcast. + +The copy of this panel into a contiguous buffer can be enforced by +specifying -DHPL_COPY_L in the architecture specific Makefile. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_daxpy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_daxpy.3 new file mode 100644 index 000000000..50bd0b0a8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_daxpy.3 @@ -0,0 +1,76 @@ +.TH HPL_daxpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_daxpy \- y := y + alpha * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_daxpy(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_daxpy\fR +scales the vector x by alpha and adds it to y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero, then the entries of the incremented array X +need not be set on input. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the scaled entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_daxpy( 3, 2.0, x, 1, y, 1 ); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dcopy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dcopy.3 new file mode 100644 index 000000000..f2759ced9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dcopy.3 @@ -0,0 +1,69 @@ +.TH HPL_dcopy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dcopy \- y := x. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dcopy(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dcopy\fR +copies the vector x into the vector y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_dcopy( 3, x, 1, y, 1 ); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemm.3 new file mode 100644 index 000000000..57c69f78c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemm.3 @@ -0,0 +1,160 @@ +.TH HPL_dgemm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dgemm \- C := alpha * op(A) * op(B) + beta * C. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dgemm(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANSA\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANSB\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR, +\fB\&const double\fR +\fI\&BETA\fR, +\fB\&double *\fR +\fI\&C\fR, +\fB\&const int\fR +\fI\&LDC\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dgemm\fR +performs one of the matrix-matrix operations + + C := alpha * op( A ) * op( B ) + beta * C + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +Alpha and beta are scalars, and A, B and C are matrices, with op(A) +an m by k matrix, op(B) a k by n matrix and C an m by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +TRANSA (local input) const enum HPL_TRANS +On entry, TRANSA specifies the form of op(A) to be used in +the matrix-matrix operation follows: + TRANSA==HplNoTrans : op( A ) = A, + TRANSA==HplTrans : op( A ) = A^T, + TRANSA==HplConjTrans : op( A ) = A^T. 
+.TP 8 +TRANSB (local input) const enum HPL_TRANS +On entry, TRANSB specifies the form of op(B) to be used in +the matrix-matrix operation follows: + TRANSB==HplNoTrans : op( B ) = B, + TRANSB==HplTrans : op( B ) = B^T, + TRANSB==HplConjTrans : op( B ) = B^T. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix +op(A) and of the matrix C. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix +op(B) and the number of columns of the matrix C. N must be +at least zero. +.TP 8 +K (local input) const int +On entry, K specifies the number of columns of the matrix +op(A) and the number of rows of the matrix op(B). K must be +at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then the elements of the matrices A and B +need not be set on input. +.TP 8 +A (local input) const double * +On entry, A is an array of dimension (LDA,ka), where ka is +k when TRANSA==HplNoTrans, and is m otherwise. Before +entry with TRANSA==HplNoTrans, the leading m by k part of +the array A must contain the matrix A, otherwise the leading +k by m part of the array A must contain the matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the first dimension of A as declared +in the calling (sub) program. When TRANSA==HplNoTrans then +LDA must be at least max(1,m), otherwise LDA must be at least +max(1,k). +.TP 8 +B (local input) const double * +On entry, B is an array of dimension (LDB,kb), where kb is +n when TRANSB==HplNoTrans, and is k otherwise. Before +entry with TRANSB==HplNoTrans, the leading k by n part of +the array B must contain the matrix B, otherwise the leading +n by k part of the array B must contain the matrix B. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the first dimension of B as declared +in the calling (sub) program. 
When TRANSB==HplNoTrans then +LDB must be at least max(1,k), otherwise LDB must be at least +max(1,n). +.TP 8 +BETA (local input) const double +On entry, BETA specifies the scalar beta. When BETA is +supplied as zero then the elements of the matrix C need +not be set on input. +.TP 8 +C (local input/output) double * +On entry, C is an array of dimension (LDC,n). Before entry, +the leading m by n part of the array C must contain the +matrix C, except when beta is zero, in which case C need not +be set on entry. On exit, the array C is overwritten by the +m by n matrix ( alpha*op( A )*op( B ) + beta*C ). +.TP 8 +LDC (local input) const int +On entry, LDC specifies the first dimension of C as declared +in the calling (sub) program. LDC must be at least +max(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2], c[2*2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; +.br + c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0; +.br + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, +.br + 2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 ); +.br + printf(" [%f,%f]\en", c[0], c[2]); +.br + printf("c=[%f,%f]\en", c[1], c[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dtrsm \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemv.3 new file mode 100644 index 000000000..f85db57fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dgemv.3 @@ -0,0 +1,128 @@ +.TH HPL_dgemv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dgemv \- y := beta * y + alpha * op(A) * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dgemv(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&const double\fR +\fI\&BETA\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dgemv\fR +performs one of the matrix-vector operations + + y := alpha * op( A ) * x + beta * y, + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +where alpha and beta are scalars, x and y are vectors and A is an m +by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the operation to be performed as +follows: + TRANS = HplNoTrans y := alpha*A *x + beta*y, + TRANS = HplTrans y := alpha*A^T*x + beta*y. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then A and X need not be set on input. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * n. Before entry, the leading m by n part of the +array A must contain the matrix coefficients. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m). 
+.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +BETA (local input) const double +On entry, BETA specifies the scalar beta. When BETA is +supplied as zero then Y need not be set on input. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +Before entry with BETA non-zero, the incremented array Y must +contain the vector y. On exit, Y is overwritten by the +updated vector y. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2], y[2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; +.br + HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0, +.br + a, 2, x, 1, -1.0, y, 1 ); +.br + printf("y=[%f,%f]\en", y[0], y[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dger \ (3), +.BR HPL_dtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dger.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dger.3 new file mode 100644 index 000000000..da9ddf495 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dger.3 @@ -0,0 +1,108 @@ +.TH HPL_dger 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dger \- A := alpha * x * y^T + A. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dger(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dger\fR +performs the rank 1 operation + + A := alpha * x * y^T + A, + +where alpha is a scalar, x is an m-element vector, y is an n-element +vector and A is an m by n matrix. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then X and Y need not be set on input. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of size equal to or greater +than LDA * n. Before entry, the leading m by n part of the +array A must contain the matrix coefficients. 
On exit, A is +overwritten by the updated matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2], y[2]; +.br + a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; +.br + x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; +.br + HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1, +.br + a, 2 ); +.br + printf("y=[%f,%f]\en", y[0], y[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dgemv \ (3), +.BR HPL_dtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlacpy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlacpy.3 new file mode 100644 index 000000000..8da8b1316 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlacpy.3 @@ -0,0 +1,72 @@ +.TH HPL_dlacpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlacpy \- B := A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlacpy(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlacpy\fR +copies an array A into an array B. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the arrays A and +B. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the arrays A +and B. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N). +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). 
+.TP 8 +B (local output) double * +On entry, B points to an array of dimension (LDB,N). On exit, +B is overwritten with A. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of the array B. +LDB must be at least MAX(1,M). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlacpy( 2, 2, a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); +.br + return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlatcpy \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlamch.3 new file mode 100644 index 000000000..9bf41b68a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlamch.3 @@ -0,0 +1,76 @@ +.TH HPL_dlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlamch \- determines machine-specific arithmetic constants. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_dlamch(\fR +\fB\&const HPL_T_MACH\fR +\fI\&CMACH\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlamch\fR +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such +that 1 / sfmin does not overflow, the base of the machine (base), the +precision (prec), the number of (base) digits in the mantissa (t), +whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the +minimum exponent before (gradual) underflow (emin), the underflow +threshold (rmin) base**(emin-1), the largest exponent before overflow +(emax), the overflow threshold (rmax) (base**emax)*(1-eps). 
+.SH ARGUMENTS +.TP 8 +CMACH (local input) const HPL_T_MACH +Specifies the value to be returned by HPL_dlamch + = HPL_MACH_EPS, HPL_dlamch := eps (default) + = HPL_MACH_SFMIN, HPL_dlamch := sfmin + = HPL_MACH_BASE, HPL_dlamch := base + = HPL_MACH_PREC, HPL_dlamch := eps*base + = HPL_MACH_MLEN, HPL_dlamch := t + = HPL_MACH_RND, HPL_dlamch := rnd + = HPL_MACH_EMIN, HPL_dlamch := emin + = HPL_MACH_RMIN, HPL_dlamch := rmin + = HPL_MACH_EMAX, HPL_dlamch := emax + = HPL_MACH_RMAX, HPL_dlamch := rmax + +where + + eps = relative machine precision, + sfmin = safe minimum, + base = base of the machine, + prec = eps*base, + t = number of digits in the mantissa, + rnd = 1.0 if rounding occurs in addition, + emin = minimum exponent before underflow, + rmin = underflow threshold, + emax = largest exponent before overflow, + rmax = overflow threshold. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double eps; +.br + eps = HPL_dlamch( HPL_MACH_EPS ); +.br + printf("eps=%18.8e\en", eps); +.br + exit(0); return(0); +.br +} +.SH REFERENCES +This function has been manually translated from the Fortran 77 LAPACK +auxiliary function dlamch.f (version 2.0 -- 1992), that was itself +based on the function ENVRON by Malcolm and incorporated suggestions +by Gentleman and Marovich. See + +Malcolm M. A., Algorithms to reveal properties of floating-point +arithmetic., Comms. of the ACM, 15, 949-951 (1972). + +Gentleman W. M. and Marovich S. B., More on algorithms that reveal +properties of floating point arithmetic units., Comms. of the ACM, +17, 276-277 (1974). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlange.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlange.3 new file mode 100644 index 000000000..ffbab554f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlange.3 @@ -0,0 +1,73 @@ +.TH HPL_dlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlange \- Compute ||A||. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_dlange(\fR +\fB\&const HPL_T_NORM\fR +\fI\&NORM\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlange\fR +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a matrix A: + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. +.SH ARGUMENTS +.TP 8 +NORM (local input) const HPL_T_NORM +On entry, NORM specifies the value to be returned by this +function as described above. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N), that +contains the matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], norm; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 ); +.br + printf("norm=%f\en", norm); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlaprnt \ (3), +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaprnt.3 new file mode 100644 index 000000000..8fdd89b8c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaprnt.3 @@ -0,0 +1,70 @@ +.TH HPL_dlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaprnt \- Print the matrix A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaprnt(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const char *\fR +\fI\&CMATNM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaprnt\fR +prints to standard error an M-by-N matrix A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A. M must be at +least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of A. N must be +at least zero. +.TP 8 +A (local input) double * +On entry, A points to an array of dimension (LDA,N). +.TP 8 +IA (local input) const int +On entry, IA specifies the starting row index to be printed. +.TP 8 +JA (local input) const int +On entry, JA specifies the starting column index to be +printed. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). +.TP 8 +CMATNM (local input) const char * +On entry, CMATNM is the name of the matrix to be printed. 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp00N.3 new file mode 100644 index 000000000..efe3580b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp00N.3 @@ -0,0 +1,60 @@ +.TH HPL_dlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp00N \- performs a series of row interchanges. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp00N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int *\fR +\fI\&IPIV\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp00N\fR +performs a series of local row interchanges on a matrix +A. One row interchange is initiated for rows 0 through M-1 of A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array A to be +interchanged. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the array A. +N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N) to which +the row interchanges will be applied. On exit, the permuted +matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +IPIV (local input) const int * +On entry, IPIV is an array of size M that contains the +pivoting information. For k in [0..M), IPIV[k]=IROFF + l +implies that local rows k and l are to be interchanged. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01N.3 new file mode 100644 index 000000000..662913e54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01N.3 @@ -0,0 +1,88 @@ +.TH HPL_dlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp01N \- copies rows of A into itself and into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp01N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp01N\fR +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +moved within A or copied into U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +moved within A or copied into U. N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). 
The rows +of this array specified by LINDXA should be moved within A or +copied into U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). The rows +of A specified by LINDXA are copied within this array U at +the positions indicated by positive values of LINDXAU. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be moved within A or +copied into U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U where the rows of A should be +copied at. This array also contains the local row offsets in +A where some of the rows of A should be moved to. A positive +value of LINDXAU[i] indicates that the row LINDXA[i] of A +should be copied into U at the position LINDXAU[i]; otherwise +the row LINDXA[i] of A should be moved at the position +-LINDXAU[i] within A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01T.3 new file mode 100644 index 000000000..738507755 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp01T.3 @@ -0,0 +1,89 @@ +.TH HPL_dlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp01T \- copies rows of A into itself and into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp01T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp01T\fR +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destination of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. Rows of A are stored as columns in U. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +moved within A or copied into U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +moved within A or copied into U. N must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). The rows +of this array specified by LINDXA should be moved within A or +copied into U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,M). 
The rows +of A specified by LINDXA are copied within this array U at +the positions indicated by positive values of LINDXAU. The +rows of A are stored as columns in U. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be moved within A or +copied into U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U where the rows of A should be +copied at. This array also contains the local row offsets in +A where some of the rows of A should be moved to. A positive +value of LINDXAU[i] indicates that the row LINDXA[i] of A +should be copied into U at the position LINDXAU[i]; otherwise +the row LINDXA[i] of A should be moved at the position +-LINDXAU[i] within A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp02N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp02N.3 new file mode 100644 index 000000000..600449c68 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp02N.3 @@ -0,0 +1,85 @@ +.TH HPL_dlaswp02N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp02N \- pack rows of A into columns of W.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp02N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&W0\fR, +\fB\&double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp02N\fR +packs scattered rows of an array A into workspace W. +The row offsets in A are specified by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +copied into W. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of rows of A that should be +copied into W. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,N). The rows +of this array specified by LINDXA should be copied into W. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +W0 (local input/output) double * +On exit, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local output) double * +On entry, W is an array of size (LDW,M). On exit, W contains +the rows LINDXA[i] for i in [0..M) of A stored contiguously +in W(:,i). +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied into W. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U that should be copied into A and +replaced by the rows of W. 
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03N.3 new file mode 100644 index 000000000..1ba0b3208 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03N.3 @@ -0,0 +1,75 @@ +.TH HPL_dlaswp03N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp03N \- copy rows of W into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp03N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp03N\fR +copies columns of W into rows of an array U. The +destination in U of these columns contained in W is stored within W0. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of W stored +contiguously that should be copied into U. M must be at least +zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of columns of W stored +contiguously that should be copied into U. N must be at least +zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). Columns +of W are copied as rows within this array U at the positions +specified in W0. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). 
+.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M), that contains data +to be copied into U. For i in [0..M), entries W(:,i) should +be copied into the row or column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03T.3 new file mode 100644 index 000000000..d8bd11ec1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp03T.3 @@ -0,0 +1,75 @@ +.TH HPL_dlaswp03T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp03T \- copy columns of W into U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp03T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp03T\fR +copies columns of W into an array U. The destination +in U of these columns contained in W is stored within W0. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of W stored +contiguously that should be copied into U. M must be at least +zero. 
+.TP 8 +N (local input) const int +On entry, N specifies the length of columns of W stored +contiguously that should be copied into U. N must be at least +zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,M). Columns +of W are copied within the array U at the positions specified +in W0. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. +.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M), that contains data +to be copied into U. For i in [0..M), entries W(:,i) should +be copied into the row or column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04N.3 new file mode 100644 index 000000000..9f12d79ab --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04N.3 @@ -0,0 +1,106 @@ +.TH HPL_dlaswp04N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp04N \- copy rows of U in A and replace them with columns of W. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp04N(\fR +\fB\&const int\fR +\fI\&M0\fR, +\fB\&const int\fR +\fI\&M1\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp04N\fR +copies M0 rows of U into A and replaces those rows of U +with columns of W. In addition M1 - M0 columns of W are copied into +rows of U. +.SH ARGUMENTS +.TP 8 +M0 (local input) const int +On entry, M0 specifies the number of rows of U that should be +copied into A and replaced by columns of W. M0 must be at +least zero. +.TP 8 +M1 (local input) const int +On entry, M1 specifies the number of columns of W that should +be copied into rows of U. M1 must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of U that should +be copied into A. N must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows that are to be copied into A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M1). +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M0). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied. 
+.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M0+M1), that contains +data to be copied into U. For i in [M0..M0+M1), the entries +W(:,i) are copied into the row W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M0 containing the +local row indexes A into which rows of U are copied. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M0 that contains +the local row indexes of U that should be copied into A and +replaced by the columns of W. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04T.3 new file mode 100644 index 000000000..448334148 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp04T.3 @@ -0,0 +1,107 @@ +.TH HPL_dlaswp04T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp04T \- copy columns of U in rows of A and replace them with columns of W. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp04T(\fR +\fB\&const int\fR +\fI\&M0\fR, +\fB\&const int\fR +\fI\&M1\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&W0\fR, +\fB\&const double *\fR +\fI\&W\fR, +\fB\&const int\fR +\fI\&LDW\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp04T\fR +copies M0 columns of U into rows of A and replaces those +columns of U with columns of W. In addition M1 - M0 columns of W are +copied into U. +.SH ARGUMENTS +.TP 8 +M0 (local input) const int +On entry, M0 specifies the number of columns of U that should +be copied into A and replaced by columns of W. M0 must be at +least zero. +.TP 8 +M1 (local input) const int +On entry, M1 specifies the number of columns of W that will +be copied into U. M1 must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the columns of U that +will be copied into rows of A. N must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns that are to be copied into rows of +A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M0). +.TP 8 +W0 (local input) const double * +On entry, W0 is an array of size (M-1)*LDW+1, that contains +the destination offset in U where the columns of W should be +copied.
+.TP 8 +W (local input) const double * +On entry, W is an array of size (LDW,M0+M1), that contains +data to be copied into U. For i in [M0..M0+M1), the entries +W(:,i) are copied into the column W0(i*LDW) of U. +.TP 8 +LDW (local input) const int +On entry, LDW specifies the leading dimension of the array W. +LDW must be at least MAX(1,N+1). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M0 containing the +local row indexes A into which columns of U are copied. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M0 that contains +the local column indexes of U that should be copied into A +and replaced by the columns of W. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05N.3 new file mode 100644 index 000000000..371dd0b92 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05N.3 @@ -0,0 +1,77 @@ +.TH HPL_dlaswp05N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp05N \- copy rows of U into A. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp05N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp05N\fR +copies rows of U of global offset LINDXAU into rows of +A at positions indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of U that should be +copied into A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of U that should +be copied into A. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input) const double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows that are to be copied into A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied from U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local row indexes of U that should be copied in A.
+.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05T.3 new file mode 100644 index 000000000..5d70a7a16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp05T.3 @@ -0,0 +1,77 @@ +.TH HPL_dlaswp05T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp05T \- copy rows of U into A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp05T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR, +\fB\&const int *\fR +\fI\&LINDXAU\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp05T\fR +copies columns of U of global offset LINDXAU into rows +of A at positions indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the columns of U that will +be copied into rows of A. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U indicated by LINDXAU. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M).
+.TP 8 +U (local input) const double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns that are to be copied into rows of +A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be copied from U. +.TP 8 +LINDXAU (local input) const int * +On entry, LINDXAU is an array of dimension M that contains +the local column indexes of U that should be copied in A. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06N.3 new file mode 100644 index 000000000..7fa19d41a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06N.3 @@ -0,0 +1,72 @@ +.TH HPL_dlaswp06N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp06N \- swap rows of U with rows of A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp06N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp06N\fR +swaps rows of U with rows of A at positions +indicated by LINDXA.
+.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +swapped with rows of U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of A that should +be swapped with rows of U. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +rows or columns of U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,N). This +array contains the rows of U that are to be swapped with rows +of A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,M). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be swapped with U. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06T.3 new file mode 100644 index 000000000..41fa3d6ee --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp06T.3 @@ -0,0 +1,72 @@ +.TH HPL_dlaswp06T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp06T \- swap rows or columns of U with rows of A. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp06T(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&LINDXA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp06T\fR +swaps columns of U with rows of A at positions +indicated by LINDXA. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of A that should be +swapped with columns of U. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the length of the rows of A that should +be swapped with columns of U. N must be at least zero. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +the rows of this array specified by LINDXA are replaced by +columns of U. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +U (local input/output) double * +On entry, U points to an array of dimension (LDU,*). This +array contains the columns of U that are to be swapped with +rows of A. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the leading dimension of the array U. +LDU must be at least MAX(1,N). +.TP 8 +LINDXA (local input) const int * +On entry, LINDXA is an array of dimension M that contains the +local row indexes of A that should be swapped with U. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp10N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp10N.3 new file mode 100644 index 000000000..23465895c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlaswp10N.3 @@ -0,0 +1,59 @@ +.TH HPL_dlaswp10N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlaswp10N \- performs a series of column interchanges. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlaswp10N(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int *\fR +\fI\&IPIV\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlaswp10N\fR +performs a sequence of local column interchanges on a +matrix A. One column interchange is initiated for columns 0 through +N-1 of A. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array A. M +must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the array A. N +must be at least zero. +.TP 8 +A (local input/output) double * +On entry, A points to an array of dimension (LDA,N). This +array contains the columns onto which the interchanges should +be applied. On exit, A contains the permuted matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,M). +.TP 8 +IPIV (local input) const int * +On entry, IPIV is an array of size N that contains the pivoting indexes. +.SH SEE ALSO +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05N \ (3), +.BR HPL_dlaswp05T \ (3), +.BR HPL_dlaswp06N \ (3), +.BR HPL_dlaswp06T \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlatcpy.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlatcpy.3 new file mode 100644 index 000000000..dc940e321 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlatcpy.3 @@ -0,0 +1,70 @@ +.TH HPL_dlatcpy 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlatcpy \- B := A^T +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlatcpy(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlatcpy\fR +copies the transpose of an array A into an array B. +.SH ARGUMENTS +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the array B and +the number of columns of A. M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of rows of the array A and +the number of columns of B. N must be at least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,M). +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least MAX(1,N). +.TP 8 +B (local output) double * +On entry, B points to an array of dimension (LDB,N). On exit, +B is overwritten with the transpose of A. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of the array B. +LDB must be at least MAX(1,M). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; +.br + HPL_dlatcpy( 2, 2, a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dlacpy \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocmax.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocmax.3 new file mode 100644 index 000000000..f68f887c9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocmax.3 @@ -0,0 +1,69 @@ +.TH HPL_dlocmax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocmax \- finds the maximum entry in matrix column. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocmax(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocmax\fR +finds the maximum entry in the current column and packs +the useful information in WORK[0:3]. On exit, WORK[0] contains the +local maximum absolute value scalar, WORK[1] is the corresponding +local row index, WORK[2] is the corresponding global row index, and +WORK[3] is the coordinate of the process owning this max. When N is +less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set +to the total number of process rows. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +N (local input) const int +On entry, N specifies the local number of rows of the column +of A on which we operate. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 4. 
On exit, +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. +.SH SEE ALSO +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpN.3 new file mode 100644 index 000000000..367e37e36 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpN.3 @@ -0,0 +1,62 @@ +.TH HPL_dlocswpN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocswpN \- locally swaps rows within panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocswpN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocswpN\fR +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. 
+.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. The N0 length max +row is stored in WORK[4:4+N0-1]; Note that this is also the +JJth row (or column) of L1. The remaining part of this array +is used as workspace. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpT.3 new file mode 100644 index 000000000..f864de535 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dlocswpT.3 @@ -0,0 +1,62 @@ +.TH HPL_dlocswpT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dlocswpT \- locally swaps rows within panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dlocswpT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dlocswpT\fR +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +WORK[0] contains the local maximum absolute value scalar, +WORK[1] contains the corresponding local row index, WORK[2] +contains the corresponding global row index, and WORK[3] is +the coordinate of process owning this max. The N0 length max +row is stored in WORK[4:4+N0-1]; Note that this is also the +JJth row (or column) of L1. The remaining part of this array +is used as workspace. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dmatgen.3 new file mode 100644 index 000000000..c287fb0fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dmatgen.3 @@ -0,0 +1,55 @@ +.TH HPL_dmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dmatgen \- random matrix generator. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dmatgen(\fR +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&ISEED\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dmatgen\fR +generates (or regenerates) a random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. +.SH ARGUMENTS +.TP 8 +M (input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +A (output) double * +On entry, A points to an array of dimension (LDA,N). On exit, +this array contains the coefficients of the randomly +generated matrix. +.TP 8 +LDA (input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,M). +.TP 8 +ISEED (input) const int +On entry, ISEED specifies the seed number to generate the +matrix A. ISEED must be at least zero. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dscal.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dscal.3 new file mode 100644 index 000000000..8f42a10f5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dscal.3 @@ -0,0 +1,62 @@ +.TH HPL_dscal 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dscal \- x = alpha * x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dscal(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dscal\fR +scales the vector x by alpha. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vector x. N must be +at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero, then the entries of the incremented array X +need not be set on input. +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +On exit, the entries of the incremented array X are scaled +by the scalar alpha. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + HPL_dscal( 3, 2.0, x, 1 ); +.br + printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dswap \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dswap.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dswap.3 new file mode 100644 index 000000000..a398f795a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dswap.3 @@ -0,0 +1,73 @@ +.TH HPL_dswap 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dswap \- y <-> x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dswap(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR, +\fB\&double *\fR +\fI\&Y\fR, +\fB\&const int\fR +\fI\&INCY\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dswap\fR +swaps the vectors x and y. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vectors x and y. N +must be at least zero. +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +On exit, the entries of the incremented array X are updated +with the entries of the incremented array Y. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.TP 8 +Y (local input/output) double * +On entry, Y is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. +On exit, the entries of the incremented array Y are updated +with the entries of the incremented array X. +.TP 8 +INCY (local input) const int +On entry, INCY specifies the increment for the elements of Y. +INCY must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3], y[3]; +.br + x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; +.br + y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; +.br + HPL_dswap( 3, x, 1, y, 1 ); +.br + printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); +.br + printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsm.3 new file mode 100644 index 000000000..ad099eb83 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsm.3 @@ -0,0 +1,152 @@ +.TH HPL_dtrsm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dtrsm \- B := A^{-1} * B or B := B * A^{-1}. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dtrsm(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const enum HPL_UPLO\fR +\fI\&UPLO\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const enum HPL_DIAG\fR +\fI\&DIAG\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double\fR +\fI\&ALPHA\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&B\fR, +\fB\&const int\fR +\fI\&LDB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dtrsm\fR +solves one of the matrix equations + + op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + +where alpha is a scalar, X and B are m by n matrices, A is a unit, or +non-unit, upper or lower triangular matrix and op(A) is one of + + op( A ) = A or op( A ) = A^T. + +The matrix X is overwritten on B. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +SIDE (local input) const enum HPL_SIDE +On entry, SIDE specifies whether op(A) appears on the left +or right of X as follows: + SIDE==HplLeft op( A ) * X = alpha * B, + SIDE==HplRight X * op( A ) = alpha * B. 
+.TP 8 +UPLO (local input) const enum HPL_UPLO +On entry, UPLO specifies whether the upper or lower +triangular part of the array A is to be referenced. When +UPLO==HplUpper, only the upper triangular part of A is to be +referenced, otherwise only the lower triangular part of A is +to be referenced. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the form of op(A) to be used in +the matrix-matrix operation as follows: + TRANS==HplNoTrans : op( A ) = A, + TRANS==HplTrans : op( A ) = A^T, + TRANS==HplConjTrans : op( A ) = A^T. +.TP 8 +DIAG (local input) const enum HPL_DIAG +On entry, DIAG specifies whether A is unit triangular or +not. When DIAG==HplUnit, A is assumed to be unit triangular, +and otherwise, A is not assumed to be unit triangular. +.TP 8 +M (local input) const int +On entry, M specifies the number of rows of the matrix B. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of the matrix B. +N must be at least zero. +.TP 8 +ALPHA (local input) const double +On entry, ALPHA specifies the scalar alpha. When ALPHA is +supplied as zero then the elements of the matrix B need not +be set on input. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * k, where k is m when SIDE==HplLeft and is n +otherwise. Before entry with UPLO==HplUpper, the leading +k by k upper triangular part of the array A must contain the +upper triangular matrix and the strictly lower triangular +part of A is not referenced. When UPLO==HplLower on entry, +the leading k by k lower triangular part of the array A must +contain the lower triangular matrix and the strictly upper +triangular part of A is not referenced. + +Note that when DIAG==HplUnit, the diagonal elements of A +are not referenced either, but are assumed to be unity. 
+.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. +.TP 8 +B (local input/output) double * +On entry, B points to an array of size equal to or greater +than LDB * n. Before entry, the leading m by n part of the +array B must contain the matrix B, except when alpha is zero, +in which case B need not be set on entry. On exit, the array +B is overwritten by the m by n solution matrix. +.TP 8 +LDB (local input) const int +On entry, LDB specifies the leading dimension of B as +declared in the calling (sub) program. LDB must be at +least MAX(1,m). +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], b[2*2]; +.br + a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; +.br + b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; +.br + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, +.br + HplNoTrans, HplNonUnit, 2, 2, 2.0, +.br + a, 2, b, 2 ); +.br + printf(" [%f,%f]\en", b[0], b[2]); +.br + printf("b=[%f,%f]\en", b[1], b[3]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dgemm \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsv.3 new file mode 100644 index 000000000..5df37c78b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_dtrsv.3 @@ -0,0 +1,121 @@ +.TH HPL_dtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_dtrsv \- x := A^{-1} x. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_dtrsv(\fR +\fB\&const enum HPL_ORDER\fR +\fI\&ORDER\fR, +\fB\&const enum HPL_UPLO\fR +\fI\&UPLO\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const enum HPL_DIAG\fR +\fI\&DIAG\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_dtrsv\fR +solves one of the systems of equations + + A * x = b, or A^T * x = b, + +where b and x are n-element vectors and A is an n by n non-unit, or +unit, upper or lower triangular matrix. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. +.SH ARGUMENTS +.TP 8 +ORDER (local input) const enum HPL_ORDER +On entry, ORDER specifies the storage format of the operands +as follows: + ORDER = HplRowMajor, + ORDER = HplColumnMajor. +.TP 8 +UPLO (local input) const enum HPL_UPLO +On entry, UPLO specifies whether the upper or lower +triangular part of the array A is to be referenced. When +UPLO==HplUpper, only the upper triangular part of A is to be +referenced, otherwise only the lower triangular part of A is +to be referenced. +.TP 8 +TRANS (local input) const enum HPL_TRANS +On entry, TRANS specifies the equations to be solved as +follows: + TRANS==HplNoTrans A * x = b, + TRANS==HplTrans A^T * x = b. +.TP 8 +DIAG (local input) const enum HPL_DIAG +On entry, DIAG specifies whether A is unit triangular or +not. When DIAG==HplUnit, A is assumed to be unit triangular, +and otherwise, A is not assumed to be unit triangular. +.TP 8 +N (local input) const int +On entry, N specifies the order of the matrix A. N must be at +least zero. +.TP 8 +A (local input) const double * +On entry, A points to an array of size equal to or greater +than LDA * n. 
Before entry with UPLO==HplUpper, the leading +n by n upper triangular part of the array A must contain the +upper triangular matrix and the strictly lower triangular +part of A is not referenced. When UPLO==HplLower on entry, +the leading n by n lower triangular part of the array A must +contain the lower triangular matrix and the strictly upper +triangular part of A is not referenced. + +Note that when DIAG==HplUnit, the diagonal elements of A +are not referenced either, but are assumed to be unity. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of A as +declared in the calling (sub) program. LDA must be at +least MAX(1,n). +.TP 8 +X (local input/output) double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +Before entry, the incremented array X must contain the n +element right-hand side vector b. On exit, X is overwritten +with the solution vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double a[2*2], x[2]; +.br + a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; +.br + x[0] = 2.0; x[1] = 1.0; +.br + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, +.br + HplNonUnit, 2, a, 2, x, 1 ); +.br + printf("x=[%f,%f]\en", x[0], x[1]); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_dger \ (3), +.BR HPL_dgemv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_equil.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_equil.3 new file mode 100644 index 000000000..817780e44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_equil.3 @@ -0,0 +1,91 @@ +.TH HPL_equil 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_equil \- Equilibrate U and forward the column panel L. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_equil(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_TRANS\fR +\fI\&TRANS\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_equil\fR +equilibrates the local pieces of U, so that on exit to +this function, pieces of U contained in every process row are of the +same size. This phase makes the rolling phase optimal. In addition, +this function probes for the column panel L and forwards it when +possible. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be equilibrated) information. +.TP 8 +TRANS (global input) const enum HPL_TRANS +On entry, TRANS specifies whether U is stored in transposed +or non-transposed form. +.TP 8 +N (local input) const int +On entry, N specifies the number of rows or columns of U. N +must be at least 0. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[nprow]) when U is stored in +non-transposed form, and MAX(1,N) otherwise. +.TP 8 +IPLEN (global input) int * +On entry, IPLEN is an array of dimension NPROW+1. 
This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension NPROW+1. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_fprintf.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_fprintf.3 new file mode 100644 index 000000000..8a81c0bfb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_fprintf.3 @@ -0,0 +1,44 @@ +.TH HPL_fprintf 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_fprintf \- fprintf + fflush wrapper. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_fprintf(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_fprintf\fR +is a wrapper around fprintf flushing the output stream. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. 
+.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_fprintf( stdout, "Hello World.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_abort \ (3), +.BR HPL_warn \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_exit.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_exit.3 new file mode 100644 index 000000000..dab8067e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_exit.3 @@ -0,0 +1,25 @@ +.TH HPL_grid_exit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_exit \- Exit process grid. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_exit(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_exit\fR +marks the process grid object for deallocation. The +returned error code MPI_SUCCESS indicates successful completion. +Other error codes are (MPI) implementation dependent. +.SH ARGUMENTS +.TP 8 +GRID (local input/output) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid to be released. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_init \ (3), +.BR HPL_grid_info \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_info.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_info.3 new file mode 100644 index 000000000..53c6a214b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_info.3 @@ -0,0 +1,52 @@ +.TH HPL_grid_info 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_info \- Retrieve grid information. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_info(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&int *\fR +\fI\&NPROW\fR, +\fB\&int *\fR +\fI\&NPCOL\fR, +\fB\&int *\fR +\fI\&MYROW\fR, +\fB\&int *\fR +\fI\&MYCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_info\fR +returns the grid shape and the coordinates in the grid +of the calling process. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +NPROW (global output) int * +On exit, NPROW specifies the number of process rows in the +grid. NPROW is at least one. +.TP 8 +NPCOL (global output) int * +On exit, NPCOL specifies the number of process columns in +the grid. NPCOL is at least one. +.TP 8 +MYROW (global output) int * +On exit, MYROW specifies my row process coordinate in the +grid. MYROW is greater than or equal to zero and less than +NPROW. +.TP 8 +MYCOL (global output) int * +On exit, MYCOL specifies my column process coordinate in the +grid. MYCOL is greater than or equal to zero and less than +NPCOL. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_init \ (3), +.BR HPL_grid_exit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_init.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_init.3 new file mode 100644 index 000000000..7792a522d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_grid_init.3 @@ -0,0 +1,55 @@ +.TH HPL_grid_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_grid_init \- Create a process grid. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_grid_init(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR, +\fB\&const HPL_T_ORDER\fR +\fI\&ORDER\fR, +\fB\&const int\fR +\fI\&NPROW\fR, +\fB\&const int\fR +\fI\&NPCOL\fR, +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_grid_init\fR +creates a NPROW x NPCOL process grid using column- or +row-major ordering from an initial collection of processes identified +by an MPI communicator. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. The coordinates of processes that are not part of the +grid are set to values outside of [0..NPROW) x [0..NPCOL). +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +On entry, COMM is the MPI communicator identifying the +initial collection of processes out of which the grid is +formed. +.TP 8 +ORDER (global input) const HPL_T_ORDER +On entry, ORDER specifies how the processes should be ordered +in the grid as follows: + ORDER = HPL_ROW_MAJOR row-major ordering; + ORDER = HPL_COLUMN_MAJOR column-major ordering; +.TP 8 +NPROW (global input) const int +On entry, NPROW specifies the number of process rows in the +grid to be created. NPROW must be at least one. +.TP 8 +NPCOL (global input) const int +On entry, NPCOL specifies the number of process columns in +the grid to be created. NPCOL must be at least one. +.TP 8 +GRID (local input/output) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information to be initialized. +.SH SEE ALSO +.BR HPL_pnum \ (3), +.BR HPL_grid_info \ (3), +.BR HPL_grid_exit \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_idamax.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_idamax.3 new file mode 100644 index 000000000..c00292a02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_idamax.3 @@ -0,0 +1,59 @@ +.TH HPL_idamax 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|). +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_idamax(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const double *\fR +\fI\&X\fR, +\fB\&const int\fR +\fI\&INCX\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_idamax\fR +returns the index in an n-vector x of the first element +having maximum absolute value. +.SH ARGUMENTS +.TP 8 +N (local input) const int +On entry, N specifies the length of the vector x. N must be +at least zero. +.TP 8 +X (local input) const double * +On entry, X is an incremented array of dimension at least +( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. +.TP 8 +INCX (local input) const int +On entry, INCX specifies the increment for the elements of X. +INCX must not be zero. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + double x[3]; +.br + int imax; +.br + x[0] = 1.0; x[1] = 3.0; x[2] = 2.0; +.br + imax = HPL_idamax( 3, x, 1 ); +.br + printf("imax=%d\en", imax); +.br + exit(0); +.br + return(0); +.br +} +.SH SEE ALSO +.BR HPL_daxpy \ (3), +.BR HPL_dcopy \ (3), +.BR HPL_dscal \ (3), +.BR HPL_dswap \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2l.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2l.3 new file mode 100644 index 000000000..32c4d9e07 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2l.3 @@ -0,0 +1,53 @@ +.TH HPL_indxg2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2l \- Map a global index into a local one. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxg2l(\fR +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2l\fR +computes the local index of a matrix entry pointed to by +the global index IG. This local returned index is the same in all +processes. +.SH ARGUMENTS +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, if SRCPROC = -1, the data is not distributed but +replicated, in which case this routine returns IG in all +processes. Otherwise, the value of SRCPROC is ignored. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2lp.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2lp.3 new file mode 100644 index 000000000..ca2004031 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2lp.3 @@ -0,0 +1,66 @@ +.TH HPL_indxg2lp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2lp \- Map a global index into a local index-process pair. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_indxg2lp(\fR +\fB\&int *\fR +\fI\&IL\fR, +\fB\&int *\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2lp\fR +computes the local index of a matrix entry pointed to by +the global index IG as well as the process coordinate which possesses +this entry. The local returned index is the same in all processes. +.SH ARGUMENTS +.TP 8 +IL (output) int * +On exit, IL specifies the local index corresponding to IG. IL +is at least zero. +.TP 8 +PROC (output) int * +On exit, PROC is the coordinate of the process owning the +entry specified by the global index IG. PROC is at least zero +and less than NPROCS. +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, if SRCPROC = -1, the data is not distributed but +replicated, in which case this routine returns IG in all +processes. Otherwise, the value of SRCPROC is ignored.
+.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2p.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2p.3 new file mode 100644 index 000000000..5e0273feb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxg2p.3 @@ -0,0 +1,52 @@ +.TH HPL_indxg2p 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxg2p \- Map a global index into a process coordinate. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxg2p(\fR +\fB\&const int\fR +\fI\&IG\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxg2p\fR +computes the process coordinate which possesses the entry +of a matrix specified by a global index IG. +.SH ARGUMENTS +.TP 8 +IG (input) const int +On entry, IG specifies the global index of the matrix entry. +IG must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one.
+.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxl2g.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxl2g.3 new file mode 100644 index 000000000..ba6da53a7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_indxl2g.3 @@ -0,0 +1,59 @@ +.TH HPL_indxl2g 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_indxl2g \- Map an index-process pair into a global index. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_indxl2g(\fR +\fB\&const int\fR +\fI\&IL\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_indxl2g\fR +computes the global index of a matrix entry pointed to +by the local index IL of the process indicated by PROC. +.SH ARGUMENTS +.TP 8 +IL (input) const int +On entry, IL specifies the local index of the matrix entry. +IL must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local array row or column is to be determined. PROC must be +at least zero and strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS.
+.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_infog2l.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_infog2l.3 new file mode 100644 index 000000000..c07f276d5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_infog2l.3 @@ -0,0 +1,126 @@ +.TH HPL_infog2l 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_infog2l \- global to local index translation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_infog2l(\fR +\fB\&int\fR +\fI\&I\fR, +\fB\&int\fR +\fI\&J\fR, +\fB\&const int\fR +\fI\&IMB\fR, +\fB\&const int\fR +\fI\&MB\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&RSRC\fR, +\fB\&const int\fR +\fI\&CSRC\fR, +\fB\&const int\fR +\fI\&MYROW\fR, +\fB\&const int\fR +\fI\&MYCOL\fR, +\fB\&const int\fR +\fI\&NPROW\fR, +\fB\&const int\fR +\fI\&NPCOL\fR, +\fB\&int *\fR +\fI\&II\fR, +\fB\&int *\fR +\fI\&JJ\fR, +\fB\&int *\fR +\fI\&PROW\fR, +\fB\&int *\fR +\fI\&PCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_infog2l\fR +computes the starting local index II, JJ corresponding to +the submatrix starting globally at the entry pointed by I, J. This +routine returns the coordinates in the grid of the process owning the +matrix entry of global indexes I, J, namely PROW and PCOL. +.SH ARGUMENTS +.TP 8 +I (global input) int +On entry, I specifies the global row index of the matrix +entry. I must be at least zero. +.TP 8 +J (global input) int +On entry, J specifies the global column index of the matrix +entry. J must be at least zero. 
+.TP 8 +IMB (global input) const int +On entry, IMB specifies the size of the first row block of +the global matrix. IMB must be at least one. +.TP 8 +MB (global input) const int +On entry, MB specifies the blocking factor used to partition +and distribute the rows of the matrix A. MB must be larger +than one. +.TP 8 +INB (global input) const int +On entry, INB specifies the size of the first column block of +the global matrix. INB must be at least one. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the columns of the matrix A. NB must be larger +than one. +.TP 8 +RSRC (global input) const int +On entry, RSRC specifies the row coordinate of the process +that possesses the row I. RSRC must be at least zero and +strictly less than NPROW. +.TP 8 +CSRC (global input) const int +On entry, CSRC specifies the column coordinate of the process +that possesses the column J. CSRC must be at least zero and +strictly less than NPCOL. +.TP 8 +MYROW (local input) const int +On entry, MYROW specifies my row process coordinate in the +grid. MYROW is greater than or equal to zero and less than +NPROW. +.TP 8 +MYCOL (local input) const int +On entry, MYCOL specifies my column process coordinate in the +grid. MYCOL is greater than or equal to zero and less than +NPCOL. +.TP 8 +NPROW (global input) const int +On entry, NPROW specifies the number of process rows in the +grid. NPROW is at least one. +.TP 8 +NPCOL (global input) const int +On entry, NPCOL specifies the number of process columns in +the grid. NPCOL is at least one. +.TP 8 +II (local output) int * +On exit, II specifies the local starting row index of the +submatrix. On exit, II is at least 0. +.TP 8 +JJ (local output) int * +On exit, JJ specifies the local starting column index of the +submatrix. On exit, JJ is at least 0. 
+.TP 8 +PROW (global output) int * +On exit, PROW is the row coordinate of the process owning the +entry specified by the global index I. PROW is at least zero +and less than NPROW. +.TP 8 +PCOL (global output) int * +On exit, PCOL is the column coordinate of the process owning +the entry specified by the global index J. PCOL is at least +zero and less than NPCOL. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_jumpit.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_jumpit.3 new file mode 100644 index 000000000..66e77ac32 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_jumpit.3 @@ -0,0 +1,48 @@ +.TH HPL_jumpit 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_jumpit \- jump into the random sequence. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_jumpit(\fR +\fB\&int *\fR +\fI\&MULT\fR, +\fB\&int *\fR +\fI\&IADD\fR, +\fB\&int *\fR +\fI\&IRANN\fR, +\fB\&int *\fR +\fI\&IRANM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_jumpit\fR +jumps in the random sequence from the number X(n) encoded +in IRANN to the number X(m) encoded in IRANM using the constants A +and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A +and C obviously depend on m and n, see the function HPL_xjumpm in +order to initialize them. +.SH ARGUMENTS +.TP 8 +MULT (local input) int * +On entry, MULT is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant A. +.TP 8 +IADD (local input) int * +On entry, IADD is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant C. +.TP 8 +IRANN (local input) int * +On entry, IRANN is an array of dimension 2, that contains +the 16-lower and 15-higher bits of the encoding of X(n). 
+.TP 8 +IRANM (local output) int * +On entry, IRANM is an array of dimension 2. On exit, this +array contains respectively the 16-lower and 15-higher bits +of the encoding of X(m). +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ladd.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ladd.3 new file mode 100644 index 000000000..9fd6805d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ladd.3 @@ -0,0 +1,41 @@ +.TH HPL_ladd 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ladd \- Adds two long positive integers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_ladd(\fR +\fB\&int *\fR +\fI\&J\fR, +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_ladd\fR +adds without carry two long positive integers K and J and +puts the result into I. The long integers I, J, K are encoded on 64 +bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second +entry. +.SH ARGUMENTS +.TP 8 +J (local input) int * +On entry, J is an integer array of dimension 2 containing the +encoded long integer J. +.TP 8 +K (local input) int * +On entry, K is an integer array of dimension 2 containing the +encoded long integer K. +.TP 8 +I (local output) int * +On entry, I is an integer array of dimension 2. On exit, this +array contains the encoded long integer result. +.SH SEE ALSO +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_lmul.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_lmul.3 new file mode 100644 index 000000000..8be7380e0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_lmul.3 @@ -0,0 +1,42 @@ +.TH HPL_lmul 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_lmul \- multiplies 2 long positive integers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_lmul(\fR +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&J\fR, +\fB\&int *\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_lmul\fR +multiplies without carry two long positive integers K and J +and puts the result into I. The long integers I, J, K are encoded on +64 bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second entry +of each array. For efficiency purposes, the intrinsic modulo function +is inlined. +.SH ARGUMENTS +.TP 8 +K (local input) int * +On entry, K is an integer array of dimension 2 containing the +encoded long integer K. +.TP 8 +J (local input) int * +On entry, J is an integer array of dimension 2 containing the +encoded long integer J. +.TP 8 +I (local output) int * +On entry, I is an integer array of dimension 2. On exit, this +array contains the encoded long integer result. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_logsort.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_logsort.3 new file mode 100644 index 000000000..e7e80062a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_logsort.3 @@ -0,0 +1,65 @@ +.TH HPL_logsort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_logsort \- Sort the processes in logarithmic order. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_logsort(\fR +\fB\&const int\fR +\fI\&NPROCS\fR, +\fB\&const int\fR +\fI\&ICURROC\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_logsort\fR +computes an array IPMAP and its inverse IPMAPM1 that +contain the logarithmic sorted processes id with respect to the local +number of rows of U that they own. This is necessary to ensure that +the logarithmic spreading of U is optimal in terms of number of steps +and communication volume as well. In other words, the largest pieces +of U will be sent a minimal number of times. +.SH ARGUMENTS +.TP 8 +NPROCS (global input) const int +On entry, NPROCS specifies the number of process rows in the +process grid. NPROCS is at least one. +.TP 8 +ICURROC (global input) const int +On entry, ICURROC is the source process row. +.TP 8 +IPLEN (global input/output) int * +On entry, IPLEN is an array of dimension NPROCS+1, such that +IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, +that process i-1 has. On exit, IPLEN[i] is the number of +rows of U in the processes before process IPMAP[i] after the +sort, with the convention that IPLEN[NPROCS] is the total +number of rows of the panel. In other words, IPLEN[i+1] - +IPLEN[i] is the number of rows of A that should be moved to +the process IPMAP[i].
IPLEN is such that the number of rows +of the source process row is IPLEN[1] - IPLEN[0], and the +remaining entries of this array are sorted so that the +quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROCS. On exit, +this array contains the logarithmic mapping of the processes. In +other words, IPMAP[myroc] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROCS. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROCS) +.SH SEE ALSO +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_max.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_max.3 new file mode 100644 index 000000000..16d8aecc6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_max.3 @@ -0,0 +1,43 @@ +.TH HPL_max 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_max \- Combine (max) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_max(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_max\fR +combines (max) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contain the +combined results.
+.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_min.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_min.3 new file mode 100644 index 000000000..a816d61b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_min.3 @@ -0,0 +1,43 @@ +.TH HPL_min 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_min \- Combine (min) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_min(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_min\fR +combines (min) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contain the +combined results. +.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numroc.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numroc.3 new file mode 100644 index 000000000..34c8acfa9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numroc.3 @@ -0,0 +1,60 @@ +.TH HPL_numroc 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_numroc \- Compute the local number of row/columns. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_numroc(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_numroc\fR +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index 0. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the number of rows/columns being dealt +out. N must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local portion is determined. PROC must be at least zero and +strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix. SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. 
+.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numrocI \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numrocI.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numrocI.3 new file mode 100644 index 000000000..1891f1ac9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_numrocI.3 @@ -0,0 +1,66 @@ +.TH HPL_numrocI 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_numrocI \- Compute the local number of row/columns. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_numrocI(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&I\fR, +\fB\&const int\fR +\fI\&INB\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const int\fR +\fI\&PROC\fR, +\fB\&const int\fR +\fI\&SRCPROC\fR, +\fB\&const int\fR +\fI\&NPROCS\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_numrocI\fR +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index I. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the number of rows/columns being dealt +out. N must be at least zero. +.TP 8 +I (input) const int +On entry, I specifies the global index of the matrix entry. +I must be at least zero. +.TP 8 +INB (input) const int +On entry, INB specifies the size of the first block of the +global matrix. INB must be at least one. +.TP 8 +NB (input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +PROC (input) const int +On entry, PROC specifies the coordinate of the process whose +local portion is determined. PROC must be at least zero and +strictly less than NPROCS. +.TP 8 +SRCPROC (input) const int +On entry, SRCPROC specifies the coordinate of the process +that possesses the first row or column of the matrix.
SRCPROC +must be at least zero and strictly less than NPROCS. +.TP 8 +NPROCS (input) const int +On entry, NPROCS specifies the total number of process rows +or columns over which the matrix is distributed. NPROCS must +be at least one. +.SH SEE ALSO +.BR HPL_indxg2l \ (3), +.BR HPL_indxg2lp \ (3), +.BR HPL_indxg2p \ (3), +.BR HPL_indxl2g \ (3), +.BR HPL_numroc \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pabort.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pabort.3 new file mode 100644 index 000000000..044e87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pabort.3 @@ -0,0 +1,40 @@ +.TH HPL_pabort 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pabort \- halts execution. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pabort(\fR +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pabort\fR +displays an error message on stderr and halts execution. +.SH ARGUMENTS +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH SEE ALSO +.BR HPL_fprintf \ (3), +.BR HPL_pwarn \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_packL.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_packL.3 new file mode 100644 index 000000000..c79019c37 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_packL.3 @@ -0,0 +1,42 @@ +.TH HPL_packL 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_packL \- Form the MPI structure for the row ring broadcasts. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_packL(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&INDEX\fR, +\fB\&const int\fR +\fI\&LEN\fR, +\fB\&const int\fR +\fI\&IBUF\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_packL\fR +forms the MPI data type for the panel to be broadcast. +Successful completion is indicated by the returned error code +MPI_SUCCESS. +.SH ARGUMENTS +.TP 8 +PANEL (input/output) HPL_T_panel * +On entry, PANEL points to the current panel data structure +being broadcast. +.TP 8 +INDEX (input) const int +On entry, INDEX points to the first entry of the packed +buffer being broadcast. +.TP 8 +LEN (input) const int +On entry, LEN is the length of the packed buffer. +.TP 8 +IBUF (input) const int +On entry, IBUF specifies the panel buffer/count/type entries +that should be initialized. +.SH SEE ALSO +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pddriver.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pddriver.3 new file mode 100644 index 000000000..30e55b62e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pddriver.3 @@ -0,0 +1,15 @@ +.TH main 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +main \- HPL main timing program. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&main();\fR +.SH DESCRIPTION +\fB\&main\fR +is the main driver program for testing the HPL routines. +This program is driven by a short data file named "HPL.dat". +.SH SEE ALSO +.BR HPL_pdinfo \ (3), +.BR HPL_pdtest \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdfact.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdfact.3 new file mode 100644 index 000000000..e3db5fb8b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdfact.3 @@ -0,0 +1,64 @@ +.TH HPL_pdfact 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdfact \- recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdfact(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdfact\fR +recursively factorizes a 1-dimensional panel of columns. +The RPFACT function pointer specifies the recursive algorithm to be +used, either Crout, Left- or Right looking. NBMIN allows to vary the +recursive stopping criterion in terms of the number of columns in the +panel, and NDIV allows to specify the number of subpanels each panel +should be divided into. Usually a value of 2 will be chosen. Finally +PFACT is a function pointer specifying the non-recursive algorithm +to be used on at most NBMIN columns. One can also choose here between +Crout, Left- or Right looking. Empirical tests seem to indicate that +values of 4 or 8 for NBMIN give the best results. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual.
On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv.3 new file mode 100644 index 000000000..ab4b62c4e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv.3 @@ -0,0 +1,40 @@ +.TH HPL_pdgesv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesv \- Solve A x = b. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesv(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesv\fR +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with or without look-ahead. The lower triangular factor is left +unpivoted and the pivots are not returned. The right hand side is the +N+1 column of the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdtrsv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv0.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv0.3 new file mode 100644 index 000000000..180f191f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesv0.3 @@ -0,0 +1,47 @@ +.TH HPL_pdgesv0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesv0 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesv0(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesv0\fR +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +without look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned.
The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK1.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK1.3 new file mode 100644 index 000000000..64cee67ed --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK1.3 @@ -0,0 +1,46 @@ +.TH HPL_pdgesvK1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesvK1 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesvK1(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesvK1\fR +factors a N+1-by-N matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. 
+.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK2.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK2.3 new file mode 100644 index 000000000..9f389b9dd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdgesvK2.3 @@ -0,0 +1,47 @@ +.TH HPL_pdgesvK2 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdgesvK2 \- Factor an N x N+1 matrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdgesvK2(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdgesvK2\fR +factors a N+1-by-N matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. 
+.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdfact \ (3), +.BR HPL_binit \ (3), +.BR HPL_bcast \ (3), +.BR HPL_bwait \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pdupdateTT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdinfo.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdinfo.3 new file mode 100644 index 000000000..eed541159 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdinfo.3 @@ -0,0 +1,212 @@ +.TH HPL_pdinfo 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdinfo \- Read input parameter file. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdinfo(\fR +\fB\&HPL_T_test *\fR +\fI\&TEST\fR, +\fB\&int *\fR +\fI\&NS\fR, +\fB\&int *\fR +\fI\&N\fR, +\fB\&int *\fR +\fI\&NBS\fR, +\fB\&int *\fR +\fI\&NB\fR, +\fB\&HPL_T_ORDER *\fR +\fI\&PMAPPIN\fR, +\fB\&int *\fR +\fI\&NPQS\fR, +\fB\&int *\fR +\fI\&P\fR, +\fB\&int *\fR +\fI\&Q\fR, +\fB\&int *\fR +\fI\&NPFS\fR, +\fB\&HPL_T_FACT *\fR +\fI\&PF\fR, +\fB\&int *\fR +\fI\&NBMS\fR, +\fB\&int *\fR +\fI\&NBM\fR, +\fB\&int *\fR +\fI\&NDVS\fR, +\fB\&int *\fR +\fI\&NDV\fR, +\fB\&int *\fR +\fI\&NRFS\fR, +\fB\&HPL_T_FACT *\fR +\fI\&RF\fR, +\fB\&int *\fR +\fI\&NTPS\fR, +\fB\&HPL_T_TOP *\fR +\fI\&TP\fR, +\fB\&int *\fR +\fI\&NDHS\fR, +\fB\&int *\fR +\fI\&DH\fR, +\fB\&HPL_T_SWAP *\fR +\fI\&FSWAP\fR, +\fB\&int *\fR +\fI\&TSWAP\fR, +\fB\&int *\fR +\fI\&L1NOTRAN\fR, +\fB\&int *\fR +\fI\&UNOTRAN\fR, +\fB\&int *\fR +\fI\&EQUIL\fR, +\fB\&int *\fR +\fI\&ALIGN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdinfo\fR +reads the startup information for the various tests and +transmits it to all processes. +.SH ARGUMENTS +.TP 8 +TEST (global output) HPL_T_test * +On entry, TEST points to a testing data structure. 
On exit, +the fields of this data structure are initialized as follows: +TEST->outfp specifies the output file where the results will +be printed. It is only defined and used by the process 0 of +the grid. TEST->thrsh specifies the threshold value for the +test ratio. TEST->epsil is the relative machine precision of +the distributed computer. Finally the test counters, kfail, +kpass, kskip, ktest are initialized to zero. +.TP 8 +NS (global output) int * +On exit, NS specifies the number of different problem sizes +to be tested. NS is less than or equal to HPL_MAX_PARAM. +.TP 8 +N (global output) int * +On entry, N is an array of dimension HPL_MAX_PARAM. On exit, +the first NS entries of this array contain the problem sizes +to run the code with. +.TP 8 +NBS (global output) int * +On exit, NBS specifies the number of different distribution +blocking factors to be tested. NBS must be less than or equal +to HPL_MAX_PARAM. +.TP 8 +NB (global output) int * +On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, +the first NBS entries of this array contain the values of the +various distribution blocking factors, to run the code with. +.TP 8 +PMAPPIN (global output) HPL_T_ORDER * +On exit, PMAPPIN specifies the process mapping onto the +nodes of the MPI machine configuration. PMAPPIN defaults to +row-major ordering. +.TP 8 +NPQS (global output) int * +On exit, NPQS specifies the number of different values that +can be used for P and Q, i.e., the number of process grids to +run the code with. NPQS must be less than or equal to +HPL_MAX_PARAM. +.TP 8 +P (global output) int * +On entry, P is an array of dimension HPL_MAX_PARAM. On exit, +the first NPQS entries of this array contain the values of P, +the number of process rows of the NPQS grids to run the code +with. +.TP 8 +Q (global output) int * +On entry, Q is an array of dimension HPL_MAX_PARAM.
On exit, +the first NPQS entries of this array contain the values of Q, +the number of process columns of the NPQS grids to run the +code with. +.TP 8 +NPFS (global output) int * +On exit, NPFS specifies the number of different values that +can be used for PF : the panel factorization algorithm to run +the code with. NPFS is less than or equal to HPL_MAX_PARAM. +.TP 8 +PF (global output) HPL_T_FACT * +On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, +the first NPFS entries of this array contain the various +panel factorization algorithms to run the code with. +.TP 8 +NBMS (global output) int * +On exit, NBMS specifies the number of various recursive +stopping criteria to be tested. NBMS must be less than or +equal to HPL_MAX_PARAM. +.TP 8 +NBM (global output) int * +On entry, NBM is an array of dimension HPL_MAX_PARAM. On +exit, the first NBMS entries of this array contain the values +of the various recursive stopping criteria to be tested. +.TP 8 +NDVS (global output) int * +On exit, NDVS specifies the number of various numbers of +panels in recursion to be tested. NDVS is less than or equal +to HPL_MAX_PARAM. +.TP 8 +NDV (global output) int * +On entry, NDV is an array of dimension HPL_MAX_PARAM. On +exit, the first NDVS entries of this array contain the values +of the various numbers of panels in recursion to be tested. +.TP 8 +NRFS (global output) int * +On exit, NRFS specifies the number of different values that +can be used for RF : the recursive factorization algorithm to +be tested. NRFS is less than or equal to HPL_MAX_PARAM. +.TP 8 +RF (global output) HPL_T_FACT * +On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, +the first NRFS entries of this array contain the various +recursive factorization algorithms to run the code with. +.TP 8 +NTPS (global output) int * +On exit, NTPS specifies the number of different values that +can be used for the broadcast topologies to be tested. NTPS +is less than or equal to HPL_MAX_PARAM. 
+.TP 8 +TP (global output) HPL_T_TOP * +On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, +the first NTPS entries of this array contain the various +broadcast (along rows) topologies to run the code with. +.TP 8 +NDHS (global output) int * +On exit, NDHS specifies the number of different values that +can be used for the lookahead depths to be tested. NDHS is +less than or equal to HPL_MAX_PARAM. +.TP 8 +DH (global output) int * +On entry, DH is an array of dimension HPL_MAX_PARAM. On +exit, the first NDHS entries of this array contain the values +of lookahead depths to run the code with. Each value is either +0 (no lookahead) or greater than zero. +.TP 8 +FSWAP (global output) HPL_T_SWAP * +On exit, FSWAP specifies the swapping algorithm to be used in +all tests. +.TP 8 +TSWAP (global output) int * +On exit, TSWAP specifies the swapping threshold as a number +of columns when the mixed swapping algorithm was chosen. +.TP 8 +L1NOTRAN (global output) int * +On exit, L1NOTRAN specifies whether the upper triangle of the +panels of columns should be stored in no-transposed form +(L1NOTRAN=1) or in transposed form (L1NOTRAN=0). +.TP 8 +UNOTRAN (global output) int * +On exit, UNOTRAN specifies whether the panels of rows should +be stored in no-transposed form (UNOTRAN=1) or transposed +form (UNOTRAN=0) during their broadcast. +.TP 8 +EQUIL (global output) int * +On exit, EQUIL specifies whether equilibration during the +swap-broadcast of the panel of rows should be performed +(EQUIL=1) or not (EQUIL=0). +.TP 8 +ALIGN (global output) int * +On exit, ALIGN specifies the alignment of the dynamically +allocated buffers in double precision words. ALIGN is greater +than zero. +.SH SEE ALSO +.BR HPL_pddriver \ (3), +.BR HPL_pdtest \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlamch.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlamch.3 new file mode 100644 index 000000000..7ce46c23e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlamch.3 @@ -0,0 +1,53 @@ +.TH HPL_pdlamch 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlamch \- determines machine-specific arithmetic constants. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_pdlamch(\fR +\fB\&MPI_Comm\fR +\fI\&COMM\fR, +\fB\&const HPL_T_MACH\fR +\fI\&CMACH\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlamch\fR +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such that +1/sfmin does not overflow, the base of the machine (base), the precision +(prec), the number of (base) digits in the mantissa (t), whether +rounding occurs in addition (rnd = 1.0 if so and 0.0 otherwise), the minimum +exponent before (gradual) underflow (emin), the underflow threshold +(rmin) = base**(emin-1), the largest exponent before overflow (emax), the +overflow threshold (rmax) = (base**emax)*(1-eps). +.SH ARGUMENTS +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection.
+.TP 8 +CMACH (global input) const HPL_T_MACH +Specifies the value to be returned by HPL_pdlamch + = HPL_MACH_EPS, HPL_pdlamch := eps (default) + = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + = HPL_MACH_BASE, HPL_pdlamch := base + = HPL_MACH_PREC, HPL_pdlamch := eps*base + = HPL_MACH_MLEN, HPL_pdlamch := t + = HPL_MACH_RND, HPL_pdlamch := rnd + = HPL_MACH_EMIN, HPL_pdlamch := emin + = HPL_MACH_RMIN, HPL_pdlamch := rmin + = HPL_MACH_EMAX, HPL_pdlamch := emax + = HPL_MACH_RMAX, HPL_pdlamch := rmax + +where + + eps = relative machine precision, + sfmin = safe minimum, + base = base of the machine, + prec = eps*base, + t = number of digits in the mantissa, + rnd = 1.0 if rounding occurs in addition, + emin = minimum exponent before underflow, + rmin = underflow threshold, + emax = largest exponent before overflow, + rmax = overflow threshold. diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlange.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlange.3 new file mode 100644 index 000000000..30593401b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlange.3 @@ -0,0 +1,68 @@ +.TH HPL_pdlange 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlange \- Compute ||A||. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_pdlange(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const HPL_T_NORM\fR +\fI\&NORM\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&const double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlange\fR +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a distributed matrix A: + + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +NORM (global input) const HPL_T_NORM +On entry, NORM specifies the value to be returned by this +function as described above. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +A (local input) const double * +On entry, A points to an array of dimension (LDA,LocQ(N)), +that contains the local pieces of the distributed matrix A. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). +.SH SEE ALSO +.BR HPL_pdlaprnt \ (3), +.BR HPL_fprintf \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaprnt.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaprnt.3 new file mode 100644 index 000000000..feb010a67 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaprnt.3 @@ -0,0 +1,72 @@ +.TH HPL_pdlaprnt 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaprnt \- Print a distributed matrix A. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaprnt(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&IAROW\fR, +\fB\&const int\fR +\fI\&IACOL\fR, +\fB\&const char *\fR +\fI\&CMATNM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaprnt\fR +prints to standard error a distributed matrix A. The +local pieces of A are sent to the process of coordinates (0,0) in +the grid and then printed. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the coefficient +matrix A. M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the +coefficient matrix A. N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix. NB must be larger than one. +.TP 8 +A (local input) double * +On entry, A points to an array of dimension (LDA,LocQ(N)). +This array contains the coefficient matrix to be printed. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). 
+.TP 8 +IAROW (global input) const int +On entry, IAROW specifies the row process coordinate owning +the first row of A. IAROW must be larger than or equal to +zero and less than NPROW. +.TP 8 +IACOL (global input) const int +On entry, IACOL specifies the column process coordinate +owning the first column of A. IACOL must be larger than or +equal to zero and less than NPCOL. +.TP 8 +CMATNM (global input) const char * +On entry, CMATNM is the name of the matrix to be printed. +.SH SEE ALSO +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00N.3 new file mode 100644 index 000000000..3875400e3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00N.3 @@ -0,0 +1,65 @@ +.TH HPL_pdlaswp00N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp00N \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp00N(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp00N\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. 
Mono +directional links will double this communication cost. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be broadcast and swapped) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx0 \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03N \ (3), +.BR HPL_dlaswp04N \ (3), +.BR HPL_dlaswp05N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00T.3 new file mode 100644 index 000000000..39901ba4b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp00T.3 @@ -0,0 +1,65 @@ +.TH HPL_pdlaswp00T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp00T \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp00T(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp00T\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. 
+ +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be broadcast and swapped) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTT \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx0 \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp02N \ (3), +.BR HPL_dlaswp03T \ (3), +.BR HPL_dlaswp04T \ (3), +.BR HPL_dlaswp05T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01N.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01N.3 new file mode 100644 index 000000000..1ee14c0a8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01N.3 @@ -0,0 +1,69 @@ +.TH HPL_pdlaswp01N 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp01N \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp01N(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp01N\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. 
+.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNN \ (3), +.BR HPL_pdupdateTN \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_spreadN \ (3), +.BR HPL_equil \ (3), +.BR HPL_rollN \ (3), +.BR HPL_dlaswp00N \ (3), +.BR HPL_dlaswp01N \ (3), +.BR HPL_dlaswp06N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01T.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01T.3 new file mode 100644 index 000000000..e5c5de024 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdlaswp01T.3 @@ -0,0 +1,69 @@ +.TH HPL_pdlaswp01T 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdlaswp01T \- Broadcast a column panel L and swap the row panel U. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdlaswp01T(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdlaswp01T\fR +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcast a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. 
With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be swapped and broadcast starting at +the current position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdupdateNT \ (3), +.BR HPL_pdupdateTT \ (3), +.BR HPL_pipid \ (3), +.BR HPL_plindx1 \ (3), +.BR HPL_plindx10 \ (3), +.BR HPL_spreadT \ (3), +.BR HPL_equil \ (3), +.BR HPL_rollT \ (3), +.BR HPL_dlaswp10N \ (3), +.BR HPL_dlaswp01T \ (3), +.BR HPL_dlaswp06T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmatgen.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmatgen.3 new file mode 100644 index 000000000..5b4675c6e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmatgen.3 @@ -0,0 +1,67 @@ +.TH HPL_pdmatgen 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdmatgen \- Parallel random matrix generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdmatgen(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR, +\fB\&double *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&LDA\fR, +\fB\&const int\fR +\fI\&ISEED\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdmatgen\fR +generates (or regenerates) a parallel random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +M (global input) const int +On entry, M specifies the number of rows of the matrix A. +M must be at least zero. +.TP 8 +N (global input) const int +On entry, N specifies the number of columns of the matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.TP 8 +A (local output) double * +On entry, A points to an array of dimension (LDA,LocQ(N)). +On exit, this array contains the coefficients of the randomly +generated matrix. +.TP 8 +LDA (local input) const int +On entry, LDA specifies the leading dimension of the array A. +LDA must be at least max(1,LocP(M)). 
+.TP 8 +ISEED (global input) const int +On entry, ISEED specifies the seed number to generate the +matrix A. ISEED must be at least zero. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_drand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmxswp.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmxswp.3 new file mode 100644 index 000000000..41c604373 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdmxswp.3 @@ -0,0 +1,78 @@ +.TH HPL_pdmxswp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdmxswp \- swaps and broadcasts the pivot row. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdmxswp(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&II\fR, +\fB\&const int\fR +\fI\&JJ\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdmxswp\fR +swaps and broadcasts the absolute value max row using +bi-directional exchange. The buffer is partially set by HPL_dlocmax. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by + + log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + +where lat and bdwth are the latency and bandwidth of the network for +double precision real elements. Communication only occurs in one +process column. Mono-directional links will cause the communication +cost to double. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information.
+.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of the matrix +column on which this function operates. +.TP 8 +II (local input) const int +On entry, II specifies the row offset where the column to be +operated on starts with respect to the panel. +.TP 8 +JJ (local input) const int +On entry, JJ specifies the column offset where the column to +be operated on starts with respect to the panel. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2 * (4+2*N0). +It is assumed that HPL_dlocmax was called prior to this +routine to initialize the first four entries of this array. +On exit, the N0 length max row is stored in WORK[4:4+N0-1]; +Note that this is also the JJth row (or column) of L1. The +remaining part is used as a temporary array. +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrN.3 new file mode 100644 index 000000000..2e94a36a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpancrN \- Crout panel factorization. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpancrN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpancrN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in no-transpose form (i.e. just like the input +matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step.
The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrT.3 new file mode 100644 index 000000000..035e60d60 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpancrT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpancrT \- Crout panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpancrT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpancrT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. 
The lower triangular N0-by-N0 upper block +of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A).
+.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_disp.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_disp.3 new file mode 100644 index 000000000..94a212ced --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_disp.3 @@ -0,0 +1,24 @@ +.TH HPL_pdpanel_disp 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_disp \- Deallocate a panel data structure. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pdpanel_disp(\fR +\fB\&HPL_T_panel * *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_disp\fR +deallocates the panel structure and resources and +stores the error code returned by the panel factorization. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * * +On entry, PANEL points to the address of the panel data +structure to be deallocated. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_free \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_free.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_free.3 new file mode 100644 index 000000000..cfad40c3d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_free.3 @@ -0,0 +1,24 @@ +.TH HPL_pdpanel_free 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_free \- Deallocate the panel resources.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pdpanel_free(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_free\fR +deallocates the panel resources and stores the error +code returned by the panel factorization. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the panel data structure from +which the resources should be deallocated. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_disp \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_init.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_init.3 new file mode 100644 index 000000000..cbb0e7e3a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_init.3 @@ -0,0 +1,76 @@ +.TH HPL_pdpanel_init 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_init \- Initialize the panel resources. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanel_init(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&JB\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&TAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_init\fR +initializes a panel data structure. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +M (local input) const int +On entry, M specifies the global number of rows of the panel. +M must be at least zero. 
+.TP 8 +N (local input) const int +On entry, N specifies the global number of columns of the +panel and trailing submatrix. N must be at least zero. +.TP 8 +JB (global input) const int +On entry, JB specifies the number of columns of the panel. +JB must be at least zero. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.TP 8 +IA (global input) const int +On entry, IA is the global row index identifying the panel +and trailing submatrix. IA must be at least zero. +.TP 8 +JA (global input) const int +On entry, JA is the global column index identifying the panel +and trailing submatrix. JA must be at least zero. +.TP 8 +TAG (global input) const int +On entry, TAG is the row broadcast message id. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.SH SEE ALSO +.BR HPL_pdpanel_new \ (3), +.BR HPL_pdpanel_disp \ (3), +.BR HPL_pdpanel_free \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_new.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_new.3 new file mode 100644 index 000000000..ed9fe1053 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanel_new.3 @@ -0,0 +1,76 @@ +.TH HPL_pdpanel_new 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanel_new \- Create a panel data structure.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanel_new(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&JB\fR, +\fB\&HPL_T_pmat *\fR +\fI\&A\fR, +\fB\&const int\fR +\fI\&IA\fR, +\fB\&const int\fR +\fI\&JA\fR, +\fB\&const int\fR +\fI\&TAG\fR, +\fB\&HPL_T_panel * *\fR +\fI\&PANEL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanel_new\fR +creates and initializes a panel data structure. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters. +.TP 8 +M (local input) const int +On entry, M specifies the global number of rows of the panel. +M must be at least zero. +.TP 8 +N (local input) const int +On entry, N specifies the global number of columns of the +panel and trailing submatrix. N must be at least zero. +.TP 8 +JB (global input) const int +On entry, JB specifies the number of columns of the panel. +JB must be at least zero. +.TP 8 +A (local input/output) HPL_T_pmat * +On entry, A points to the data structure containing the local +array information. +.TP 8 +IA (global input) const int +On entry, IA is the global row index identifying the panel +and trailing submatrix. IA must be at least zero. +.TP 8 +JA (global input) const int +On entry, JA is the global column index identifying the panel +and trailing submatrix. JA must be at least zero. +.TP 8 +TAG (global input) const int +On entry, TAG is the row broadcast message id. +.TP 8 +PANEL (local input/output) HPL_T_panel * * +On entry, PANEL points to the address of the panel data +structure to create and initialize. +.SH SEE ALSO +.BR HPL_pdpanel_init \ (3), +.BR HPL_pdpanel_disp \ (3), +.BR HPL_pdpanel_free \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllN.3 new file mode 100644 index 000000000..eca1f4a34 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanllN \- Left-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanllN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanllN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. 
Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllT.3 new file mode 100644 index 000000000..a18d52c61 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanllT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanllT \- Left-looking panel factorization.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanllT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanllT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel.
This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlN.3 new file mode 100644 index 000000000..cae2b5b5b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlN.3 @@ -0,0 +1,82 @@ +.TH HPL_pdpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanrlN \- Right-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanrlN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanrlN\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). 
+ +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A.
+.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlT \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlT.3 new file mode 100644 index 000000000..434444bf7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdpanrlT.3 @@ -0,0 +1,81 @@ +.TH HPL_pdpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdpanrlT \- Right-looking panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdpanrlT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdpanrlT\fR +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. 
The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3).
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrN.3 new file mode 100644 index 000000000..fc6dd25f8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpancrN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpancrN \- Crout recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpancrN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpancrN\fR +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat.
Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrT.3 new file mode 100644 index 000000000..ea0a57bc9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpancrT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpancrT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpancrT \- Crout recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpancrT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpancrT\fR +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. 
+The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllN.3 new file mode 100644 index 000000000..29b6db40a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanllN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanllN \- Left-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanllN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanllN\fR +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. 
On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllT.3 new file mode 100644 index 000000000..18db5c1fb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanllT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanllT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanllT \- Left-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanllT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanllT\fR +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlN.3 new file mode 100644 index 000000000..441560c14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlN.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanrlN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanrlN \- Right-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanrlN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanrlN\fR +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. 
just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlT \ (3), +.BR HPL_pdfact \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlT.3 new file mode 100644 index 000000000..e5bd9d110 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdrpanrlT.3 @@ -0,0 +1,79 @@ +.TH HPL_pdrpanrlT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdrpanrlT \- Right-looking recursive panel factorization. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdrpanrlT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&M\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&ICOFF\fR, +\fB\&double *\fR +\fI\&WORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdrpanrlT\fR +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. 
+.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +M (local input) const int +On entry, M specifies the local number of rows of sub(A). +.TP 8 +N (local input) const int +On entry, N specifies the local number of columns of sub(A). +.TP 8 +ICOFF (global input) const int +On entry, ICOFF specifies the row and column offset of sub(A) +in A. +.TP 8 +WORK (local workspace) double * +On entry, WORK is a workarray of size at least 2*(4+2*N0). +.SH SEE ALSO +.BR HPL_dlocmax \ (3), +.BR HPL_dlocswpN \ (3), +.BR HPL_dlocswpT \ (3), +.BR HPL_pdmxswp \ (3), +.BR HPL_pdpancrN \ (3), +.BR HPL_pdpancrT \ (3), +.BR HPL_pdpanllN \ (3), +.BR HPL_pdpanllT \ (3), +.BR HPL_pdpanrlN \ (3), +.BR HPL_pdpanrlT \ (3), +.BR HPL_pdrpancrN \ (3), +.BR HPL_pdrpancrT \ (3), +.BR HPL_pdrpanllN \ (3), +.BR HPL_pdrpanllT \ (3), +.BR HPL_pdrpanrlN \ (3), +.BR HPL_pdfact \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtest.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtest.3 new file mode 100644 index 000000000..eaaff2bff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtest.3 @@ -0,0 +1,63 @@ +.TH HPL_pdtest 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdtest \- Perform one test. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdtest(\fR +\fB\&HPL_T_test *\fR +\fI\&TEST\fR, +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_palg *\fR +\fI\&ALGO\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&const int\fR +\fI\&NB\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdtest\fR +performs one test given a set of parameters such as the +process grid, the problem size, the distribution blocking factor ... 
+This function generates the data, calls and times the linear system +solver, checks the accuracy of the obtained vector solution and +writes this information to the file pointed to by TEST->outfp. +.SH ARGUMENTS +.TP 8 +TEST (global input) HPL_T_test * +On entry, TEST points to a testing data structure: outfp +specifies the output file where the results will be printed. +It is only defined and used by the process 0 of the grid. +thrsh specifies the threshold value for the test ratio. +Concretely, a test is declared "PASSED" if and only if the +following inequality is satisfied: +||Ax-b||_oo / ( epsil * + ( || x ||_oo * || A ||_oo + || b ||_oo ) * + N ) < thrsh. +epsil is the relative machine precision of the distributed +computer. Finally the test counters, kfail, kpass, kskip and +ktest are updated as follows: if the test passes, kpass is +incremented by one; if the test fails, kfail is incremented +by one; if the test is skipped, kskip is incremented by one. +ktest is left unchanged. +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +ALGO (global input) HPL_T_palg * +On entry, ALGO points to the data structure containing the +algorithmic parameters to be used for this test. +.TP 8 +N (global input) const int +On entry, N specifies the order of the coefficient matrix A. +N must be at least zero. +.TP 8 +NB (global input) const int +On entry, NB specifies the blocking factor used to partition +and distribute the matrix A. NB must be larger than one. +.SH SEE ALSO +.BR HPL_pddriver \ (3), +.BR HPL_pdinfo \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtrsv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtrsv.3 new file mode 100644 index 000000000..5d2d14dcd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdtrsv.3 @@ -0,0 +1,49 @@ +.TH HPL_pdtrsv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdtrsv \- Solve triu( A ) x = b. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdtrsv(\fR +\fB\&HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&HPL_T_pmat *\fR +\fI\&AMAT\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdtrsv\fR +solves an upper triangular system of linear equations. + +The rhs is the last column of the N by N+1 matrix A. The solve starts +in the process column owning the Nth column of A, so the rhs b may +need to be moved one process column to the left at the beginning. The +routine therefore needs a column vector in every process column but +the one owning b. The result is replicated in all process rows, and +returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + +The algorithm uses decreasing one-ring broadcast in process rows and +columns implemented in terms of synchronous communication point to +point primitives. The lookahead of depth 1 is used to minimize the +critical path. This entire operation is essentially ``latency'' bound +and an estimate of its running time is given by: + + (move rhs) lat + N / ( P bdwth ) + + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + gam2 N^2 / ( P Q ), + +where gam2 is an estimate of the Level 2 BLAS rate of execution. +There are N / NB diagonal blocks. One must exchange 2 messages of +length NB to compute the next NB entries of the vector solution, as +well as performing a total of N^2 floating point operations. +.SH ARGUMENTS +.TP 8 +GRID (local input) HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. 
+.TP 8 +AMAT (local input/output) HPL_T_pmat * +On entry, AMAT points to the data structure containing the +local array information. +.SH SEE ALSO +.BR HPL_pdgesv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNN.3 new file mode 100644 index 000000000..e20929a27 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNN.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateNN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateNN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateNN\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp01N \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNT.3 new file mode 100644 index 000000000..276c2ceda --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateNT.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateNT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateNT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateNT\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTN.3 new file mode 100644 index 000000000..091859d01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTN.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateTN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateTN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateTN\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp01N \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTT.3 new file mode 100644 index 000000000..34502c6ef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pdupdateTT.3 @@ -0,0 +1,48 @@ +.TH HPL_pdupdateTT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pdupdateTT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&NN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pdupdateTT\fR +broadcast - forward the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local output) int * +On exit, IFLAG indicates whether or not the broadcast has +been completed when PBCST is not NULL on entry. In that case, +IFLAG is left unchanged. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be updated) information. +.TP 8 +NN (local input) const int +On entry, NN specifies the local number of columns of the +trailing submatrix to be updated starting at the current +position. NN must be at least zero. +.SH SEE ALSO +.BR HPL_pdgesv \ (3), +.BR HPL_pdgesv0 \ (3), +.BR HPL_pdgesvK1 \ (3), +.BR HPL_pdgesvK2 \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_perm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_perm.3 new file mode 100644 index 000000000..9476b5eff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_perm.3 @@ -0,0 +1,50 @@ +.TH HPL_perm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_perm \- Combine 2 index arrays - Generate the permutation. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_perm(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_perm\fR +combines two index arrays and generate the corresponding +permutation. First, this function computes the inverse of LINDXA, and +then combine it with LINDXAU. Second, in order to be able to perform +the permutation in place, LINDXAU is overwritten by the sequence of +permutation producing the same result. What we ultimately want to +achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the +call to this function, this in place permutation can be performed by +for i in [0..N) swap U[i] with U[LINDXAU[i]]. +.SH ARGUMENTS +.TP 8 +N (global input) const int +On entry, N specifies the length of the arrays LINDXA and +LINDXAU. N should be at least zero. +.TP 8 +LINDXA (global input/output) int * +On entry, LINDXA is an array of dimension N containing the +source indexes. On exit, LINDXA contains the combined index +array. +.TP 8 +LINDXAU (global input/output) int * +On entry, LINDXAU is an array of dimension N containing the +target indexes. On exit, LINDXAU contains the sequence of +permutation, that should be applied in increasing order to +permute the underlying array U in place. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension N. 
+.SH SEE ALSO +.BR HPL_plindx1 \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pipid.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pipid.3 new file mode 100644 index 000000000..6a8f5f277 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pipid.3 @@ -0,0 +1,79 @@ +.TH HPL_pipid 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pipid \- Simplify the pivot vector. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pipid(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&int *\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&IPID\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pipid\fR +computes an array IPID that contains the source and final +destination of matrix rows resulting from the application of N +interchanges as computed by the LU factorization with row partial +pivoting. The array IPID is such that the row of global index IPID(i) +should be mapped onto the row of global index IPID(i+1). Note that we +cannot really know the length of IPID a priori. However, we know that +this array is at least 2*N long, since there are N rows to swap and +broadcast. The length of this array must be smaller than or equal to +4*N, since every row is swapped with at most a single distinct remote +row. The algorithm constructing IPID goes as follows: Let IA be the +global index of the first row to be swapped. + +For every row src IA + i with i in [0..N) to be swapped with row dst +such that dst is given by DPIV[i]: + +Is row src the destination of a previous row of the current block, +that is, is there k odd such that IPID(k) is equal to src ? + Yes: update this destination with dst. For example, if the +pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, +we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it +was thought so far ... 
+ No : add the pair (src,dst) at the end of IPID; row src has not +been moved yet. + +Is row dst different from src the destination of a previous row of +the current block, i.e., is there k odd such that IPID(k) is equal to +dst ? + Yes: update IPID(k) with src. For example, if the pivot array +is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in +fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought +so far ... + No : add the pair (dst,src) at the end of IPID; row dst has not +been moved yet. + +Note that when src is equal to dst, the pair (dst,src) should not be +added to IPID in order to avoid duplicated entries in this array. +During the construction of the array IPID, we make sure that the +first N entries are such that IPID(k) with k odd is equal to IA+k/2. +For k in [0..K/2), the row of global index IPID(2*k) should be +mapped onto the row of global index IPID(2*k+1). +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global output) int * +On exit, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global output) int * +On entry, IPID is an array of length 4*N. On exit, the first +K entries of that array contain the src and final destination +resulting from the application of the N interchanges as +specified by DPIV. The pairs (src,dst) are contiguously +stored and sorted so that IPID(2*i+1) is equal to IA+i with i +in [0..N) +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx0.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx0.3 new file mode 100644 index 000000000..2b889947a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx0.3 @@ -0,0 +1,168 @@ +.TH HPL_plindx0 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx0 \- Compute local swapping index arrays. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx0(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&LLEN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx0\fR +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. + +On entry, the array IPID of length K is such that the row of global +index IPID(i) should be mapped onto row of global index IPID(i+1). +Let IA be the global index of the first row to be swapped. For k in +[0..K/2), the row of global index IPID(2*k) should be mapped onto the +row of global index IPID(2*k+1). The question then, is to determine +which rows should ultimately be part of U. + +First, some rows of the process ICURROW may be swapped locally. One +of this row belongs to U, the other one belongs to my local piece of +A. The other rows of the current block are swapped with remote rows +and are thus not part of U. These rows however should be sent along, +and grabbed by the other processes as we progress in the exchange +phase. + +So, assume that I am ICURROW and consider a row of index IPID(2*i) +that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less +than N, this row is locally swapped and should be copied into U at +the position IPID(2*i+1) - IA. No row will be exchanged for this one. 
+If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be +locally copied into my local piece of A at the position corresponding +to the row of global index IPID(2*i+1). + +If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) +is to be swapped away and strictly speaking does not belong to U, but +to A remotely. Since this process will however send this array U, +this row is copied into U, exactly where the row IPID(2*i+1) should +go. For this, we search IPID for k1, such that IPID(2*k1) is equal to +IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position +IPID(2*k1+1)-IA. + +It is thus important to put the rows that go into U, i.e., such that +IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By +doing so, U is formed, and the local copy is performed in just one +sweep. + +Two lists LINDXA and LINDXAU are built. LINDXA contains the local +index of the rows I have that should be copied. LINDXAU contains the +local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A +is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). In the process +ICURROW, the initial packing algorithm proceeds as follows. 
+ + for all entries in IPID, + if IPID(2*i) is in ICURROW, + if IPID(2*i+1) is in ICURROW, + if( IPID(2*i+1) - IA < N ) + save corresponding local position + of this row (LINDXA); + save local position (LINDXAU) in U + where this row goes; + [copy row IPID(2*i) in U at position + IPID(2*i+1)-IA; ]; + else + save corresponding local position of + this row (LINDXA); + save local position (-LINDXAU) in A + where this row goes; + [copy row IPID(2*i) in my piece of A + at IPID(2*i+1);] + end if + else + find k1 such that IPID(2*k1) = IPID(2*i+1); + copy row IPID(2*i) in U at position + IPID(2*k1+1)-IA; + save corresponding local position of this + row (LINDXA); + save local position (LINDXAU) in U where + this row goes; + end if + end if + end for + +Second, if I am not the current row process ICURROW, all source rows +in IPID that I own are part of U. Indeed, they are swapped with one +row of the current block of rows, and the main factorization +algorithm proceeds one row after each other. The processes different +from ICURROW, should exchange and accumulate those rows until they +receive some data previously owned by the process ICURROW. + +In processes different from ICURROW, the initial packing algorithm +proceeds as follows. Consider a row of global index IPID(2*i) that I +own. When I will be receiving data previously owned by ICURROW, i.e., +U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, +and this particular row of U should be first copied into my piece of +A, at A(il,:), where il is the local row index corresponding to +IPID(2*i). Now,initially, this row will be packed into workspace, say +as the kth row of that work array. The following algorithm sets +LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row +should be copied. LINDXA(k) stores the local index in A where this +row of U should be copied, i.e il. 
+ + for all entries in IPID, + if IPID(2*i) is not in ICURROW, + copy row IPID(2*i) in work array; + save corresponding local position + of this row (LINDXA); + save position (LINDXAU) in U where + this row should be copied; + end if + end for + +Since we are at it, we also globally figure out how many rows every +process has. That is necessary, because it would rather be cumbersome +to figure it on the fly during the bi-directional exchange phase. +This information is kept in the array LLEN of size NPROW. Also note +that the arrays LINDXA and LINDXAU are of max length equal to 2*N. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +LINDXA (local output) int * +On entry, LINDXA is an array of dimension 2*N. On exit, this +array contains the local indexes of the rows of A I have that +should be copied into U. +.TP 8 +LINDXAU (local output) int * +On entry, LINDXAU is an array of dimension 2*N. On exit, this +array contains the local destination information encoded as +follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be +copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). +.TP 8 +LLEN (global output) int * +On entry, LLEN is an array of length NPROW. On exit, it +contains how many rows every process has. +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx1.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx1.3 new file mode 100644 index 000000000..7d4f8feba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx1.3 @@ -0,0 +1,106 @@ +.TH HPL_plindx1 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx1 \- Compute local swapping index arrays. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx1(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&IPA\fR, +\fB\&int *\fR +\fI\&LINDXA\fR, +\fB\&int *\fR +\fI\&LINDXAU\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR, +\fB\&int *\fR +\fI\&PERMU\fR, +\fB\&int *\fR +\fI\&IWORK\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx1\fR +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. In addition, this function computes +three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic +mapping information for the spreading phase. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) const int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +IPA (global output) int * +On exit, IPA specifies the number of rows that the current +process row has that either belong to U or should be swapped +with remote rows of A. 
+.TP 8 +LINDXA (global output) int * +On entry, LINDXA is an array of dimension 2*N. On exit, this +array contains the local indexes of the rows of A I have that +should be copied into U. +.TP 8 +LINDXAU (global output) int * +On entry, LINDXAU is an array of dimension 2*N. On exit, this +array contains the local destination information encoded as +follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be +copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). +.TP 8 +IPLEN (global output) int * +On entry, IPLEN is an array of dimension NPROW + 1. On exit, +this array is such that IPLEN[i] is the number of rows of A +in the processes before process IPMAP[i] after the sort, +with the convention that IPLEN[nprow] is the total number of +rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the +local number of rows of A that should be moved to the process +IPMAP[i]. IPLEN is such that the number of rows of the source +process row can be computed as IPLEN[1] - IPLEN[0], and the +remaining entries of this array are sorted so that the +quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROW. On exit, this +array contains the logarithmic mapping of the processes. In +other words, IPMAP[myrow] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROW. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROW) +.TP 8 +PERMU (global output) int * +On entry, PERMU is an array of dimension JB. On exit, PERMU +contains a sequence of permutations, that should be applied +in increasing order to permute in place the row panel U. +.TP 8 +IWORK (workspace) int * +On entry, IWORK is a workarray of dimension 2*JB. 
+.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx10.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx10.3 new file mode 100644 index 000000000..d22d64f36 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_plindx10.3 @@ -0,0 +1,68 @@ +.TH HPL_plindx10 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_plindx10 \- Compute the logarithmic maps for the spreading. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_plindx10(\fR +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&K\fR, +\fB\&const int *\fR +\fI\&IPID\fR, +\fB\&int *\fR +\fI\&IPLEN\fR, +\fB\&int *\fR +\fI\&IPMAP\fR, +\fB\&int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_plindx10\fR +computes three arrays IPLEN, IPMAP and IPMAPM1 that +contain the logarithmic mapping information for the spreading phase. +.SH ARGUMENTS +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel information. +.TP 8 +K (global input) const int +On entry, K specifies the number of entries in IPID. K is at +least 2*N, and at most 4*N. +.TP 8 +IPID (global input) const int * +On entry, IPID is an array of length K. The first K entries +of that array contain the src and final destination resulting +from the application of the interchanges. +.TP 8 +IPLEN (global output) int * +On entry, IPLEN is an array of dimension NPROW + 1. On exit, +this array is such that IPLEN[i] is the number of rows of A +in the processes before process IMAP[i] after the sort, with +the convention that IPLEN[nprow] is the total number of rows. +In other words, IPLEN[i+1] - IPLEN[i] is the local number of +rows of A that should be moved for each process. 
IPLEN is +such that the number of rows of the source process row can be +computed as IPLEN[1] - IPLEN[0], and the remaining entries of +this array are sorted so that the quantities IPLEN[i+1] - +IPLEN[i] are logarithmically sorted. +.TP 8 +IPMAP (global output) int * +On entry, IPMAP is an array of dimension NPROW. On exit, this +array contains the logarithmic mapping of the processes. In +other words, IPMAP[myrow] is the corresponding sorted process +coordinate. +.TP 8 +IPMAPM1 (global output) int * +On entry, IPMAPM1 is an array of dimension NPROW. On exit, +this array contains the inverse of the logarithmic mapping +contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in +[0.. NPROW) +.SH SEE ALSO +.BR HPL_pdlaswp00N \ (3), +.BR HPL_pdlaswp00T \ (3), +.BR HPL_pdlaswp01N \ (3), +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pnum.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pnum.3 new file mode 100644 index 000000000..38956c5a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pnum.3 @@ -0,0 +1,38 @@ +.TH HPL_pnum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pnum \- Rank determination. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_pnum(\fR +\fB\&const HPL_T_grid *\fR +\fI\&GRID\fR, +\fB\&const int\fR +\fI\&MYROW\fR, +\fB\&const int\fR +\fI\&MYCOL\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pnum\fR +determines the rank of a process as a function of its +coordinates in the grid. +.SH ARGUMENTS +.TP 8 +GRID (local input) const HPL_T_grid * +On entry, GRID points to the data structure containing the +process grid information. +.TP 8 +MYROW (local input) const int +On entry, MYROW specifies the row coordinate of the process +whose rank is to be determined. MYROW must be greater than or +equal to zero and less than NPROW. 
+.TP 8 +MYCOL (local input) const int +On entry, MYCOL specifies the column coordinate of the +process whose rank is to be determined. MYCOL must be greater +than or equal to zero and less than NPCOL. +.SH SEE ALSO +.BR HPL_grid_init \ (3), +.BR HPL_grid_info \ (3), +.BR HPL_grid_exit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer.3 new file mode 100644 index 000000000..550703aee --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer.3 @@ -0,0 +1,35 @@ +.TH HPL_ptimer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer \- Timer facility. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_ptimer(\fR +\fB\&const int\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_ptimer\fR +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_ptimer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_ptimer_boot() prior to any of +the functions mentioned above. +.SH ARGUMENTS +.TP 8 +I (global input) const int +On entry, I specifies the timer to stop/start. +.SH SEE ALSO +.BR HPL_ptimer_cputime \ (3), +.BR HPL_ptimer_walltime \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_cputime.3 new file mode 100644 index 000000000..a93a1c208 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_cputime.3 @@ -0,0 +1,23 @@ +.TH HPL_ptimer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer_cputime \- Return the CPU time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_ptimer_cputime();\fR +.SH DESCRIPTION +\fB\&HPL_ptimer_cputime\fR +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. +.SH SEE ALSO +.BR HPL_ptimer_walltime \ (3), +.BR HPL_ptimer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_walltime.3 new file mode 100644 index 000000000..37e5e8c54 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_ptimer_walltime.3 @@ -0,0 +1,14 @@ +.TH HPL_ptimer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_ptimer_walltime \- Return the elapsed (wall-clock) time. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_ptimer_walltime();\fR +.SH DESCRIPTION +\fB\&HPL_ptimer_walltime\fR +returns the elapsed (wall-clock) time. +.SH SEE ALSO +.BR HPL_ptimer_cputime \ (3), +.BR HPL_ptimer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pwarn.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pwarn.3 new file mode 100644 index 000000000..14e4a65d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_pwarn.3 @@ -0,0 +1,45 @@ +.TH HPL_pwarn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_pwarn \- displays an error message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_pwarn(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_pwarn\fR +displays an error message. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH SEE ALSO +.BR HPL_pabort \ (3), +.BR HPL_fprintf \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rand.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rand.3 new file mode 100644 index 000000000..8b1918fea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rand.3 @@ -0,0 +1,28 @@ +.TH HPL_rand 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rand \- random number generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_rand();\fR +.SH DESCRIPTION +\fB\&HPL_rand\fR +generates the next number in the random sequence. This +function ensures that this number lies in the interval (-0.5, 0.5]. + +The static array irand contains the information (2 integers) required +to generate the next number in the sequence X(n). This number is +computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the +constant d is the largest 64 bit positive integer. The array irand is +then updated for the generation of the next number X(n+1) in the +random sequence as follows X(n+1) = a * X(n) + c. The constants a and +c should have been preliminarily stored in the arrays ias and ics as +2 pairs of integers. The initialization of ias, ics and irand is +performed by the function HPL_setran. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_recv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_recv.3 new file mode 100644 index 000000000..d9136c14b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_recv.3 @@ -0,0 +1,49 @@ +.TH HPL_recv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_recv \- Receive a message. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_recv(\fR +\fB\&double *\fR +\fI\&RBUF\fR, +\fB\&int\fR +\fI\&RCOUNT\fR, +\fB\&int\fR +\fI\&SRC\fR, +\fB\&int\fR +\fI\&RTAG\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_recv\fR +is a simple wrapper around MPI_Recv. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. +.SH ARGUMENTS +.TP 8 +RBUF (local output) double * +On entry, RBUF specifies the starting address of buffer to be +received. +.TP 8 +RCOUNT (local input) int +On entry, RCOUNT specifies the number of double precision +entries in RBUF. RCOUNT must be at least zero. +.TP 8 +SRC (local input) int +On entry, SRC specifies the rank of the sending process in +the communication space defined by COMM. +.TP 8 +RTAG (local input) int +On entry, RTAG specifies the message tag to be used for this +communication operation. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_send \ (3), +.BR HPL_sendrecv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_reduce.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_reduce.3 new file mode 100644 index 000000000..c48f04ded --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_reduce.3 @@ -0,0 +1,56 @@ +.TH HPL_reduce 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_reduce \- Reduce operation. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_reduce(\fR +\fB\&void *\fR +\fI\&BUFFER\fR, +\fB\&const int\fR +\fI\&COUNT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR, +\fB\&const HPL_T_OP \fR +\fI\&OP\fR, +\fB\&const int\fR +\fI\&ROOT\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_reduce\fR +performs a global reduce operation across all processes of +a group. Note that the input buffer is used as workarray and in all +processes but the accumulating process corrupting the original data. +.SH ARGUMENTS +.TP 8 +BUFFER (local input/output) void * +On entry, BUFFER points to the buffer to be reduced. On +exit, and in process of rank ROOT this array contains the +reduced data. This buffer is also used as workspace during +the operation in the other processes of the group. +.TP 8 +COUNT (global input) const int +On entry, COUNT indicates the number of entries in BUFFER. +COUNT must be at least zero. +.TP 8 +DTYPE (global input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffers operands. +.TP 8 +OP (global input) const HPL_T_OP +On entry, OP is a pointer to the local combine function. +.TP 8 +ROOT (global input) const int +On entry, ROOT is the coordinate of the accumulating process. +.TP 8 +COMM (global/local input) MPI_Comm +The MPI communicator identifying the process collection. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollN.3 new file mode 100644 index 000000000..eac4deb66 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollN.3 @@ -0,0 +1,77 @@ +.TH HPL_rollN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rollN \- Roll U and forward the column panel. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_rollN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_rollN\fR +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be rolled) information. +.TP 8 +N (local input) const int +On entry, N specifies the number of columns of U. N must be +at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[NPROW]). +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process row. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. 
+.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollT.3 new file mode 100644 index 000000000..bab5bdffd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_rollT.3 @@ -0,0 +1,77 @@ +.TH HPL_rollT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_rollT \- Roll U and forward the column panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_rollT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_rollT\fR +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be rolled) information. +.TP 8 +N (local input) const int +On entry, N specifies the local number of rows of U. 
N must +be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U in each process row. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,N). +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process row. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01T \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sdrv.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sdrv.3 new file mode 100644 index 000000000..a11252d6a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sdrv.3 @@ -0,0 +1,67 @@ +.TH HPL_sdrv 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_sdrv \- Send and receive a message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_sdrv(\fR +\fB\&double *\fR +\fI\&SBUF\fR, +\fB\&int\fR +\fI\&SCOUNT\fR, +\fB\&int\fR +\fI\&STAG\fR, +\fB\&double *\fR +\fI\&RBUF\fR, +\fB\&int\fR +\fI\&RCOUNT\fR, +\fB\&int\fR +\fI\&RTAG\fR, +\fB\&int\fR +\fI\&PARTNER\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_sdrv\fR +is a simple wrapper around MPI_Sendrecv. Its main purpose is +to allow for some experimentation and tuning of this simple function. +Messages of length less than or equal to zero are not sent nor +received. 
Successful completion is indicated by the returned error +code HPL_SUCCESS. +.SH ARGUMENTS +.TP 8 +SBUF (local input) double * +On entry, SBUF specifies the starting address of buffer to be +sent. +.TP 8 +SCOUNT (local input) int +On entry, SCOUNT specifies the number of double precision +entries in SBUF. SCOUNT must be at least zero. +.TP 8 +STAG (local input) int +On entry, STAG specifies the message tag to be used for the +sending communication operation. +.TP 8 +RBUF (local output) double * +On entry, RBUF specifies the starting address of buffer to be +received. +.TP 8 +RCOUNT (local input) int +On entry, RCOUNT specifies the number of double precision +entries in RBUF. RCOUNT must be at least zero. +.TP 8 +RTAG (local input) int +On entry, RTAG specifies the message tag to be used for the +receiving communication operation. +.TP 8 +PARTNER (local input) int +On entry, PARTNER specifies the rank of the collaborative +process in the communication space defined by COMM. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_send \ (3), +.BR HPL_recv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_send.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_send.3 new file mode 100644 index 000000000..48ffc5d62 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_send.3 @@ -0,0 +1,49 @@ +.TH HPL_send 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_send \- Send a message. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&int\fR +\fB\&HPL_send(\fR +\fB\&double *\fR +\fI\&SBUF\fR, +\fB\&int\fR +\fI\&SCOUNT\fR, +\fB\&int\fR +\fI\&DEST\fR, +\fB\&int\fR +\fI\&STAG\fR, +\fB\&MPI_Comm\fR +\fI\&COMM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_send\fR +is a simple wrapper around MPI_Send. Its main purpose is +to allow for some experimentation / tuning of this simple routine. 
+Successful completion is indicated by the returned error code +MPI_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. +.SH ARGUMENTS +.TP 8 +SBUF (local input) double * +On entry, SBUF specifies the starting address of buffer to be +sent. +.TP 8 +SCOUNT (local input) int +On entry, SCOUNT specifies the number of double precision +entries in SBUF. SCOUNT must be at least zero. +.TP 8 +DEST (local input) int +On entry, DEST specifies the rank of the receiving process in +the communication space defined by COMM. +.TP 8 +STAG (local input) int +On entry, STAG specifies the message tag to be used for this +communication operation. +.TP 8 +COMM (local input) MPI_Comm +The MPI communicator identifying the communication space. +.SH SEE ALSO +.BR HPL_recv \ (3), +.BR HPL_sendrecv \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_setran.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_setran.3 new file mode 100644 index 000000000..e9a9433ae --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_setran.3 @@ -0,0 +1,37 @@ +.TH HPL_setran 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_setran \- Manage the random number generator. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_setran(\fR +\fB\&const int\fR +\fI\&OPTION\fR, +\fB\&int *\fR +\fI\&IRAN\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_setran\fR +initializes the random generator with the encoding of the +first number X(0) in the sequence, and the constants a and c used to +compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), +a and c are stored in the static variables irand, ias and ics. When +OPTION is 0 (resp. 1 and 2), irand (resp. ia and ic) is set to the +values of the input array IRAN. When OPTION is 3, IRAN is set to the +current value of irand, and irand is then incremented. 
+.SH ARGUMENTS +.TP 8 +OPTION (local input) const int +On entry, OPTION is an integer that specifies the operations +to be performed on the random generator as specified above. +.TP 8 +IRAN (local input/output) int * +On entry, IRAN is an array of dimension 2, that contains the +16-lower and 15-higher bits of a random number. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_xjumpm \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadN.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadN.3 new file mode 100644 index 000000000..452b8da34 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadN.3 @@ -0,0 +1,96 @@ +.TH HPL_spreadN 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_spreadN \- Spread row panel U and forward current column panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_spreadN(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int\fR +\fI\&SRCDIST\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_spreadN\fR +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of rows of U, that +should be spread on any given process row. This function also probes +for the presence of the column panel PBCST. In case of success, this +panel will be forwarded. If PBCST is NULL on input, this probing +mechanism will be disabled. 
+.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. +.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be spread) information. +.TP 8 +SIDE (global input) const enum HPL_SIDE +On entry, SIDE specifies whether the local piece of U located +in process IPMAP[SRCDIST] should be spread to the right or to +the left. This feature is used by the equilibration process. +.TP 8 +N (global input) const int +On entry, N specifies the local number of columns of U. N +must be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,IPLEN[nprow]). +.TP 8 +SRCDIST (local input) const int +On entry, SRCDIST specifies the source process that spreads +its piece of U. +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process before process IPMAP[i], with the convention +that IPLEN[nprow] is the total number of rows. In other words +IPLEN[i+1] - IPLEN[i] is the local number of rows of U that +should be moved to process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. 
This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01N \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadT.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadT.3 new file mode 100644 index 000000000..54f7dda31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_spreadT.3 @@ -0,0 +1,96 @@ +.TH HPL_spreadT 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_spreadT \- Spread row panel U and forward current column panel. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_spreadT(\fR +\fB\&HPL_T_panel *\fR +\fI\&PBCST\fR, +\fB\&int *\fR +\fI\&IFLAG\fR, +\fB\&HPL_T_panel *\fR +\fI\&PANEL\fR, +\fB\&const enum HPL_SIDE\fR +\fI\&SIDE\fR, +\fB\&const int\fR +\fI\&N\fR, +\fB\&double *\fR +\fI\&U\fR, +\fB\&const int\fR +\fI\&LDU\fR, +\fB\&const int\fR +\fI\&SRCDIST\fR, +\fB\&const int *\fR +\fI\&IPLEN\fR, +\fB\&const int *\fR +\fI\&IPMAP\fR, +\fB\&const int *\fR +\fI\&IPMAPM1\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_spreadT\fR +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of columns of U, +that should be spread on any given process row. This function also +probes for the presence of the column panel PBCST. If available, +this panel will be forwarded. If PBCST is NULL on input, this +probing mechanism will be disabled. +.SH ARGUMENTS +.TP 8 +PBCST (local input/output) HPL_T_panel * +On entry, PBCST points to the data structure containing the +panel (to be broadcast) information. +.TP 8 +IFLAG (local input/output) int * +On entry, IFLAG indicates whether or not the broadcast has +already been completed. If not, probing will occur, and the +outcome will be contained in IFLAG on exit. 
+.TP 8 +PANEL (local input/output) HPL_T_panel * +On entry, PANEL points to the data structure containing the +panel (to be spread) information. +.TP 8 +SIDE (global input) const enum HPL_SIDE +On entry, SIDE specifies whether the local piece of U located +in process IPMAP[SRCDIST] should be spread to the right or to +the left. This feature is used by the equilibration process. +.TP 8 +N (global input) const int +On entry, N specifies the local number of rows of U. N must +be at least zero. +.TP 8 +U (local input/output) double * +On entry, U is an array of dimension (LDU,*) containing the +local pieces of U. +.TP 8 +LDU (local input) const int +On entry, LDU specifies the local leading dimension of U. LDU +should be at least MAX(1,N). +.TP 8 +SRCDIST (local input) const int +On entry, SRCDIST specifies the source process that spreads +its piece of U. +.TP 8 +IPLEN (global input) const int * +On entry, IPLEN is an array of dimension NPROW+1. This array +is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U +in each process before process IPMAP[i], with the convention +that IPLEN[nprow] is the total number of rows. In other words +IPLEN[i+1] - IPLEN[i] is the local number of rows of U that +should be moved to process IPMAP[i]. +.TP 8 +IPMAP (global input) const int * +On entry, IPMAP is an array of dimension NPROW. This array +contains the logarithmic mapping of the processes. In other +words, IPMAP[myrow] is the absolute coordinate of the sorted +process. +.TP 8 +IPMAPM1 (global input) const int * +On entry, IPMAPM1 is an array of dimension NPROW. This array +contains the inverse of the logarithmic mapping contained in +IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. +.SH SEE ALSO +.BR HPL_pdlaswp01T \ (3). 
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sum.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sum.3 new file mode 100644 index 000000000..a3c4e2190 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_sum.3 @@ -0,0 +1,44 @@ +.TH HPL_sum 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_sum \- Combine (sum) two buffers. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_sum(\fR +\fB\&const int\fR +\fI\&N\fR, +\fB\&const void *\fR +\fI\&IN\fR, +\fB\&void *\fR +\fI\&INOUT\fR, +\fB\&const HPL_T_TYPE\fR +\fI\&DTYPE\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_sum\fR +combines (sum) two buffers. +.SH ARGUMENTS +.TP 8 +N (input) const int +On entry, N specifies the length of the buffers to be +combined. N must be at least zero. +.TP 8 +IN (input) const void * +On entry, IN points to the input-only buffer to be combined. +.TP 8 +INOUT (input/output) void * +On entry, INOUT points to the input-output buffer to be +combined. On exit, the entries of this array contain the +combined results. +.TP 8 +DTYPE (input) const HPL_T_TYPE +On entry, DTYPE specifies the type of the buffer operands. +.SH SEE ALSO +.BR HPL_broadcast \ (3), +.BR HPL_reduce \ (3), +.BR HPL_all_reduce \ (3), +.BR HPL_barrier \ (3), +.BR HPL_min \ (3), +.BR HPL_max \ (3), +.BR HPL_sum \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer.3 new file mode 100644 index 000000000..61f3f7cb1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer.3 @@ -0,0 +1,35 @@ +.TH HPL_timer 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer \- Timer facility.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_timer(\fR +\fB\&const int\fR +\fI\&I\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_timer\fR +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_timer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_timer calls in them. HPL_timer_enable() will re-enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_timer_boot() prior to any of +the functions mentioned above. +.SH ARGUMENTS +.TP 8 +I (global input) const int +On entry, I specifies the timer to stop/start. +.SH SEE ALSO +.BR HPL_timer_cputime \ (3), +.BR HPL_timer_walltime \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_cputime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_cputime.3 new file mode 100644 index 000000000..1f8987ca2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_cputime.3 @@ -0,0 +1,23 @@ +.TH HPL_timer_cputime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer_cputime \- Return the CPU time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_timer_cputime();\fR +.SH DESCRIPTION +\fB\&HPL_timer_cputime\fR +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program.
The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. +.SH SEE ALSO +.BR HPL_timer_walltime \ (3), +.BR HPL_timer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_walltime.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_walltime.3 new file mode 100644 index 000000000..9a6e898e7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_timer_walltime.3 @@ -0,0 +1,14 @@ +.TH HPL_timer_walltime 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_timer_walltime \- Return the elapsed (wall-clock) time. +.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&double\fR +\fB\&HPL_timer_walltime();\fR +.SH DESCRIPTION +\fB\&HPL_timer_walltime\fR +returns the elapsed (wall-clock) time. +.SH SEE ALSO +.BR HPL_timer_cputime \ (3), +.BR HPL_timer \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_warn.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_warn.3 new file mode 100644 index 000000000..6b051acb3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_warn.3 @@ -0,0 +1,59 @@ +.TH HPL_warn 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_warn \- displays an error message. 
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_warn(\fR +\fB\&FILE *\fR +\fI\&STREAM\fR, +\fB\&int\fR +\fI\&LINE\fR, +\fB\&const char *\fR +\fI\&SRNAME\fR, +\fB\&const char *\fR +\fI\&FORM\fR, +\fB\&...\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_warn\fR +displays an error message. +.SH ARGUMENTS +.TP 8 +STREAM (local input) FILE * +On entry, STREAM specifies the output stream. +.TP 8 +LINE (local input) int +On entry, LINE specifies the line number in the file where +the error has occurred. When LINE is not a positive line +number, it is ignored. +.TP 8 +SRNAME (local input) const char * +On entry, SRNAME should be the name of the routine calling +this error handler. +.TP 8 +FORM (local input) const char * +On entry, FORM specifies the format, i.e., how the subsequent +arguments are converted for output. +.TP 8 + (local input) ... +On entry, ... is the list of arguments to be printed within +the format string. +.SH EXAMPLE +\fI\&#include "hpl.h"\fR + +int main(int argc, char *argv[]) +.br +{ +.br + HPL_warn( stderr, __LINE__, __FILE__, +.br + "Demo.\en" ); +.br + exit(0); return(0); +.br +} +.SH SEE ALSO +.BR HPL_abort \ (3), +.BR HPL_fprintf \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_xjumpm.3 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_xjumpm.3 new file mode 100644 index 000000000..df3e0a954 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/man/man3/HPL_xjumpm.3 @@ -0,0 +1,77 @@ +.TH HPL_xjumpm 3 "December 2, 2018" "HPL 2.3" "HPL Library Functions" +.SH NAME +HPL_xjumpm \- Compute constants to jump in the random sequence.
+.SH SYNOPSIS +\fB\&#include "hpl.h"\fR + +\fB\&void\fR +\fB\&HPL_xjumpm(\fR +\fB\&const int\fR +\fI\&JUMPM\fR, +\fB\&int *\fR +\fI\&MULT\fR, +\fB\&int *\fR +\fI\&IADD\fR, +\fB\&int *\fR +\fI\&IRANN\fR, +\fB\&int *\fR +\fI\&IRANM\fR, +\fB\&int *\fR +\fI\&IAM\fR, +\fB\&int *\fR +\fI\&ICM\fR +\fB\&);\fR +.SH DESCRIPTION +\fB\&HPL_xjumpm\fR +computes the constants A and C to jump JUMPM numbers in +the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in +MULT and IADD specify how to jump from one entry in the sequence to +the next. +.SH ARGUMENTS +.TP 8 +JUMPM (local input) const int +On entry, JUMPM specifies the number of entries in the +sequence to jump over. When JUMPM is less than or equal to zero, +A and C are not computed, IRANM is set to IRANN corresponding +to a jump of size zero. +.TP 8 +MULT (local input) int * +On entry, MULT is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant a to jump from +X(n) to X(n+1) = a*X(n) + c in the random sequence. +.TP 8 +IADD (local input) int * +On entry, IADD is an array of dimension 2, that contains the +16-lower and 15-higher bits of the constant c to jump from +X(n) to X(n+1) = a*X(n) + c in the random sequence. +.TP 8 +IRANN (local input) int * +On entry, IRANN is an array of dimension 2, that contains the +16-lower and 15-higher bits of the encoding of X(n). +.TP 8 +IRANM (local output) int * +On entry, IRANM is an array of dimension 2. On exit, this +array contains respectively the 16-lower and 15-higher bits +of the encoding of X(n+JUMPM). +.TP 8 +IAM (local output) int * +On entry, IAM is an array of dimension 2. On exit, when JUMPM +is greater than zero, this array contains the encoded +constant A to jump from X(n) to X(n+JUMPM) in the random +sequence. IAM(0:1) contains respectively the 16-lower and +15-higher bits of this constant A. When JUMPM is less than or +equal to zero, this array is not referenced.
+.TP 8 +ICM (local output) int * +On entry, ICM is an array of dimension 2. On exit, when JUMPM +is greater than zero, this array contains the encoded +constant C to jump from X(n) to X(n+JUMPM) in the random +sequence. ICM(0:1) contains respectively the 16-lower and +15-higher bits of this constant C. When JUMPM is less than or +equal to zero, this array is not referenced. +.SH SEE ALSO +.BR HPL_ladd \ (3), +.BR HPL_lmul \ (3), +.BR HPL_setran \ (3), +.BR HPL_jumpit \ (3), +.BR HPL_rand \ (3). diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/missing b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/missing new file mode 100755 index 000000000..625aeb118 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program.
+ +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. + exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if we it failed (or wasn't found), and '--version' was +# passed; such an option is passed most likely to detect whether the +# program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. 
+ exit $st +fi + +perl_URL=https://www.perl.org/ +flex_URL=https://github.com/westes/flex +gnu_software_URL=https://www.gnu.org/software + +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." + program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." 
+ echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." + ;; + esac +} + +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). 
+exit $st + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS new file mode 100644 index 000000000..056fd81ba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.FreeBSD_PIV_CBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = FreeBSD_PIV_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpich +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a $(MPdir)/lib/libpmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/share/ATLAS/lib/FreeBSD_P5SSE2 +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = /usr/bin/f77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = /usr/bin/ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.HPUX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.HPUX_FBLAS new file mode 100644 index 000000000..af3f5da5f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.HPUX_FBLAS @@ -0,0 +1,179 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = HPUX +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# ---------------------------------------------------------------------- +# MPIinc tells the C compiler where to find the MPI header files, MPIlib +# is defined to be the name of the MPI library to be used. The variables +# MPIdir and MPIplat are only used for defining MPIinc and MPIlib). 
+# +MPIdir = $(HOME)/local/mpi +MPIplat = $(MPIdir)/hpux/ch_p4 +# +MPIinc = -I$(MPIdir)/include -I$(MPIplat)/include +MPIlib = $(MPIplat)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - BLAS library ------------------------------------------------------- +# ---------------------------------------------------------------------- +# +BLASlib = /usr/lib/pa1.1/libblas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc) +HPL_LIBS = $(HPLlib) $(BLASlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS F77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(HPL_INCLUDES) $(F2CDEFS) $(HPL_OPTS) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -D_INCLUDE_POSIX_SOURCE -DUseTimes -Aa +O4 +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = cc +LINKFLAGS = -Aa +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.I860_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.I860_FBLAS new file mode 100644 index 000000000..984236be2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.I860_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = I860_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = -lmpi +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lkmath +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) -nx +CCFLAGS = $(HPL_DEFS) -O4 -nx +# +LINKER = f77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.IRIX_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.IRIX_FBLAS new file mode 100644 index 000000000..d78bcf09f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.IRIX_FBLAS @@ -0,0 +1,181 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = IRIX_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/IRIX64/ch_p4/include +MPlib = $(MPdir)/IRIX64/ch_p4/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lblas +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DStringSunStyle -DF77_INTEGER=int +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) -64 +CCFLAGS = $(HPL_DEFS) -O3 -64 -OPT:Olimit=15000 -TARG:platform=IP30 \ + -LNO:blocking=OFF -LOPT:alias=typed +# +LINKER = cc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS new file mode 100644 index 000000000..624306902 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_CBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the Fortran 77 BLAS interface +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/gcc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS new file mode 100644 index 000000000..07985f781 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL new file mode 100644 index 000000000..ddf3fb4b6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_ATHLON_VSIPL @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_ATHLON_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - MPI directories - library ------------------------------------------ +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +LINKER = /usr/bin/gcc +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_Intel64 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_Intel64 new file mode 100644 index 000000000..47661c25d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_Intel64 @@ -0,0 +1,193 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P.
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_Intel64 +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +# MPdir = /opt/intel/mpi/4.1.0 +# MPinc = -I$(MPdir)/include64 +# MPlib = $(MPdir)/lib64/libmpi.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(MKLROOT) +ifndef LAinc +LAinc = $(LAdir)/mkl/include +endif +ifndef LAlib +LAlib = -L$(LAdir)/mkl/lib/intel64 \ + -Wl,--start-group \ + $(LAdir)/lib/intel64/libmkl_intel_lp64.a \ + $(LAdir)/lib/intel64/libmkl_intel_thread.a \ + $(LAdir)/lib/intel64/libmkl_core.a \ + -Wl,--end-group -lpthread -ldl +endif +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpiicc +CCNOOPT = $(HPL_DEFS) +OMP_DEFS = -openmp +CCFLAGS = $(HPL_DEFS) -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) $(OMP_DEFS) -mt_mpi +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS new file mode 100644 index 000000000..535a0e214 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_CBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm new file mode 100644 index 000000000..31fc9ea74 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_CBLAS_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_CBLAS_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_CALL_CBLAS +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS new file mode 100644 index 000000000..5ed9aac12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm new file mode 100644 index 000000000..a2416396c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_FBLAS_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_FBLAS_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII +LAinc = +LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL new file mode 100644 index 000000000..0f690a1b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/bin/gcc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = /usr/bin/g77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm new file mode 100644 index 000000000..fee265e46 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Linux_PII_VSIPL_gm @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Linux_PII_VSIPL_gm +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = /home/software/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. +# +LINKER = mpif77 +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.MacOSX_Accelerate b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.MacOSX_Accelerate new file mode 100644 index 000000000..d1ce69b64 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.MacOSX_Accelerate @@ -0,0 +1,183 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -fs +MKDIR = mkdir -p +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = MacOSX_Accelerate +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +# MPdir = /opt/intel/mpi/4.1.0 +# MPinc = -I$(MPdir)/include64 +# MPlib = $(MPdir)/lib64/libmpi.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -framework Accelerate +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpicc-openmpi-mp +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 +# +# On some platforms, it is necessary to use the Fortran linker to find +# the Fortran internals used in the BLAS library. 
+# +LINKER = $(CC) +LINKFLAGS = $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = cr +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR2_FBLAS new file mode 100644 index 000000000..628f2c152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR2_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWR2_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# 
---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lesslp2 +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
+# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. 
+# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpcc_r +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1 +# +LINKER = mpxlf_r +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR3_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR3_FBLAS new file mode 100644 index 000000000..bba468803 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWR3_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWR3_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lessl +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = /usr/vac/bin/xlc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -qtune=pwr3 -qarch=pwr3 -O3 -qmaxmem=-1 -qfloat=hsflt +# +LINKER = /usr/bin/xlf +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWRPC_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWRPC_FBLAS new file mode 100644 index 000000000..2a0fb2ec6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.PWRPC_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = PWRPC_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include +MPlib = $(MPdir)/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lessl +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = mpcc_r +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 -qarch=ppc -qtune=604 -qmaxmem=-1 +# +LINKER = mpxlf_r +LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS new file mode 100644 index 000000000..1ade2d8aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2-g_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -xlic_lib=sunperf +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -g +# +LINKER = purify -best-effort f77 +LINKFLAGS = +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL new file mode 100644 index 000000000..1cbb371fd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2-g_VSIPL @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2-g_VSIPL +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = $(HOME)/local/TASP_VSIPL_Core_Plus +LAinc = -I$(LAdir)/include +LAlib = $(LAdir)/lib/libvsip_c.a +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = -DHPL_CALL_VSIPL +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -g +# +LINKER = purify -best-effort cc +LINKFLAGS = +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2_FBLAS new file mode 100644 index 000000000..a1d5d6315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.SUN4SOL2_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = SUN4SOL2_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = $(HOME)/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include +MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -xlic_lib=sunperf +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -dalign -fsingle -xO5 -native -xarch=v8plusa +# +LINKER = f77 +LINKFLAGS = -dalign -native -xarch=v8plusa -xO5 +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.T3E_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.T3E_FBLAS new file mode 100644 index 000000000..fe12cae9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.T3E_FBLAS @@ -0,0 +1,187 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = T3E_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. +# +F2CDEFS = -DUpCase -DF77_INTEGER=long -DStringCrayStyle \ + -DCRAY_BLAS -DHPL_USE_TIMES +# +# When UpCase is defined, CRAY_BLAS redefines the BLAS routines used in +# HPL to be prefixed with an S. 
In the Cray programming environment, the +# default INTEGER and REAL size is 64 bits. This is reflected in the +# Cray Scientific Library as well, so SGEMM is the 64-bit matrix multi- +# ply. +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -O3 +# +LINKER = f77 +LINKFLAGS = -O3,unroll2,pipeline2 +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = echo +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS new file mode 100644 index 000000000..3d8062061 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 
+# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Tru64_FBLAS +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = /usr/local/mpi +MPinc = -I$(MPdir)/include -I$(MPdir)/alpha/ch_p4/include +MPlib = $(MPdir)/alpha/ch_p4/lib/libmpich.a +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. 
The variable LAdir is only used for defining LAinc and LAlib. +# +LAdir = +LAinc = +LAlib = -lcxml +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 +# +LINKER = f77 +LINKFLAGS = -nofor_main -O5 -arch host -tune host +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS_elan b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS_elan new file mode 100644 index 000000000..f9550412c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.Tru64_FBLAS_elan @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = /bin/sh +# +CD = cd +CP = cp +LN_S = ln -s +MKDIR = mkdir +RM = /bin/rm -f +TOUCH = touch +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = Tru64_FBLAS_elan +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = +MPinc = +MPlib = -lmpi -lelan +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = +LAinc = +LAlib = -lcxml +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = cc +CCNOOPT = $(HPL_DEFS) +CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 +# +LINKER = f77 +LINKFLAGS = -nofor_main -O5 -arch host -tune host +# +ARCHIVER = ar +ARFLAGS = r +RANLIB = ranlib +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.UNKNOWN.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.UNKNOWN.in new file mode 100644 index 000000000..8cbbd8242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/Make.UNKNOWN.in @@ -0,0 +1,180 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# ---------------------------------------------------------------------- +# - shell -------------------------------------------------------------- +# ---------------------------------------------------------------------- +# +SHELL = @SHELL@ +# +CD = @CD@ +CP = @CP@ +LN_S = @LN_S@ +MKDIR = @MKDIR@ +RM = @RM@ +TOUCH = @TOUCH@ +# +# ---------------------------------------------------------------------- +# - Platform identifier ------------------------------------------------ +# ---------------------------------------------------------------------- +# +ARCH = @ARCH@ +# +# ---------------------------------------------------------------------- +# - HPL Directory Structure / HPL library ------------------------------ +# ---------------------------------------------------------------------- +# +TOPdir = $(HOME)/hpl +INCdir = $(TOPdir)/include +BINdir = $(TOPdir)/bin/$(ARCH) +LIBdir = $(TOPdir)/lib/$(ARCH) +# +HPLlib = $(LIBdir)/libhpl.a +# +# ---------------------------------------------------------------------- +# - Message Passing library (MPI) -------------------------------------- +# ---------------------------------------------------------------------- +# MPinc tells the C compiler where to find the Message Passing library +# header files, MPlib is defined to be the name of the library to be +# used. The variable MPdir is only used for defining MPinc and MPlib. +# +MPdir = @MPDIR@ +MPinc = @MPINC@ +MPlib = @MPLIB@ +# +# ---------------------------------------------------------------------- +# - Linear Algebra library (BLAS or VSIPL) ----------------------------- +# ---------------------------------------------------------------------- +# LAinc tells the C compiler where to find the Linear Algebra library +# header files, LAlib is defined to be the name of the library to be +# used. The variable LAdir is only used for defining LAinc and LAlib. 
+# +LAdir = @LADIR@ +LAinc = @LAINC@ +LAlib = @LALIB@ +# +# ---------------------------------------------------------------------- +# - F77 / C interface -------------------------------------------------- +# ---------------------------------------------------------------------- +# You can skip this section if and only if you are not planning to use +# a BLAS library featuring a Fortran 77 interface. Otherwise, it is +# necessary to fill out the F2CDEFS variable with the appropriate +# options. **One and only one** option should be chosen in **each** of +# the 3 following categories: +# +# 1) name space (How C calls a Fortran 77 routine) +# +# -DAdd_ : all lower case and a suffixed underscore (Suns, +# Intel, ...), [default] +# -DNoChange : all lower case (IBM RS6000), +# -DUpCase : all upper case (Cray), +# -DAdd__ : the FORTRAN compiler in use is f2c. +# +# 2) C and Fortran 77 integer mapping +# +# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] +# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, +# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. +# +# 3) Fortran 77 string handling +# +# -DStringSunStyle : The string address is passed at the string loca- +# tion on the stack, and the string length is then +# passed as an F77_INTEGER after all explicit +# stack arguments, [default] +# -DStringStructPtr : The address of a structure is passed by a +# Fortran 77 string, and the structure is of the +# form: struct {char *cp; F77_INTEGER len;}, +# -DStringStructVal : A structure is passed by value for each Fortran +# 77 string, and the structure is of the form: +# struct {char *cp; F77_INTEGER len;}, +# -DStringCrayStyle : Special option for Cray machines, which uses +# Cray fcd (fortran character descriptor) for +# interoperation. 
+# +F2CDEFS = @F2CDEFS@ +# +# ---------------------------------------------------------------------- +# - HPL includes / libraries / specifics ------------------------------- +# ---------------------------------------------------------------------- +# +HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) +HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) +# +# - Compile time options ----------------------------------------------- +# +# -DHPL_COPY_L force the copy of the panel L before bcast; +# -DHPL_CALL_CBLAS call the cblas interface; +# -DHPL_CALL_VSIPL call the vsip library; +# -DHPL_DETAILED_TIMING enable detailed timers; +# +# By default HPL will: +# *) not copy L before broadcast, +# *) call the BLAS Fortran 77 interface, +# *) not display detailed timing information. +# +HPL_OPTS = +# +# ---------------------------------------------------------------------- +# +HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) +# +# ---------------------------------------------------------------------- +# - Compilers / linkers - Optimization flags --------------------------- +# ---------------------------------------------------------------------- +# +CC = @CC@ +CCNOOPT = $(HPL_DEFS) @CCNOOPT@ +CCFLAGS = $(HPL_DEFS) @CCFLAGS@ +# +LINKER = @LINKER@ +LINKFLAGS = @LINKFLAGS@ +# +ARCHIVER = @ARCHIVER@ +ARFLAGS = @ARFLAGS@ +RANLIB = @RANLIB@ +# +# ---------------------------------------------------------------------- diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/make_generic b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/make_generic new file mode 100644 index 000000000..68cf74a3a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/setup/make_generic @@ -0,0 +1,83 @@ +#!/bin/sh +# +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +# +# Configure script to create Make.UNKNOWN from Make.UNKNOWN.in for the +# HPL distribution, so users without a real Unix system can have a gene- +# ric Make.UNKNOWN to edit for their needs. This script substitutes +# pathless version of all the system programs, and commonly used options +# values into Make.UNKNOWN.in. +# +######################################################################## +# +sed -e 's%@SHELL@%/bin/sh%' \ + -e 's%@CD@%cd%' \ + -e 's%@CP@%cp%' \ + -e 's%@LN_S@%ln -s%' \ + -e 's%@MKDIR@%mkdir%' \ + -e 's%@RM@%/bin/rm -f%' \ + -e 's%@TOUCH@%touch%' \ + -e 's%@ARCH@%UNKNOWN%' \ + -e 's%@CC@%mpicc%' \ + -e 's%@CCNOOPT@%%' \ + -e 's%@CCFLAGS@%%' \ + -e 's%@LINKER@%mpif77%' \ + -e 's%@LINKFLAGS@%%' \ + -e 's%@ARCHIVER@%ar%' \ + -e 's%@ARFLAGS@%r%' \ + -e 's%@RANLIB@%echo%' \ + -e 's%@MPDIR@%%' \ + -e 's%@MPINC@%%' \ + -e 's%@MPLIB@%%' \ + -e 's%@F2CDEFS@%-DAdd_ -DF77_INTEGER=int -DStringSunStyle%' \ + -e 's%@LADIR@%%' \ + -e 's%@LAINC@%%' \ + -e 's%@LALIB@%-lblas%' \ + Make.UNKNOWN.in > Make.UNKNOWN +# +######################################################################## diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.am new file mode 100644 index 000000000..2e6d3d454 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.am @@ -0,0 +1,42 @@ +AM_CPPFLAGS = -I$(top_srcdir)/../include + +lib_LIBRARIES = libhpl.a + +libhpl_a_SOURCES = \ +auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \ +blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \ +blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \ +blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \ +comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \ +comm/HPL_binit.c comm/HPL_bwait.c 
comm/HPL_blong.c comm/HPL_1ring.c \ +comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \ +grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \ +grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \ +grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \ +panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \ +pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \ +pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \ +pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \ +pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \ +pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \ +pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \ +pauxil/HPL_infog2l.c pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \ +pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \ +pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \ +pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \ +pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \ +pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \ +pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \ +pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \ 
+pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \ +pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \ +pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.in new file mode 100644 index 000000000..139ecbad0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/Makefile.in @@ -0,0 +1,1355 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am 
$(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" +LIBRARIES = $(lib_LIBRARIES) +AR = ar +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +libhpl_a_AR = $(AR) $(ARFLAGS) +libhpl_a_LIBADD = +am__dirstamp = $(am__leading_dot)dirstamp +am_libhpl_a_OBJECTS = auxil/HPL_dlatcpy.$(OBJEXT) \ + auxil/HPL_fprintf.$(OBJEXT) auxil/HPL_dlacpy.$(OBJEXT) \ + auxil/HPL_dlamch.$(OBJEXT) blas/HPL_dscal.$(OBJEXT) \ + blas/HPL_dtrsm.$(OBJEXT) blas/HPL_dtrsv.$(OBJEXT) \ + blas/HPL_idamax.$(OBJEXT) blas/HPL_dgemv.$(OBJEXT) \ + blas/HPL_dscal.$(OBJEXT) blas/HPL_daxpy.$(OBJEXT) \ + blas/HPL_dcopy.$(OBJEXT) blas/HPL_dgemm.$(OBJEXT) \ + blas/HPL_dgemv.$(OBJEXT) blas/HPL_dger.$(OBJEXT) \ + comm/HPL_sdrv.$(OBJEXT) comm/HPL_send.$(OBJEXT) \ + comm/HPL_recv.$(OBJEXT) comm/HPL_bcast.$(OBJEXT) \ + comm/HPL_binit.$(OBJEXT) comm/HPL_bwait.$(OBJEXT) \ + comm/HPL_blong.$(OBJEXT) comm/HPL_1ring.$(OBJEXT) \ + comm/HPL_1rinM.$(OBJEXT) comm/HPL_2rinM.$(OBJEXT) \ + comm/HPL_2ring.$(OBJEXT) comm/HPL_blonM.$(OBJEXT) \ + comm/HPL_packL.$(OBJEXT) grid/HPL_reduce.$(OBJEXT) \ + grid/HPL_sum.$(OBJEXT) grid/HPL_grid_info.$(OBJEXT) \ + grid/HPL_grid_init.$(OBJEXT) grid/HPL_all_reduce.$(OBJEXT) \ + grid/HPL_broadcast.$(OBJEXT) grid/HPL_grid_exit.$(OBJEXT) \ + grid/HPL_max.$(OBJEXT) grid/HPL_min.$(OBJEXT) \ + grid/HPL_all_reduce.$(OBJEXT) grid/HPL_barrier.$(OBJEXT) \ + panel/HPL_pdpanel_disp.$(OBJEXT) \ + panel/HPL_pdpanel_free.$(OBJEXT) \ + panel/HPL_pdpanel_init.$(OBJEXT) \ + panel/HPL_pdpanel_new.$(OBJEXT) pauxil/HPL_pdlamch.$(OBJEXT) \ + pauxil/HPL_pdlange.$(OBJEXT) pauxil/HPL_indxg2p.$(OBJEXT) \ + pauxil/HPL_numroc.$(OBJEXT) pauxil/HPL_numrocI.$(OBJEXT) \ + pauxil/HPL_numrocI.$(OBJEXT) pauxil/HPL_dlaswp00N.$(OBJEXT) \ + pauxil/HPL_dlaswp01N.$(OBJEXT) pauxil/HPL_dlaswp01T.$(OBJEXT) \ + pauxil/HPL_dlaswp02N.$(OBJEXT) 
pauxil/HPL_dlaswp03N.$(OBJEXT) \ + pauxil/HPL_dlaswp03T.$(OBJEXT) pauxil/HPL_dlaswp04N.$(OBJEXT) \ + pauxil/HPL_dlaswp04T.$(OBJEXT) pauxil/HPL_dlaswp05N.$(OBJEXT) \ + pauxil/HPL_dlaswp05T.$(OBJEXT) pauxil/HPL_dlaswp06N.$(OBJEXT) \ + pauxil/HPL_dlaswp06T.$(OBJEXT) pauxil/HPL_infog2l.$(OBJEXT) \ + pauxil/HPL_dlaswp10N.$(OBJEXT) pauxil/HPL_pwarn.$(OBJEXT) \ + pfact/HPL_pdpanllN.$(OBJEXT) pfact/HPL_pdpanllT.$(OBJEXT) \ + pfact/HPL_pdpanrlN.$(OBJEXT) pfact/HPL_pdpanrlT.$(OBJEXT) \ + pfact/HPL_pdrpancrN.$(OBJEXT) pfact/HPL_pdrpancrT.$(OBJEXT) \ + pfact/HPL_pdrpanllN.$(OBJEXT) pfact/HPL_pdrpanllT.$(OBJEXT) \ + pfact/HPL_pdrpanrlN.$(OBJEXT) pfact/HPL_pdrpanrlT.$(OBJEXT) \ + pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdfact.$(OBJEXT) \ + pfact/HPL_dlocmax.$(OBJEXT) pfact/HPL_pdpancrT.$(OBJEXT) \ + pfact/HPL_pdpancrN.$(OBJEXT) pfact/HPL_dlocmax.$(OBJEXT) \ + pfact/HPL_dlocswpN.$(OBJEXT) pfact/HPL_dlocswpT.$(OBJEXT) \ + pfact/HPL_pdmxswp.$(OBJEXT) pfact/HPL_pdpanllN.$(OBJEXT) \ + pfact/HPL_pdpanllT.$(OBJEXT) pfact/HPL_pdpanrlN.$(OBJEXT) \ + pfact/HPL_pdpanrlT.$(OBJEXT) pfact/HPL_pdrpancrN.$(OBJEXT) \ + pfact/HPL_pdrpancrT.$(OBJEXT) pfact/HPL_pdrpanllN.$(OBJEXT) \ + pfact/HPL_pdrpanllT.$(OBJEXT) pfact/HPL_pdrpanrlN.$(OBJEXT) \ + pfact/HPL_pdrpanrlT.$(OBJEXT) pauxil/HPL_pabort.$(OBJEXT) \ + pauxil/HPL_pdlamch.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdupdateNN.$(OBJEXT) \ + pgesv/HPL_pdupdateNT.$(OBJEXT) pgesv/HPL_pdupdateTN.$(OBJEXT) \ + pgesv/HPL_pdupdateTT.$(OBJEXT) pgesv/HPL_equil.$(OBJEXT) \ + pgesv/HPL_pipid.$(OBJEXT) pgesv/HPL_plindx0.$(OBJEXT) \ + pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_plindx1.$(OBJEXT) \ + pgesv/HPL_plindx10.$(OBJEXT) pgesv/HPL_rollN.$(OBJEXT) \ + pgesv/HPL_rollT.$(OBJEXT) pgesv/HPL_spreadN.$(OBJEXT) \ + pgesv/HPL_spreadT.$(OBJEXT) pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesv.$(OBJEXT) pgesv/HPL_pdgesvK1.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) 
pgesv/HPL_pdgesv0.$(OBJEXT) \ + pgesv/HPL_pdgesvK2.$(OBJEXT) pgesv/HPL_pdlaswp00N.$(OBJEXT) \ + pgesv/HPL_pdlaswp00T.$(OBJEXT) pgesv/HPL_pdlaswp01N.$(OBJEXT) \ + pgesv/HPL_pdlaswp01T.$(OBJEXT) pgesv/HPL_pdtrsv.$(OBJEXT) \ + pgesv/HPL_pdupdateNN.$(OBJEXT) pgesv/HPL_pdupdateNT.$(OBJEXT) \ + pgesv/HPL_pdupdateTN.$(OBJEXT) pgesv/HPL_pdupdateTT.$(OBJEXT) \ + pgesv/HPL_logsort.$(OBJEXT) pgesv/HPL_perm.$(OBJEXT) +libhpl_a_OBJECTS = $(am_libhpl_a_OBJECTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = auxil/$(DEPDIR)/HPL_dlacpy.Po \ + auxil/$(DEPDIR)/HPL_dlamch.Po auxil/$(DEPDIR)/HPL_dlatcpy.Po \ + auxil/$(DEPDIR)/HPL_fprintf.Po blas/$(DEPDIR)/HPL_daxpy.Po \ + blas/$(DEPDIR)/HPL_dcopy.Po blas/$(DEPDIR)/HPL_dgemm.Po \ + blas/$(DEPDIR)/HPL_dgemv.Po blas/$(DEPDIR)/HPL_dger.Po \ + blas/$(DEPDIR)/HPL_dscal.Po blas/$(DEPDIR)/HPL_dtrsm.Po \ + blas/$(DEPDIR)/HPL_dtrsv.Po blas/$(DEPDIR)/HPL_idamax.Po \ + comm/$(DEPDIR)/HPL_1rinM.Po comm/$(DEPDIR)/HPL_1ring.Po \ + comm/$(DEPDIR)/HPL_2rinM.Po comm/$(DEPDIR)/HPL_2ring.Po \ + comm/$(DEPDIR)/HPL_bcast.Po comm/$(DEPDIR)/HPL_binit.Po \ + comm/$(DEPDIR)/HPL_blonM.Po comm/$(DEPDIR)/HPL_blong.Po \ + comm/$(DEPDIR)/HPL_bwait.Po comm/$(DEPDIR)/HPL_packL.Po \ + comm/$(DEPDIR)/HPL_recv.Po comm/$(DEPDIR)/HPL_sdrv.Po \ + comm/$(DEPDIR)/HPL_send.Po grid/$(DEPDIR)/HPL_all_reduce.Po \ + grid/$(DEPDIR)/HPL_barrier.Po grid/$(DEPDIR)/HPL_broadcast.Po \ + grid/$(DEPDIR)/HPL_grid_exit.Po \ + grid/$(DEPDIR)/HPL_grid_info.Po \ + grid/$(DEPDIR)/HPL_grid_init.Po grid/$(DEPDIR)/HPL_max.Po \ + grid/$(DEPDIR)/HPL_min.Po 
grid/$(DEPDIR)/HPL_reduce.Po \ + grid/$(DEPDIR)/HPL_sum.Po panel/$(DEPDIR)/HPL_pdpanel_disp.Po \ + panel/$(DEPDIR)/HPL_pdpanel_free.Po \ + panel/$(DEPDIR)/HPL_pdpanel_init.Po \ + panel/$(DEPDIR)/HPL_pdpanel_new.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp00N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp01N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp01T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp02N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp03N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp03T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp04N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp04T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp05N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp05T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp06N.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp06T.Po \ + pauxil/$(DEPDIR)/HPL_dlaswp10N.Po \ + pauxil/$(DEPDIR)/HPL_indxg2p.Po \ + pauxil/$(DEPDIR)/HPL_infog2l.Po pauxil/$(DEPDIR)/HPL_numroc.Po \ + pauxil/$(DEPDIR)/HPL_numrocI.Po pauxil/$(DEPDIR)/HPL_pabort.Po \ + pauxil/$(DEPDIR)/HPL_pdlamch.Po \ + pauxil/$(DEPDIR)/HPL_pdlange.Po pauxil/$(DEPDIR)/HPL_pwarn.Po \ + pfact/$(DEPDIR)/HPL_dlocmax.Po pfact/$(DEPDIR)/HPL_dlocswpN.Po \ + pfact/$(DEPDIR)/HPL_dlocswpT.Po pfact/$(DEPDIR)/HPL_pdfact.Po \ + pfact/$(DEPDIR)/HPL_pdmxswp.Po pfact/$(DEPDIR)/HPL_pdpancrN.Po \ + pfact/$(DEPDIR)/HPL_pdpancrT.Po \ + pfact/$(DEPDIR)/HPL_pdpanllN.Po \ + pfact/$(DEPDIR)/HPL_pdpanllT.Po \ + pfact/$(DEPDIR)/HPL_pdpanrlN.Po \ + pfact/$(DEPDIR)/HPL_pdpanrlT.Po \ + pfact/$(DEPDIR)/HPL_pdrpancrN.Po \ + pfact/$(DEPDIR)/HPL_pdrpancrT.Po \ + pfact/$(DEPDIR)/HPL_pdrpanllN.Po \ + pfact/$(DEPDIR)/HPL_pdrpanllT.Po \ + pfact/$(DEPDIR)/HPL_pdrpanrlN.Po \ + pfact/$(DEPDIR)/HPL_pdrpanrlT.Po pgesv/$(DEPDIR)/HPL_equil.Po \ + pgesv/$(DEPDIR)/HPL_logsort.Po pgesv/$(DEPDIR)/HPL_pdgesv.Po \ + pgesv/$(DEPDIR)/HPL_pdgesv0.Po pgesv/$(DEPDIR)/HPL_pdgesvK1.Po \ + pgesv/$(DEPDIR)/HPL_pdgesvK2.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po \ + pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po \ + pgesv/$(DEPDIR)/HPL_pdtrsv.Po \ + 
pgesv/$(DEPDIR)/HPL_pdupdateNN.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateNT.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateTN.Po \ + pgesv/$(DEPDIR)/HPL_pdupdateTT.Po pgesv/$(DEPDIR)/HPL_perm.Po \ + pgesv/$(DEPDIR)/HPL_pipid.Po pgesv/$(DEPDIR)/HPL_plindx0.Po \ + pgesv/$(DEPDIR)/HPL_plindx1.Po pgesv/$(DEPDIR)/HPL_plindx10.Po \ + pgesv/$(DEPDIR)/HPL_rollN.Po pgesv/$(DEPDIR)/HPL_rollT.Po \ + pgesv/$(DEPDIR)/HPL_spreadN.Po pgesv/$(DEPDIR)/HPL_spreadT.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libhpl_a_SOURCES) +DIST_SOURCES = $(libhpl_a_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ 
+host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/../include +lib_LIBRARIES = libhpl.a +libhpl_a_SOURCES = \ +auxil/HPL_dlatcpy.c auxil/HPL_fprintf.c auxil/HPL_dlacpy.c auxil/HPL_dlamch.c \ +blas/HPL_dscal.c blas/HPL_dtrsm.c blas/HPL_dtrsv.c blas/HPL_idamax.c \ +blas/HPL_dgemv.c blas/HPL_dscal.c blas/HPL_daxpy.c \ +blas/HPL_dcopy.c blas/HPL_dgemm.c blas/HPL_dgemv.c blas/HPL_dger.c \ +comm/HPL_sdrv.c comm/HPL_send.c comm/HPL_recv.c comm/HPL_bcast.c \ +comm/HPL_binit.c comm/HPL_bwait.c comm/HPL_blong.c comm/HPL_1ring.c \ +comm/HPL_1rinM.c comm/HPL_2rinM.c comm/HPL_2ring.c comm/HPL_blonM.c comm/HPL_packL.c \ +grid/HPL_reduce.c grid/HPL_sum.c grid/HPL_grid_info.c grid/HPL_grid_init.c \ +grid/HPL_all_reduce.c grid/HPL_broadcast.c grid/HPL_grid_exit.c grid/HPL_max.c \ +grid/HPL_min.c grid/HPL_all_reduce.c grid/HPL_barrier.c \ +panel/HPL_pdpanel_disp.c panel/HPL_pdpanel_free.c panel/HPL_pdpanel_init.c panel/HPL_pdpanel_new.c \ +pauxil/HPL_pdlamch.c pauxil/HPL_pdlange.c \ +pauxil/HPL_indxg2p.c pauxil/HPL_numroc.c pauxil/HPL_numrocI.c pauxil/HPL_numrocI.c \ +pauxil/HPL_dlaswp00N.c pauxil/HPL_dlaswp01N.c pauxil/HPL_dlaswp01T.c \ +pauxil/HPL_dlaswp02N.c pauxil/HPL_dlaswp03N.c pauxil/HPL_dlaswp03T.c \ +pauxil/HPL_dlaswp04N.c pauxil/HPL_dlaswp04T.c pauxil/HPL_dlaswp05N.c \ +pauxil/HPL_dlaswp05T.c pauxil/HPL_dlaswp06N.c pauxil/HPL_dlaswp06T.c \ +pauxil/HPL_infog2l.c 
pauxil/HPL_dlaswp10N.c pauxil/HPL_pwarn.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c pfact/HPL_pdrpanrlT.c \ +pfact/HPL_pdmxswp.c pfact/HPL_pdfact.c pfact/HPL_dlocmax.c \ +pfact/HPL_pdpancrT.c pfact/HPL_pdpancrN.c pfact/HPL_dlocmax.c \ +pfact/HPL_dlocswpN.c pfact/HPL_dlocswpT.c pfact/HPL_pdmxswp.c \ +pfact/HPL_pdpanllN.c pfact/HPL_pdpanllT.c pfact/HPL_pdpanrlN.c \ +pfact/HPL_pdpanrlT.c pfact/HPL_pdrpancrN.c pfact/HPL_pdrpancrT.c \ +pfact/HPL_pdrpanllN.c pfact/HPL_pdrpanllT.c pfact/HPL_pdrpanrlN.c \ +pfact/HPL_pdrpanrlT.c pauxil/HPL_pabort.c pauxil/HPL_pdlamch.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c pgesv/HPL_pdupdateTT.c \ +pgesv/HPL_equil.c pgesv/HPL_pipid.c pgesv/HPL_plindx0.c \ +pgesv/HPL_plindx10.c pgesv/HPL_plindx1.c pgesv/HPL_plindx10.c \ +pgesv/HPL_rollN.c pgesv/HPL_rollT.c pgesv/HPL_spreadN.c pgesv/HPL_spreadT.c \ +pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesv.c pgesv/HPL_pdgesvK1.c pgesv/HPL_pdgesvK2.c pgesv/HPL_pdgesv0.c pgesv/HPL_pdgesvK2.c \ +pgesv/HPL_pdlaswp00N.c pgesv/HPL_pdlaswp00T.c pgesv/HPL_pdlaswp01N.c pgesv/HPL_pdlaswp01T.c \ +pgesv/HPL_pdtrsv.c pgesv/HPL_pdupdateNN.c pgesv/HPL_pdupdateNT.c pgesv/HPL_pdupdateTN.c \ +pgesv/HPL_pdupdateTT.c pgesv/HPL_logsort.c pgesv/HPL_perm.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +Makefile: $(srcdir)/Makefile.in 
$(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLIBRARIES: $(lib_LIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; } + @$(POST_INSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + if test -f $$p; then \ + $(am__strip_dir) \ + echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \ + ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \ + else :; fi; \ + done + +uninstall-libLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(libdir)'; $(am__uninstall_files_from_dir) + +clean-libLIBRARIES: + -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES) +auxil/$(am__dirstamp): + @$(MKDIR_P) auxil + @: > auxil/$(am__dirstamp) +auxil/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) auxil/$(DEPDIR) + @: > auxil/$(DEPDIR)/$(am__dirstamp) 
+auxil/HPL_dlatcpy.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_fprintf.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_dlacpy.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +auxil/HPL_dlamch.$(OBJEXT): auxil/$(am__dirstamp) \ + auxil/$(DEPDIR)/$(am__dirstamp) +blas/$(am__dirstamp): + @$(MKDIR_P) blas + @: > blas/$(am__dirstamp) +blas/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) blas/$(DEPDIR) + @: > blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dscal.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dtrsm.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dtrsv.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_idamax.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dgemv.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_daxpy.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dcopy.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dgemm.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +blas/HPL_dger.$(OBJEXT): blas/$(am__dirstamp) \ + blas/$(DEPDIR)/$(am__dirstamp) +comm/$(am__dirstamp): + @$(MKDIR_P) comm + @: > comm/$(am__dirstamp) +comm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) comm/$(DEPDIR) + @: > comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_sdrv.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_send.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_recv.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_bcast.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_binit.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_bwait.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_blong.$(OBJEXT): comm/$(am__dirstamp) \ + 
comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_1ring.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_1rinM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_2rinM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_2ring.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_blonM.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +comm/HPL_packL.$(OBJEXT): comm/$(am__dirstamp) \ + comm/$(DEPDIR)/$(am__dirstamp) +grid/$(am__dirstamp): + @$(MKDIR_P) grid + @: > grid/$(am__dirstamp) +grid/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) grid/$(DEPDIR) + @: > grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_reduce.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_sum.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_info.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_init.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_all_reduce.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_broadcast.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_grid_exit.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_max.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_min.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +grid/HPL_barrier.$(OBJEXT): grid/$(am__dirstamp) \ + grid/$(DEPDIR)/$(am__dirstamp) +panel/$(am__dirstamp): + @$(MKDIR_P) panel + @: > panel/$(am__dirstamp) +panel/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) panel/$(DEPDIR) + @: > panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_disp.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_free.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_init.$(OBJEXT): panel/$(am__dirstamp) \ + 
panel/$(DEPDIR)/$(am__dirstamp) +panel/HPL_pdpanel_new.$(OBJEXT): panel/$(am__dirstamp) \ + panel/$(DEPDIR)/$(am__dirstamp) +pauxil/$(am__dirstamp): + @$(MKDIR_P) pauxil + @: > pauxil/$(am__dirstamp) +pauxil/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pauxil/$(DEPDIR) + @: > pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pdlamch.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pdlange.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_indxg2p.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_numroc.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_numrocI.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp00N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp01N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp01T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp02N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp03N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp03T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp04N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp04T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp05N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp05T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp06N.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp06T.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_infog2l.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_dlaswp10N.$(OBJEXT): 
pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pwarn.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pfact/$(am__dirstamp): + @$(MKDIR_P) pfact + @: > pfact/$(am__dirstamp) +pfact/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pfact/$(DEPDIR) + @: > pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanllN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanllT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpancrN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpancrT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanllN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanllT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanrlN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdrpanrlT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdmxswp.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdfact.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocmax.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpancrT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_pdpancrN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocswpN.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pfact/HPL_dlocswpT.$(OBJEXT): pfact/$(am__dirstamp) \ + pfact/$(DEPDIR)/$(am__dirstamp) +pauxil/HPL_pabort.$(OBJEXT): pauxil/$(am__dirstamp) \ + pauxil/$(DEPDIR)/$(am__dirstamp) +pgesv/$(am__dirstamp): + @$(MKDIR_P) pgesv + @: > 
pgesv/$(am__dirstamp) +pgesv/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pgesv/$(DEPDIR) + @: > pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesv0.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesv.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesvK1.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdgesvK2.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateNN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateNT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateTN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdupdateTT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_equil.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pipid.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx0.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx10.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_plindx1.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_rollN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_rollT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_spreadN.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_spreadT.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp00N.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp00T.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp01N.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_pdlaswp01T.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) 
+pgesv/HPL_pdtrsv.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_logsort.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) +pgesv/HPL_perm.$(OBJEXT): pgesv/$(am__dirstamp) \ + pgesv/$(DEPDIR)/$(am__dirstamp) + +libhpl.a: $(libhpl_a_OBJECTS) $(libhpl_a_DEPENDENCIES) $(EXTRA_libhpl_a_DEPENDENCIES) + $(AM_V_at)-rm -f libhpl.a + $(AM_V_AR)$(libhpl_a_AR) libhpl.a $(libhpl_a_OBJECTS) $(libhpl_a_LIBADD) + $(AM_V_at)$(RANLIB) libhpl.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f auxil/*.$(OBJEXT) + -rm -f blas/*.$(OBJEXT) + -rm -f comm/*.$(OBJEXT) + -rm -f grid/*.$(OBJEXT) + -rm -f panel/*.$(OBJEXT) + -rm -f pauxil/*.$(OBJEXT) + -rm -f pfact/*.$(OBJEXT) + -rm -f pgesv/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlacpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlamch.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_dlatcpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@auxil/$(DEPDIR)/HPL_fprintf.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_daxpy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dcopy.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dgemv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dger.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dscal.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@blas/$(DEPDIR)/HPL_dtrsv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@blas/$(DEPDIR)/HPL_idamax.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1rinM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_1ring.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2rinM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_2ring.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_binit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blonM.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_blong.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_bwait.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_packL.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_recv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_sdrv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@comm/$(DEPDIR)/HPL_send.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_all_reduce.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_barrier.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_broadcast.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_exit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_grid_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@grid/$(DEPDIR)/HPL_max.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_min.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_reduce.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@grid/$(DEPDIR)/HPL_sum.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_disp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_free.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_init.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@panel/$(DEPDIR)/HPL_pdpanel_new.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp00N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp01T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp02N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp03T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp04T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp05T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@pauxil/$(DEPDIR)/HPL_dlaswp06T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_dlaswp10N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_indxg2p.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_infog2l.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numroc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_numrocI.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pabort.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlamch.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pdlange.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pauxil/$(DEPDIR)/HPL_pwarn.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocmax.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_dlocswpT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdfact.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdmxswp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpancrT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanllT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlN.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdpanrlT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpancrT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanllT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pfact/$(DEPDIR)/HPL_pdrpanrlT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_equil.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_logsort.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesv0.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK1.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdgesvK2.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdtrsv.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@pgesv/$(DEPDIR)/HPL_pdupdateNT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pdupdateTT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_perm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_pipid.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx0.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx1.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_plindx10.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_rollT.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadN.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pgesv/$(DEPDIR)/HPL_spreadT.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ 
`$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + 
sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f auxil/$(DEPDIR)/$(am__dirstamp) + -rm -f auxil/$(am__dirstamp) + -rm -f blas/$(DEPDIR)/$(am__dirstamp) + -rm -f blas/$(am__dirstamp) + -rm -f comm/$(DEPDIR)/$(am__dirstamp) + -rm -f comm/$(am__dirstamp) + -rm -f grid/$(DEPDIR)/$(am__dirstamp) + -rm -f grid/$(am__dirstamp) + -rm -f panel/$(DEPDIR)/$(am__dirstamp) + -rm -f panel/$(am__dirstamp) + -rm -f pauxil/$(DEPDIR)/$(am__dirstamp) + -rm -f pauxil/$(am__dirstamp) + -rm -f pfact/$(DEPDIR)/$(am__dirstamp) + -rm -f pfact/$(am__dirstamp) + -rm -f pgesv/$(DEPDIR)/$(am__dirstamp) + -rm -f pgesv/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po + -rm -f auxil/$(DEPDIR)/HPL_dlamch.Po + -rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po + -rm -f auxil/$(DEPDIR)/HPL_fprintf.Po + -rm -f blas/$(DEPDIR)/HPL_daxpy.Po + -rm -f blas/$(DEPDIR)/HPL_dcopy.Po + -rm -f blas/$(DEPDIR)/HPL_dgemm.Po + -rm -f blas/$(DEPDIR)/HPL_dgemv.Po + -rm -f blas/$(DEPDIR)/HPL_dger.Po + -rm -f blas/$(DEPDIR)/HPL_dscal.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsm.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsv.Po + -rm -f blas/$(DEPDIR)/HPL_idamax.Po + -rm -f comm/$(DEPDIR)/HPL_1rinM.Po + -rm -f comm/$(DEPDIR)/HPL_1ring.Po + -rm -f comm/$(DEPDIR)/HPL_2rinM.Po + -rm -f comm/$(DEPDIR)/HPL_2ring.Po + -rm -f comm/$(DEPDIR)/HPL_bcast.Po + -rm -f comm/$(DEPDIR)/HPL_binit.Po + -rm -f comm/$(DEPDIR)/HPL_blonM.Po + -rm -f comm/$(DEPDIR)/HPL_blong.Po + -rm -f comm/$(DEPDIR)/HPL_bwait.Po + -rm -f comm/$(DEPDIR)/HPL_packL.Po + -rm -f comm/$(DEPDIR)/HPL_recv.Po + -rm -f comm/$(DEPDIR)/HPL_sdrv.Po + -rm -f comm/$(DEPDIR)/HPL_send.Po + -rm -f grid/$(DEPDIR)/HPL_all_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_barrier.Po + -rm -f 
grid/$(DEPDIR)/HPL_broadcast.Po + -rm -f grid/$(DEPDIR)/HPL_grid_exit.Po + -rm -f grid/$(DEPDIR)/HPL_grid_info.Po + -rm -f grid/$(DEPDIR)/HPL_grid_init.Po + -rm -f grid/$(DEPDIR)/HPL_max.Po + -rm -f grid/$(DEPDIR)/HPL_min.Po + -rm -f grid/$(DEPDIR)/HPL_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_sum.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po + -rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po + -rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po + -rm -f pauxil/$(DEPDIR)/HPL_numroc.Po + -rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po + -rm -f pauxil/$(DEPDIR)/HPL_pabort.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlange.Po + -rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdfact.Po + -rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po + -rm -f 
pfact/$(DEPDIR)/HPL_pdrpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po + -rm -f pgesv/$(DEPDIR)/HPL_equil.Po + -rm -f pgesv/$(DEPDIR)/HPL_logsort.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po + -rm -f pgesv/$(DEPDIR)/HPL_perm.Po + -rm -f pgesv/$(DEPDIR)/HPL_pipid.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollN.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollT.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f auxil/$(DEPDIR)/HPL_dlacpy.Po + -rm -f auxil/$(DEPDIR)/HPL_dlamch.Po + -rm -f auxil/$(DEPDIR)/HPL_dlatcpy.Po + -rm -f auxil/$(DEPDIR)/HPL_fprintf.Po + -rm -f blas/$(DEPDIR)/HPL_daxpy.Po + -rm -f blas/$(DEPDIR)/HPL_dcopy.Po + -rm -f blas/$(DEPDIR)/HPL_dgemm.Po + -rm -f blas/$(DEPDIR)/HPL_dgemv.Po + -rm -f blas/$(DEPDIR)/HPL_dger.Po + -rm -f blas/$(DEPDIR)/HPL_dscal.Po 
+ -rm -f blas/$(DEPDIR)/HPL_dtrsm.Po + -rm -f blas/$(DEPDIR)/HPL_dtrsv.Po + -rm -f blas/$(DEPDIR)/HPL_idamax.Po + -rm -f comm/$(DEPDIR)/HPL_1rinM.Po + -rm -f comm/$(DEPDIR)/HPL_1ring.Po + -rm -f comm/$(DEPDIR)/HPL_2rinM.Po + -rm -f comm/$(DEPDIR)/HPL_2ring.Po + -rm -f comm/$(DEPDIR)/HPL_bcast.Po + -rm -f comm/$(DEPDIR)/HPL_binit.Po + -rm -f comm/$(DEPDIR)/HPL_blonM.Po + -rm -f comm/$(DEPDIR)/HPL_blong.Po + -rm -f comm/$(DEPDIR)/HPL_bwait.Po + -rm -f comm/$(DEPDIR)/HPL_packL.Po + -rm -f comm/$(DEPDIR)/HPL_recv.Po + -rm -f comm/$(DEPDIR)/HPL_sdrv.Po + -rm -f comm/$(DEPDIR)/HPL_send.Po + -rm -f grid/$(DEPDIR)/HPL_all_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_barrier.Po + -rm -f grid/$(DEPDIR)/HPL_broadcast.Po + -rm -f grid/$(DEPDIR)/HPL_grid_exit.Po + -rm -f grid/$(DEPDIR)/HPL_grid_info.Po + -rm -f grid/$(DEPDIR)/HPL_grid_init.Po + -rm -f grid/$(DEPDIR)/HPL_max.Po + -rm -f grid/$(DEPDIR)/HPL_min.Po + -rm -f grid/$(DEPDIR)/HPL_reduce.Po + -rm -f grid/$(DEPDIR)/HPL_sum.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_disp.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_free.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_init.Po + -rm -f panel/$(DEPDIR)/HPL_pdpanel_new.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp00N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp01T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp02N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp03T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp04T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp05T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06N.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp06T.Po + -rm -f pauxil/$(DEPDIR)/HPL_dlaswp10N.Po + -rm -f pauxil/$(DEPDIR)/HPL_indxg2p.Po + -rm -f pauxil/$(DEPDIR)/HPL_infog2l.Po + -rm -f pauxil/$(DEPDIR)/HPL_numroc.Po + -rm -f pauxil/$(DEPDIR)/HPL_numrocI.Po + -rm -f pauxil/$(DEPDIR)/HPL_pabort.Po + -rm -f pauxil/$(DEPDIR)/HPL_pdlamch.Po + -rm -f 
pauxil/$(DEPDIR)/HPL_pdlange.Po + -rm -f pauxil/$(DEPDIR)/HPL_pwarn.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocmax.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpN.Po + -rm -f pfact/$(DEPDIR)/HPL_dlocswpT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdfact.Po + -rm -f pfact/$(DEPDIR)/HPL_pdmxswp.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdpanrlT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpancrT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanllT.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlN.Po + -rm -f pfact/$(DEPDIR)/HPL_pdrpanrlT.Po + -rm -f pgesv/$(DEPDIR)/HPL_equil.Po + -rm -f pgesv/$(DEPDIR)/HPL_logsort.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesv0.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK1.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdgesvK2.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp00T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01N.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdlaswp01T.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdtrsv.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateNT.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTN.Po + -rm -f pgesv/$(DEPDIR)/HPL_pdupdateTT.Po + -rm -f pgesv/$(DEPDIR)/HPL_perm.Po + -rm -f pgesv/$(DEPDIR)/HPL_pipid.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx0.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx1.Po + -rm -f pgesv/$(DEPDIR)/HPL_plindx10.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollN.Po + -rm -f pgesv/$(DEPDIR)/HPL_rollT.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadN.Po + -rm -f pgesv/$(DEPDIR)/HPL_spreadT.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: 
uninstall-libLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLIBRARIES cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic distclean-tags \ + distdir dvi dvi-am html html-am info info-am install \ + install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am \ + install-libLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-libLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_abort.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_abort.c new file mode 100644 index 000000000..bf0c5e727 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_abort.c @@ -0,0 +1,129 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_abort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_abort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_abort displays an error message on stderr and halts execution. 
+ * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR in function", SRNAME, cline ); + else + HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); + exit( 0 ); +/* + * End of HPL_abort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlacpy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlacpy.c new file mode 100644 index 000000000..ec71180eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlacpy.c @@ -0,0 +1,343 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factors + * #ifndef HPL_LACPY_M_DEPTH + * #define HPL_LACPY_M_DEPTH 32 + * #define HPL_LACPY_LOG2_M_DEPTH 5 + * #endif + * #ifndef HPL_LACPY_N_DEPTH + * #define HPL_LACPY_N_DEPTH 4 + * #define HPL_LACPY_LOG2_N_DEPTH 2 + * #endif + */ +#ifndef HPL_LACPY_M_DEPTH +#define HPL_LACPY_M_DEPTH 4 +#define HPL_LACPY_LOG2_M_DEPTH 2 +#endif +#ifndef HPL_LACPY_N_DEPTH +#define HPL_LACPY_N_DEPTH 2 +#define HPL_LACPY_LOG2_N_DEPTH 1 +#endif + +#ifdef STDC_HEADERS +void HPL_dlacpy +( + const int M, + const int N, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dlacpy +( M, N, A, LDA, B, LDB ) + const int M; + const int N; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlacpy copies an array A into an array B. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the arrays A and + * B. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the arrays A + * and B. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N). 
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * B (local output) double * + * On entry, B points to an array of dimension (LDB,N). On exit, + * B is overwritten with A. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of the array B. + * LDB must be at least MAX(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_LACPY_USE_COPY + register int j; +#else +#if ( HPL_LACPY_N_DEPTH == 1 ) + const double * A0 = A; + double * B0 = B; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + const double * A0 = A, * A1 = A + LDA; + double * B0 = B, * B1 = B + LDB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + const double * A0 = A, * A1 = A + LDA, + * A2 = A + (LDA << 1), * A3 = A + 3 * LDA; + double * B0 = B, * B1 = B + LDB, + * B2 = B + (LDB << 1), * B3 = B + 3 * LDB; +#endif + const int incA = ( (unsigned int)(LDA) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incB = ( (unsigned int)(LDB) << + HPL_LACPY_LOG2_N_DEPTH ) - M, + incA0 = (unsigned int)(LDA) - M, + incB0 = (unsigned int)(LDB) - M; + int mu, nu; + register int i, j; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + +#ifdef HPL_LACPY_USE_COPY + for( j = 0; j < N; j++, A0 += LDA, B0 += LDB ) HPL_dcopy( M, A0, 1, B0, 1 ); +#else + mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) << + HPL_LACPY_LOG2_M_DEPTH ); + nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) << + HPL_LACPY_LOG2_N_DEPTH ); + + for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 0] = A0[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0]; +#endif + +#if ( HPL_LACPY_M_DEPTH > 1 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 1] = A0[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2]; + B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4]; + B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5]; + B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6]; + B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7]; +#endif + +#endif 
+#if ( HPL_LACPY_M_DEPTH > 8 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8]; + B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9]; + B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10]; + B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11]; + B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12]; + B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13]; + B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14]; + B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15]; +#endif + +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + +#if ( HPL_LACPY_N_DEPTH == 1 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = 
A1[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31]; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16]; + B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17]; + B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18]; + B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19]; + B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20]; + B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21]; + B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22]; + B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23]; + B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24]; + B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25]; + B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26]; + B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27]; + B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28]; + B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29]; + B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30]; + B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31]; +#endif + +#endif + +#if ( HPL_LACPY_N_DEPTH == 1 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH; + A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH; + A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH; + A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH; +#endif + } + + for( i = mu; i < M; i++ ) + { +#if ( HPL_LACPY_N_DEPTH == 1 ) + *B0 = *A0; B0++; A0++; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++; + *B2 = *A2; 
B2++; A2++; *B3 = *A3; B3++; A3++; +#endif + } + +#if ( HPL_LACPY_N_DEPTH == 1 ) + A0 += incA; B0 += incB; +#elif ( HPL_LACPY_N_DEPTH == 2 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; +#elif ( HPL_LACPY_N_DEPTH == 4 ) + A0 += incA; B0 += incB; A1 += incA; B1 += incB; + A2 += incA; B2 += incB; A3 += incA; B3 += incB; +#endif + } + + for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 ) + { + for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH, + B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH ) + { + B0[ 0] = A0[ 0]; +#if ( HPL_LACPY_M_DEPTH > 1 ) + B0[ 1] = A0[ 1]; +#endif +#if ( HPL_LACPY_M_DEPTH > 2 ) + B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3]; +#endif +#if ( HPL_LACPY_M_DEPTH > 4 ) + B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7]; +#endif +#if ( HPL_LACPY_M_DEPTH > 8 ) + B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11]; + B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15]; +#endif +#if ( HPL_LACPY_M_DEPTH > 16 ) + B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19]; + B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23]; + B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27]; + B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31]; +#endif + } + for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; } + } +#endif +/* + * End of HPL_dlacpy + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlamch.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlamch.c new file mode 100644 index 000000000..c685f0d5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlamch.c @@ -0,0 +1,876 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static function prototypes + * --------------------------------------------------------------------- + */ +static void HPL_dlamc1 +STDC_ARGS( +( int *, int *, int *, int * ) ); +static void HPL_dlamc2 +STDC_ARGS( +( int *, int *, int *, double *, + int *, double *, int *, double * ) ); +static double HPL_dlamc3 +STDC_ARGS( +( const double, const double ) ); +static void HPL_dlamc4 +STDC_ARGS( +( int *, const double, const int ) ); +static void HPL_dlamc5 +STDC_ARGS( +( const int, const int, const int, const int, + int *, double * ) ); +static double HPL_dipow +STDC_ARGS( +( const double, const int ) ); + +#ifdef STDC_HEADERS +double HPL_dlamch +( + const HPL_T_MACH CMACH +) +#else +double HPL_dlamch +( CMACH ) + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum (sfmin) such + * that 1 / sfmin does not overflow, the base of the machine (base), the + * precision (prec), the number of (base) digits in the mantissa (t), + * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the + * minimum exponent before (gradual) underflow (emin), the underflow + * threshold (rmin) base**(emin-1), 
the largest exponent before overflow + * (emax), the overflow threshold (rmax) (base**emax)*(1-eps). + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamch.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * CMACH (local input) const HPL_T_MACH + * Specifies the value to be returned by HPL_dlamch + * = HPL_MACH_EPS, HPL_dlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_dlamch := sfmin + * = HPL_MACH_BASE, HPL_dlamch := base + * = HPL_MACH_PREC, HPL_dlamch := eps*base + * = HPL_MACH_MLEN, HPL_dlamch := t + * = HPL_MACH_RND, HPL_dlamch := rnd + * = HPL_MACH_EMIN, HPL_dlamch := emin + * = HPL_MACH_RMIN, HPL_dlamch := rmin + * = HPL_MACH_EMAX, HPL_dlamch := emax + * = HPL_MACH_RMAX, HPL_dlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double eps, sfmin, base, t, rnd, emin, rmin, emax, + rmax, prec; + double small; + static int first=1; + int beta=0, imax=0, imin=0, it=0, lrnd=0; +/* .. + * .. Executable Statements .. 
+ */ + if( first != 0 ) + { + first = 0; + HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); + base = (double)(beta); t = (double)(it); + if( lrnd != 0 ) + { rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; } + else + { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); } + prec = eps * base; emin = (double)(imin); emax = (double)(imax); + sfmin = rmin; small = HPL_rone / rmax; +/* + * Use SMALL plus a bit, to avoid the possibility of rounding causing + * overflow when computing 1/sfmin. + */ + if( small >= sfmin ) sfmin = small * ( HPL_rone + eps ); + } + + if( CMACH == HPL_MACH_EPS ) return( eps ); + if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); + if( CMACH == HPL_MACH_BASE ) return( base ); + if( CMACH == HPL_MACH_PREC ) return( prec ); + if( CMACH == HPL_MACH_MLEN ) return( t ); + if( CMACH == HPL_MACH_RND ) return( rnd ); + if( CMACH == HPL_MACH_EMIN ) return( emin ); + if( CMACH == HPL_MACH_RMIN ) return( rmin ); + if( CMACH == HPL_MACH_EMAX ) return( emax ); + if( CMACH == HPL_MACH_RMAX ) return( rmax ); + + return( eps ); +/* + * End of HPL_dlamch + */ +} + +#ifdef STDC_HEADERS +static void HPL_dlamc1 +( + int * BETA, + int * T, + int * RND, + int * IEEE1 +) +#else +static void HPL_dlamc1 +( BETA, T, RND, IEEE1 ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * IEEE1, * RND, * T; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc1 determines the machine parameters given by BETA, T, RND, + * and IEEE1. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself + * based on the function ENVRON by Malcolm and incorporated suggestions + * by Gentleman and Marovich. See + * + * Malcolm M. A., Algorithms to reveal properties of floating-point + * arithmetic., Comms. of the ACM, 15, 949-951 (1972). + * + * Gentleman W. M. and Marovich S. 
B., More on algorithms that reveal + * properties of floating point arithmetic units., Comms. of the ACM, + * 17, 276-277 (1974). + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * IEEE1 (local output) int * + * Specifies whether rounding appears to be done in the IEEE + * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b, c, f, one, qtr, savec, t1, t2; + static int first=1, lbeta, lieee1, lrnd, lt; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; one = HPL_rone; +/* + * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and + * RND. Throughout this routine we use the function HPL_dlamc3 to ensure + * that relevant values are stored and not held in registers, or are not + * affected by optimizers. + * + * Compute a = 2.0**m with the smallest positive integer m such that + * fl( a + 1.0 ) == a. + */ + a = HPL_rone; c = HPL_rone; + do + { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } + while( c == HPL_rone ); +/* + * Now compute b = 2.0**m with the smallest positive integer m such that + * fl( a + b ) > a. + */ + b = HPL_rone; c = HPL_dlamc3( a, b ); + while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); } +/* + * Now compute the base. a and c are neighbouring floating point num- + * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe- + * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to + * BETA and not (BETA-1). 
+ */ + qtr = one / 4.0; savec = c; + c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr); +/* + * Now determine whether rounding or chopping occurs, by adding a bit + * less than BETA/2 and a bit more than BETA/2 to a. + */ + b = (double)(lbeta); + f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a ); + if( c == a ) { lrnd = 1; } else { lrnd = 0; } + f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a ); + if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; +/* + * Try and decide whether rounding is done in the IEEE round to nea- + * rest style. b/2 is half a unit in the last place of the two numbers + * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa- + * vec is odd. Thus adding b/2 to a should not change a, but adding b/2 + * to savec should change savec. + */ + t1 = HPL_dlamc3( b / HPL_rtwo, a ); + t2 = HPL_dlamc3( b / HPL_rtwo, savec ); + if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; + else lieee1 = 0; +/* + * Now find the mantissa, T. It should be the integer part of log to the + * base BETA of a, however it is safer to determine T by powering. So we + * find T as the smallest positive integer for which fl( beta**t + 1.0 ) + * is equal to 1.0. + */ + lt = 0; a = HPL_rone; c = HPL_rone; + + do + { + lt++; a *= (double)(lbeta); + c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); + } while( c == HPL_rone ); + } + + *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; +} + +#ifdef STDC_HEADERS +static void HPL_dlamc2 +( + int * BETA, + int * T, + int * RND, + double * EPS, + int * EMIN, + double * RMIN, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + int * BETA, * EMAX, * EMIN, * RND, * T; + double * EPS, * RMAX, * RMIN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc2 determines the machine parameters specified in its argu- + * ment list. 
+ * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself + * based on a function PARANOIA by W. Kahan of the University of Cali- + * fornia at Berkeley for the computation of the relative machine epsi- + * lon eps. + * + * Arguments + * ========= + * + * BETA (local output) int * + * The base of the machine. + * + * T (local output) int * + * The number of ( BETA ) digits in the mantissa. + * + * RND (local output) int * + * Specifies whether proper rounding (RND=1) or chopping (RND=0) + * occurs in addition. This may not be a reliable guide to the + * way in which the machine performs its arithmetic. + * + * EPS (local output) double * + * The smallest positive number such that fl( 1.0 - EPS ) < 1.0, + * where fl denotes the computed value. + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow occurs. + * + * RMIN (local output) double * + * The smallest normalized number for the machine, given by + * BASE**( EMIN - 1 ), where BASE is the floating point value + * of BETA. + * + * EMAX (local output) int * + * The maximum exponent before overflow occurs. + * + * RMAX (local output) double * + * The largest positive number for the machine, given by + * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + * value of BETA. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + static double leps, lrmax, lrmin; + double a, b, c, half, one, rbase, sixth, small, + third, two, zero; + static int first=1, iwarn=0, lbeta=0, lemax, lemin, + lt=0; + int gnmin=0, gpmin=0, i, ieee, lieee1=0, + lrnd=0, ngnmin=0, ngpmin=0; +/* .. + * .. Executable Statements .. + */ + if( first != 0 ) + { + first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo; +/* + * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA, + * T, RND, EPS, EMIN and RMIN. 
+ * + * Throughout this routine we use the function HPL_dlamc3 to ensure that + * relevant values are stored and not held in registers, or are not af- + * fected by optimizers. + * + * HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1. + */ + HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 ); +/* + * Start to find eps. + */ + b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a; +/* + * Try some tricks to see whether or not this is the correct EPS. + */ + b = two / 3.0; + half = one / HPL_rtwo; + sixth = HPL_dlamc3( b, -half ); + third = HPL_dlamc3( sixth, sixth ); + b = HPL_dlamc3( third, -half ); + b = HPL_dlamc3( b, sixth ); + b = Mabs( b ); if( b < leps ) b = leps; + + leps = HPL_rone; + + while( ( leps > b ) && ( b > zero ) ) + { + leps = b; + c = HPL_dlamc3( half * leps, + HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) ); + c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c ); + c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c ); + } + if( a < leps ) leps = a; +/* + * Computation of EPS complete. + * + * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). Keep + * dividing a by BETA until (gradual) underflow occurs. This is detected + * when we cannot recover the previous a.
+ */ + rbase = one / (double)(lbeta); small = one; + for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero ); + a = HPL_dlamc3( one, small ); + HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta ); + HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta ); + + ieee = 0; + + if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) ) + { + if( ngpmin == gpmin ) + { +/* + * Non twos-complement machines, no gradual underflow; e.g., VAX ) + */ + lemin = ngpmin; + } + else if( ( gpmin-ngpmin ) == 3 ) + { +/* + * Non twos-complement machines with gradual underflow; e.g., IEEE stan- + * dard followers + */ + lemin = ngpmin - 1 + lt; ieee = 1; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, gpmin ); + iwarn = 1; + } + } + else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) + { + if( Mabs( ngpmin-ngnmin ) == 1 ) + { +/* + * Twos-complement machines, no gradual underflow; e.g., CYBER 205 + */ + lemin = Mmax( ngpmin, ngnmin ); + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) + { + if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) + { +/* + * Twos-complement machines with gradual underflow; no known machine + */ + lemin = Mmax( ngpmin, ngnmin ) - 1 + lt; + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); + iwarn = 1; + } + } + else + { +/* + * A guess; no known machine + */ + lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); + lemin = Mmin( lemin, gnmin ); iwarn = 1; + } +/* + * Comment out this if block if EMIN is ok + */ + if( iwarn != 0 ) + { + first = 1; + HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n", +"WARNING. The value EMIN may be incorrect:- EMIN =", lemin, +"If, after inspection, the value EMIN looks acceptable, please comment ", +"out the if block as marked within the code of routine HPL_dlamc2, ", +"otherwise supply EMIN explicitly." 
); + } +/* + * Assume IEEE arithmetic if we found denormalised numbers above, or if + * arithmetic seems to round in the IEEE style, determined in routine + * HPL_dlamc1. A true IEEE machine should have both things true; how- + * ever, faulty machines may have one or the other. + */ + if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; + else ieee = 0; +/* + * Compute RMIN by successive division by BETA. We could compute RMIN + * as BASE**( EMIN - 1 ), but some machines underflow during this compu- + * tation. + */ + lrmin = HPL_rone; + for( i = 0; i < 1 - lemin; i++ ) + lrmin = HPL_dlamc3( lrmin*rbase, zero ); +/* + * Finally, call HPL_dlamc5 to compute emax and rmax. + */ + HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); + } + *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; + *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; +} + +#ifdef STDC_HEADERS +static double HPL_dlamc3( const double A, const double B ) +#else +static double HPL_dlamc3( A, B ) +/* + * .. Scalar Arguments .. + */ + const double A, B; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc3 is intended to force a and b to be stored prior to doing + * the addition of a and b, for use in situations where optimizers + * might hold one of these in a register. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc3.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * A, B (local input) double + * The values a and b. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( A + B ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc4 +( + int * EMIN, + const double START, + const int BASE +) +#else +static void HPL_dlamc4( EMIN, START, BASE ) +/* + * .. Scalar Arguments .. 
+ */ + int * EMIN; + const int BASE; + const double START; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc4 is a service function for HPL_dlamc2. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc4.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * EMIN (local output) int * + * The minimum exponent before (gradual) underflow, computed by + * setting A = START and dividing by BASE until the previous A + * can not be recovered. + * + * START (local input) double + * The starting point for determining EMIN. + * + * BASE (local input) int + * The base of the machine. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double a, b1, b2, c1, c2, d1, d2, one, rbase, zero; + int i; +/* .. + * .. Executable Statements .. + */ + a = START; one = HPL_rone; rbase = one / (double)(BASE); + zero = HPL_rzero; + *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a; + + do + { + (*EMIN)--; a = b1; + b1 = HPL_dlamc3( a / BASE, zero ); + c1 = HPL_dlamc3( b1 * BASE, zero ); + d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; + b2 = HPL_dlamc3( a * rbase, zero ); + c2 = HPL_dlamc3( b2 / rbase, zero ); + d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; + } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) ); +} + +#ifdef STDC_HEADERS +static void HPL_dlamc5 +( + const int BETA, + const int P, + const int EMIN, + const int IEEE, + int * EMAX, + double * RMAX +) +#else +static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +/* + * .. Scalar Arguments .. + */ + const int BETA, EMIN, IEEE, P; + int * EMAX; + double * RMAX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlamc5 attempts to compute RMAX, the largest machine floating- + * point number, without overflow. It assumes that EMAX + abs(EMIN) sum + * approximately to a power of 2. 
It will fail on machines where this + * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + * EMAX = 28718). It will also fail if the value supplied for EMIN is + * too large (i.e. too close to zero), probably with overflow. + * + * Notes + * ===== + * + * This function has been manually translated from the Fortran 77 LAPACK + * auxiliary function dlamc5.f (version 2.0 -- 1992). + * + * Arguments + * ========= + * + * BETA (local input) int + * The base of floating-point arithmetic. + * + * P (local input) int + * The number of base BETA digits in the mantissa of a floating- + * point value. + * + * EMIN (local input) int + * The minimum exponent before (gradual) underflow. + * + * IEEE (local input) int + * A logical flag specifying whether or not the arithmetic sys- + * tem is thought to comply with the IEEE standard. + * + * EMAX (local output) int * + * The largest exponent before overflow. + * + * RMAX (local output) double * + * The largest machine floating-point number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double oldy=HPL_rzero, recbas, y, z; + int exbits=1, expsum, i, lexp=1, nbits, try, + uexp; +/* .. + * .. Executable Statements .. + */ +/* + * First compute lexp and uexp, two powers of 2 that bound abs(EMIN). + * We then assume that EMAX + abs( EMIN ) will sum approximately to the + * bound that is closest to abs( EMIN ). (EMAX is the exponent of the + * required number RMAX). + */ +l_10: + try = (int)( (unsigned int)(lexp) << 1 ); + if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; } + + if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; } +/* + * Now -lexp is less than or equal to EMIN, and -uexp is greater than or + * equal to EMIN. exbits is the number of bits needed to store the expo- + * nent. 
+ */ + if( ( uexp+EMIN ) > ( -lexp-EMIN ) ) + { expsum = (int)( (unsigned int)(lexp) << 1 ); } + else + { expsum = (int)( (unsigned int)(uexp) << 1 ); } +/* + * expsum is the exponent range, approximately equal to EMAX - EMIN + 1. + */ + *EMAX = expsum + EMIN - 1; +/* + * nbits is the total number of bits needed to store a floating-point + * number. + */ + nbits = 1 + exbits + P; + + if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) + { +/* + * Either there are an odd number of bits used to store a floating-point + * number, which is unlikely, or some bits are not used in the represen- + * tation of numbers, which is possible, (e.g. Cray machines) or the + * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), + * which is perhaps the most likely. We have to assume the last alterna- + * tive. If this is true, then we need to reduce EMAX by one because + * there must be some way of representing zero in an implicit-bit sys- + * tem. On machines like Cray we are reducing EMAX by one unnecessarily. + */ + (*EMAX)--; + } + + if( IEEE != 0 ) + { +/* + * Assume we are on an IEEE machine which reserves one exponent for in- + * finity and NaN. + */ + (*EMAX)--; + } +/* + * Now create RMAX, the largest machine number, which should be equal to + * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being + * careful that the result is less than 1.0. + */ + recbas = HPL_rone / (double)(BETA); + z = (double)(BETA) - HPL_rone; + y = HPL_rzero; + + for( i = 0; i < P; i++ ) + { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); } + + if( y >= HPL_rone ) y = oldy; +/* + * Now multiply by BETA**EMAX to get RMAX. + */ + for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero ); + + *RMAX = y; +/* + * End of HPL_dlamc5 + */ +} + +#ifdef STDC_HEADERS +static double HPL_dipow +( + const double X, + const int N +) +#else +static double HPL_dipow( X, N ) +/* + * .. Scalar Arguments ..
+ */ + const int N; + const double X; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dipow computes the integer n-th power of a real scalar x. + * + * Arguments + * ========= + * + * X (local input) const double + * The real scalar x. + * + * N (local input) const int + * The integer power to raise x to. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r, y=HPL_rone; + int k, n; +/* .. + * .. Executable Statements .. + */ + if( X == HPL_rzero ) return( HPL_rzero ); + if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } + for( k = 0; k < n; k++ ) y *= r; + + return( y ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlange.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlange.c new file mode 100644 index 000000000..82f118b6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlange.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_dlange +( + const HPL_T_NORM NORM, + const int M, + const int N, + const double * A, + const int LDA +) +#else +double HPL_dlange +( NORM, M, N, A, LDA ) + const HPL_T_NORM NORM; + const int M; + const int N; + const double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlange returns the value of the one norm, or the infinity norm, + * or the element of largest absolute value of a matrix A: + * + * max(abs(A(i,j))) when NORM = HPL_NORM_A, + * norm1(A), when NORM = HPL_NORM_1, + * normI(A), when NORM = HPL_NORM_I, + * + * where norm1 denotes the one norm of a matrix (maximum column sum) and + * normI denotes the infinity norm of a matrix (maximum row sum). Note + * that max(abs(A(i,j))) is not a matrix norm. + * + * Arguments + * ========= + * + * NORM (local input) const HPL_T_NORM + * On entry, NORM specifies the value to be returned by this + * function as described above. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N), that + * contains the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double s, v0=HPL_rzero, * work = NULL; + int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero ); + + if( NORM == HPL_NORM_A ) + { +/* + * max( abs( A ) ) + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { v0 = Mmax( v0, Mabs( *A ) ); A++; } + A += LDA - M; + } + } + else if( NORM == HPL_NORM_1 ) + { +/* + * Find norm_1( A ). + */ + work = (double*)malloc( (size_t)(N) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( j = 0; j < N; j++ ) + { + s = HPL_rzero; + for( i = 0; i < M; i++ ) { s += Mabs( *A ); A++; } + work[j] = s; A += LDA - M; + } +/* + * Find maximum sum of columns for 1-norm + */ + v0 = work[HPL_idamax( N, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + else if( NORM == HPL_NORM_I ) + { +/* + * Find norm_inf( A ) + */ + work = (double*)malloc( (size_t)(M) * sizeof( double ) ); + if( work == NULL ) + { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } + else + { + for( i = 0; i < M; i++ ) { work[i] = HPL_rzero; } + + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) { work[i] += Mabs( *A ); A++; } + A += LDA - M; + } +/* + * Find maximum sum of rows for inf-norm + */ + v0 = work[HPL_idamax( M, work, 1 )]; v0 = Mabs( v0 ); + if( work ) free( work ); + } + } + + return( v0 ); +/* + * End of HPL_dlange + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlaprnt.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlaprnt.c new file mode 100644 index 000000000..6e9c368c9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlaprnt.c @@ -0,0 +1,176 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + 
* are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dlaprnt +( + const int M, + const int N, + double * A, + const int IA, + const int JA, + const int LDA, + const char * CMATNM +) +#else +void HPL_dlaprnt +( M, N, A, IA, JA, LDA, CMATNM ) + const int M; + const int N; + double * A; + const int IA; + const int JA; + const int LDA; + const char * CMATNM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaprnt prints to standard error an M-by-N matrix A. + * + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A. M must be at + * least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of A. N must be + * at least zero. + * + * A (local input) double * + * On entry, A points to an array of dimension (LDA,N). + * + * IA (local input) const int + * On entry, IA specifies the starting row index to be printed. + * + * JA (local input) const int + * On entry, JA specifies the starting column index to be + * printed. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * CMATNM (local input) const char * + * On entry, CMATNM is the name of the matrix to be printed. + * + * --------------------------------------------------------------------- + */ +/* + * .. 
Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + for( j = 0; j < N; j++ ) + { + for( i = 0; i < M; i++ ) + { + HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM, IA+i, + JA+j, *(Mptr( A, i, j, LDA )) ); + } + } +/* + * End of HPL_dlaprnt + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlatcpy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlatcpy.c new file mode 100644 index 000000000..410451c24 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_dlatcpy.c @@ -0,0 +1,398 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
/*
 * Default unrolling factors.  Each depth can be overridden at compile
 * time (together with its log2).  The unrolled code below covers
 * HPL_LATCPY_M_DEPTH in { 1, 2, 4, 8, 16, 32 } and HPL_LATCPY_N_DEPTH
 * in { 1, 2, 4 }.
 */
#ifndef HPL_LATCPY_M_DEPTH
#define    HPL_LATCPY_M_DEPTH        4
#define    HPL_LATCPY_LOG2_M_DEPTH   2
#endif
#ifndef HPL_LATCPY_N_DEPTH
#define    HPL_LATCPY_N_DEPTH        2
#define    HPL_LATCPY_LOG2_N_DEPTH   1
#endif

#ifndef HPL_LATCPY_USE_COPY
/*
 * HPL_LATCPY_ROW( k ) copies one element into position k of each of the
 * HPL_LATCPY_N_DEPTH columns of B currently being filled and advances
 * the matching read pointers in A by one column (LDA elements).
 */
#if   ( HPL_LATCPY_N_DEPTH == 1 )
#define    HPL_LATCPY_ROW( k_ )                                        \
   do { B0[k_] = *A0; A0 += LDA; } while( 0 )
#elif ( HPL_LATCPY_N_DEPTH == 2 )
#define    HPL_LATCPY_ROW( k_ )                                        \
   do { B0[k_] = *A0; A0 += LDA; B1[k_] = *A1; A1 += LDA; } while( 0 )
#elif ( HPL_LATCPY_N_DEPTH == 4 )
#define    HPL_LATCPY_ROW( k_ )                                        \
   do { B0[k_] = *A0; A0 += LDA; B1[k_] = *A1; A1 += LDA;              \
        B2[k_] = *A2; A2 += LDA; B3[k_] = *A3; A3 += LDA; } while( 0 )
#else
#error "HPL_LATCPY_N_DEPTH must be 1, 2 or 4"
#endif
/*
 * HPL_LATCPY_TAIL( k ) is the single-column variant used for the
 * columns of B left over once N is no longer a multiple of
 * HPL_LATCPY_N_DEPTH.
 */
#define    HPL_LATCPY_TAIL( k_ )                                       \
   do { B0[k_] = *A0; A0 += LDA; } while( 0 )
#endif

#ifdef STDC_HEADERS
void HPL_dlatcpy
(
   const int                        M,
   const int                        N,
   const double *                   A,
   const int                        LDA,
   double *                         B,
   const int                        LDB
)
#else
void HPL_dlatcpy
( M, N, A, LDA, B, LDB )
   const int                        M;
   const int                        N;
   const double *                   A;
   const int                        LDA;
   double *                         B;
   const int                        LDB;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlatcpy copies the transpose of an array A into an array B, i.e.
 * B( i, j ) = A( j, i ) for 0 <= i < M and 0 <= j < N.
 *
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the number of  rows of the array B and
 *         the number of columns of A. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry,  N specifies the number of  rows of the array A and
 *         the number of columns of B. N must be at least zero.
 *
 * A       (local input)                 const double *
 *         On entry, A points to an array of dimension (LDA,M).
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,N).
 *
 * B       (local output)                double *
 *         On entry, B points to an array of dimension (LDB,N). On exit,
 *         B is overwritten with the transpose of A.
 *
 * LDB     (local input)                 const int
 *         On entry, LDB specifies the leading dimension of the array B.
 *         LDB must be at least MAX(1,M).
 *
 * The routine returns immediately when M or N is not positive.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef HPL_LATCPY_USE_COPY
/*
 * A0 / B0 were referenced but never declared in this configuration;
 * declare them so that building with HPL_LATCPY_USE_COPY compiles.
 */
   const double               * A0 = A;
   double                     * B0 = B;
   register int               j;
#else
#if   ( HPL_LATCPY_N_DEPTH == 1 )
   const double               * A0 = A;
   double                     * B0 = B;
#elif ( HPL_LATCPY_N_DEPTH == 2 )
   const double               * A0 = A,              * A1 = A + 1;
   double                     * B0 = B,              * B1 = B +     LDB;
#elif ( HPL_LATCPY_N_DEPTH == 4 )
   const double               * A0 = A,              * A1 = A + 1,
                              * A2 = A + 2,          * A3 = A + 3;
   double                     * B0 = B,              * B1 = B +     LDB,
                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
#endif
/*
 * incA / incB  rewind the pointers after a full pass over M and move
 * them to the next group of HPL_LATCPY_N_DEPTH columns of B (rows of
 * A); incA0 / incB0 do the same for a single leftover column.
 */
   const int                  incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH),
                              incB = ( (unsigned int)(LDB) <<
                                       HPL_LATCPY_LOG2_N_DEPTH ) - M,
                              incA0 = -M * LDA + 1, incB0 = LDB - M;
   int                        mu, nu;
   register int               i, j;
#endif
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;

#ifdef HPL_LATCPY_USE_COPY
/*
 * One strided copy per column of B: row j of A becomes column j of B
 */
   for( j = 0; j < N; j++, B0 += LDB ) HPL_dcopy( M, A0+j, LDA, B0, 1 );
#else
/*
 * mu / nu: largest multiples of the unrolling depths not exceeding M / N
 */
   mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) <<
                                      HPL_LATCPY_LOG2_M_DEPTH );
   nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) <<
                                      HPL_LATCPY_LOG2_N_DEPTH );
/*
 * Columns of B handled HPL_LATCPY_N_DEPTH at a time
 */
   for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH )
   {
      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH )
      {
         HPL_LATCPY_ROW(  0 );
#if ( HPL_LATCPY_M_DEPTH >  1 )
         HPL_LATCPY_ROW(  1 );
#endif
#if ( HPL_LATCPY_M_DEPTH >  2 )
         HPL_LATCPY_ROW(  2 ); HPL_LATCPY_ROW(  3 );
#endif
#if ( HPL_LATCPY_M_DEPTH >  4 )
         HPL_LATCPY_ROW(  4 ); HPL_LATCPY_ROW(  5 );
         HPL_LATCPY_ROW(  6 ); HPL_LATCPY_ROW(  7 );
#endif
#if ( HPL_LATCPY_M_DEPTH >  8 )
         HPL_LATCPY_ROW(  8 ); HPL_LATCPY_ROW(  9 );
         HPL_LATCPY_ROW( 10 ); HPL_LATCPY_ROW( 11 );
         HPL_LATCPY_ROW( 12 ); HPL_LATCPY_ROW( 13 );
         HPL_LATCPY_ROW( 14 ); HPL_LATCPY_ROW( 15 );
#endif
#if ( HPL_LATCPY_M_DEPTH > 16 )
         HPL_LATCPY_ROW( 16 ); HPL_LATCPY_ROW( 17 );
         HPL_LATCPY_ROW( 18 ); HPL_LATCPY_ROW( 19 );
         HPL_LATCPY_ROW( 20 ); HPL_LATCPY_ROW( 21 );
         HPL_LATCPY_ROW( 22 ); HPL_LATCPY_ROW( 23 );
         HPL_LATCPY_ROW( 24 ); HPL_LATCPY_ROW( 25 );
         HPL_LATCPY_ROW( 26 ); HPL_LATCPY_ROW( 27 );
         HPL_LATCPY_ROW( 28 ); HPL_LATCPY_ROW( 29 );
         HPL_LATCPY_ROW( 30 ); HPL_LATCPY_ROW( 31 );
#endif
/*
 * Move the write pointers past the HPL_LATCPY_M_DEPTH rows just filled
 */
#if   ( HPL_LATCPY_N_DEPTH == 1 )
         B0 += HPL_LATCPY_M_DEPTH;
#elif ( HPL_LATCPY_N_DEPTH == 2 )
         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
#elif ( HPL_LATCPY_N_DEPTH == 4 )
         B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH;
         B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH;
#endif
      }
/*
 * Remaining rows of B ( M not a multiple of HPL_LATCPY_M_DEPTH )
 */
      for( i = mu; i < M; i++ )
      {
#if   ( HPL_LATCPY_N_DEPTH == 1 )
         *B0 = *A0; B0++; A0 += LDA;
#elif ( HPL_LATCPY_N_DEPTH == 2 )
         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
#elif ( HPL_LATCPY_N_DEPTH == 4 )
         *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA;
         *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA;
#endif
      }

#if   ( HPL_LATCPY_N_DEPTH == 1 )
      A0 += incA; B0 += incB;
#elif ( HPL_LATCPY_N_DEPTH == 2 )
      A0 += incA; A1 += incA; B0 += incB; B1 += incB;
#elif ( HPL_LATCPY_N_DEPTH == 4 )
      A0 += incA; A1 += incA; A2 += incA; A3 += incA;
      B0 += incB; B1 += incB; B2 += incB; B3 += incB;
#endif
   }
/*
 * Remaining columns of B ( N not a multiple of HPL_LATCPY_N_DEPTH ),
 * handled one at a time through A0 / B0 only
 */
   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
   {
      for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH )
      {
         HPL_LATCPY_TAIL(  0 );
#if ( HPL_LATCPY_M_DEPTH >  1 )
         HPL_LATCPY_TAIL(  1 );
#endif
#if ( HPL_LATCPY_M_DEPTH >  2 )
         HPL_LATCPY_TAIL(  2 ); HPL_LATCPY_TAIL(  3 );
#endif
#if ( HPL_LATCPY_M_DEPTH >  4 )
         HPL_LATCPY_TAIL(  4 ); HPL_LATCPY_TAIL(  5 );
         HPL_LATCPY_TAIL(  6 ); HPL_LATCPY_TAIL(  7 );
#endif
#if ( HPL_LATCPY_M_DEPTH >  8 )
         HPL_LATCPY_TAIL(  8 ); HPL_LATCPY_TAIL(  9 );
         HPL_LATCPY_TAIL( 10 ); HPL_LATCPY_TAIL( 11 );
         HPL_LATCPY_TAIL( 12 ); HPL_LATCPY_TAIL( 13 );
         HPL_LATCPY_TAIL( 14 ); HPL_LATCPY_TAIL( 15 );
#endif
#if ( HPL_LATCPY_M_DEPTH > 16 )
         HPL_LATCPY_TAIL( 16 ); HPL_LATCPY_TAIL( 17 );
         HPL_LATCPY_TAIL( 18 ); HPL_LATCPY_TAIL( 19 );
         HPL_LATCPY_TAIL( 20 ); HPL_LATCPY_TAIL( 21 );
         HPL_LATCPY_TAIL( 22 ); HPL_LATCPY_TAIL( 23 );
         HPL_LATCPY_TAIL( 24 ); HPL_LATCPY_TAIL( 25 );
         HPL_LATCPY_TAIL( 26 ); HPL_LATCPY_TAIL( 27 );
         HPL_LATCPY_TAIL( 28 ); HPL_LATCPY_TAIL( 29 );
         HPL_LATCPY_TAIL( 30 ); HPL_LATCPY_TAIL( 31 );
#endif
      }

      for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; }
   }
#endif
/*
 * End of HPL_dlatcpy
 */
}

#ifndef HPL_LATCPY_USE_COPY
#undef     HPL_LATCPY_ROW
#undef     HPL_LATCPY_TAIL
#endif
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_fprintf +( + FILE * STREAM, + const char * FORM, + ... +) +#else +void HPL_fprintf( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_fprintf is a wrapper around fprintf flushing the output stream. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + va_list argptr; + char cline[256]; +#ifndef STDC_HEADERS + FILE * STREAM; + char * FORM; +#endif +/* .. + * .. Executable Statements .. 
+ */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); + + (void) fprintf( STREAM, "%s", cline ); + (void) fflush( STREAM ); +/* + * End of HPL_fprintf + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_warn.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_warn.c new file mode 100644 index 000000000..bc40818a9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/HPL_warn.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_warn +( + FILE * STREAM, + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_warn( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_warn displays an error message. + * + * + * Arguments + * ========= + * + * STREAM (local input) FILE * + * On entry, STREAM specifies the output stream. + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occured. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + va_list argptr; + char cline[128]; +#ifndef STDC_HEADERS + FILE * STREAM; + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + STREAM = va_arg( argptr, FILE * ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsprintf( cline, FORM, argptr ); + va_end( argptr ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function", + SRNAME, cline ); + else + HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n", + "HPL ERROR on line", LINE, "of function", SRNAME, cline ); +/* + * End of HPL_warn + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Makefile new file mode 100644 index 000000000..e92d18b80 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/auxil/intel64/Makefile @@ -0,0 +1,100 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Build rules for the HPL auxiliary routines.  Tool names, flags and
# paths (CC, CCFLAGS, CCNOOPT, ARCHIVER, ARFLAGS, RANLIB, TOUCH, RM,
# HPLlib, INCdir) are not defined here; they are expected to come from
# Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers every object below is rebuilt against.
#
INCdep           = \
   $(INCdir)/hpl_misc.h   $(INCdir)/hpl_blas.h  $(INCdir)/hpl_auxil.h
#
## Object files ########################################################
#
HPL_au0obj       = \
   HPL_dlacpy.o           HPL_dlatcpy.o          HPL_fprintf.o          \
   HPL_warn.o             HPL_abort.o            HPL_dlaprnt.o          \
   HPL_dlange.o
HPL_au1obj       = \
   HPL_dlamch.o
HPL_auxobj       = \
   $(HPL_au0obj) $(HPL_au1obj)
#
## Targets #############################################################
#
# all, lib and clean name actions, not files: declare them phony so a
# stray file with one of these names cannot make the target look up to
# date and silently skip it.
#
.PHONY           : all lib clean
#
all              : lib
#
lib              : lib.grd
#
# lib.grd is a stamp file: it records that the objects were archived
# into $(HPLlib).
#
lib.grd          : $(HPL_auxobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_dlacpy.o           : ../HPL_dlacpy.c           $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlacpy.c
HPL_dlatcpy.o          : ../HPL_dlatcpy.c          $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlatcpy.c
HPL_fprintf.o          : ../HPL_fprintf.c          $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_fprintf.c
HPL_warn.o             : ../HPL_warn.c             $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_warn.c
HPL_abort.o            : ../HPL_abort.c            $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_abort.c
HPL_dlaprnt.o          : ../HPL_dlaprnt.c          $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlaprnt.c
HPL_dlange.o           : ../HPL_dlange.c           $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS)  ../HPL_dlange.c
#
# HPL_dlamch is the only object built with $(CCNOOPT) instead of
# $(CCFLAGS) -- presumably to keep the optimizer away from it; confirm
# against Make.inc.
#
HPL_dlamch.o           : ../HPL_dlamch.c           $(INCdep)
	$(CC) -o $@ -c $(CCNOOPT)  ../HPL_dlamch.c
#
# ######################################################################
#
clean            :
	$(RM) *.o *.grd
#
# ######################################################################
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_daxpy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_daxpy.c
new file mode 100644
index 000000000..72be5774b
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_daxpy.c
@@ -0,0 +1,175 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS  PROVIDED  BY  THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_daxpy   /* compiled only when HPL_daxpy is not already defined as a macro */
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+void HPL_daxpy
+(
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else               /* old-style (K&R) definition with the same parameters */
+void HPL_daxpy
+( N, ALPHA, X, INCX, Y, INCY )
+   const int                        N;
+   const double                     ALPHA;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_daxpy scales the vector x by alpha and adds it to y.
+ *
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * ALPHA   (local input)                 const double
+ *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+ *         supplied as zero, then the entries of the incremented array X
+ *         need not be set on input.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the scaled entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS /* backend 1: forward the arguments unchanged to cblas_daxpy */
+   cblas_daxpy( N, ALPHA, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL /* backend 2: hand-written loop, no BLAS routine is called */
+   register const double     alpha = ALPHA;
+   register double           x0, x1, x2, x3, y0, y1, y2, y3;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY;
+
+   if( ( N > 0 ) && ( alpha != HPL_rzero ) ) /* otherwise Y is left untouched */
+   {
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 ) /* nu = N with its two low bits cleared, i.e. a multiple of 4 */
+      {
+         StX = X + nu * INCX; /* value X reaches after nu elements; used as the loop sentinel */
+
+         do /* four elements per pass; every load is issued before the first store */
+         {
+            x0 = (*X);     y0 = (*Y);     x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+
+            *Y       = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1;
+            Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3;
+
+            X  += incX4;
+            Y  += incY4;
+
+         } while( X != StX );
+      }
+
+      for( i = N - nu; i != 0; i-- ) /* remaining N - nu (fewer than 4) elements, one at a time */
+      {
+         x0  = (*X);
+         y0  = (*Y);
+
+         *Y  = y0 + alpha * x0;
+
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS /* backend 3: Fortran-style call, every argument is passed by address */
+   double                    alpha = ALPHA; /* addressable copy: &alpha is handed to F77daxpy */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else /* no F77_INTEGER copies: the F77 names alias the C parameters */
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_daxpy (the body is empty when none of the three HPL_CALL_* macros is defined)
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dcopy.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dcopy.c
new file mode 100644
index 000000000..a8fe24109
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dcopy.c
@@ -0,0 +1,168 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+
*    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution  and  use in  source and binary forms, with or without
+ * modification, are  permitted provided  that the following  conditions
+ * are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ * notice, this list of conditions,  and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All  advertising  materials  mentioning  features  or  use of this
+ * software must display the following acknowledgement:
+ * This  product  includes  software  developed  at  the  University  of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the  University,  the name of the  Laboratory,  or the
+ * names  of  its  contributors  may  not  be used to endorse or promote
+ * products  derived   from   this  software  without  specific  written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS  PROVIDED  BY  THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dcopy   /* compiled only when HPL_dcopy is not already defined as a macro */
+
+#ifdef STDC_HEADERS /* prototype-style definition */
+void HPL_dcopy
+(
+   const int                        N,
+   const double *                   X,
+   const int                        INCX,
+   double *                         Y,
+   const int                        INCY
+)
+#else               /* old-style (K&R) definition with the same parameters */
+void HPL_dcopy
+( N, X, INCX, Y, INCY )
+   const int                        N;
+   const double *                   X;
+   const int                        INCX;
+   double *                         Y;
+   const int                        INCY;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dcopy copies the vector x into the vector y.
+ *
+ *
+ * Arguments
+ * =========
+ *
+ * N       (local input)                 const int
+ *         On entry, N specifies the length of the vectors  x  and  y. N
+ *         must be at least zero.
+ *
+ * X       (local input)                 const double *
+ *         On entry,  X  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+ *
+ * INCX    (local input)                 const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero.
+ *
+ * Y       (local input/output)          double *
+ *         On entry,  Y  is an incremented array of dimension  at  least
+ *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+ *         On exit, the entries of the incremented array  Y  are updated
+ *         with the entries of the incremented array X.
+ *
+ * INCY    (local input)                 const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero.
+ *
+ * ---------------------------------------------------------------------
+ */
+#ifdef HPL_CALL_CBLAS /* backend 1: forward the arguments unchanged to cblas_dcopy */
+   cblas_dcopy( N, X, INCX, Y, INCY );
+#endif
+#ifdef HPL_CALL_VSIPL /* backend 2: hand-written loop, no BLAS routine is called */
+   register double           x0, x1, x2, x3, x4, x5, x6, x7;
+   const double              * StX;
+   register int              i;
+   int                       nu;
+   const int                 incX2 = 2 * INCX, incY2 = 2 * INCY,
+                             incX3 = 3 * INCX, incY3 = 3 * INCY,
+                             incX4 = 4 * INCX, incY4 = 4 * INCY,
+                             incX5 = 5 * INCX, incY5 = 5 * INCY,
+                             incX6 = 6 * INCX, incY6 = 6 * INCY,
+                             incX7 = 7 * INCX, incY7 = 7 * INCY,
+                             incX8 = 8 * INCX, incY8 = 8 * INCY;
+
+   if( N > 0 ) /* nothing is copied for N <= 0 */
+   {
+      if( ( nu = ( N >> 3 ) << 3 ) != 0 ) /* nu = N with its three low bits cleared, i.e. a multiple of 8 */
+      {
+         StX = X + nu * INCX; /* value X reaches after nu elements; used as the loop sentinel */
+
+         do /* eight elements per pass; all eight loads are issued before the first store */
+         {
+            x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
+            x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];
+
+            *Y       = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5;
+            Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7;
+
+            X  += incX8;
+            Y  += incY8;
+
+         } while( X != StX );
+      }
+
+      for( i = N - nu; i != 0; i-- ) /* remaining N - nu (fewer than 8) elements, one at a time */
+      {
+         x0  = (*X);
+         *Y  = x0;
+
+         X  += INCX;
+         Y  += INCY;
+      }
+   }
+#endif
+#ifdef HPL_CALL_FBLAS /* backend 3: Fortran-style call, the integer arguments are passed by address */
+#ifdef HPL_USE_F77_INTEGER_DEF
+   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
+#else /* no F77_INTEGER copies: the F77 names alias the C parameters */
+#define F77N                 N
+#define F77incx              INCX
+#define F77incy              INCY
+#endif
+   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
+#endif
+/*
+ * End of HPL_dcopy (the body is empty when none of the three HPL_CALL_* macros is defined)
+ */
+}
+
+#endif
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemm.c
new file mode 100644
index 000000000..b222e4717
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemm.c
@@ -0,0 +1,521 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dgemm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dgemmNN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iail, iblj, icij, j, jal, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) + { + t0 = ALPHA * B[iblj]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmNT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double 
t0; + int i, iail, ibj, ibjl, icij, j, jal, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + HPL_dscal( M, BETA, C+jcj, 1 ); + for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) + { + t0 = ALPHA * B[ibjl]; + for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) + { C[icij] += A[iail] * t0; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTN +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iai, iail, iblj, icij, j, jbj, jcj, l; + + for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) + { + for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) + { t0 += A[iail] * B[iblj]; } + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemmTT +( + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + register double t0; + int i, iali, ibj, ibjl, icij, j, jai, jcj, l; + + for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) + { + for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) + { + t0 = HPL_rzero; + for( l = 0, iali = jai, ibjl = ibj; + l < K; l++, iali += 
1, ibjl += LDB ) t0 += A[iali] * B[ibjl]; + if( BETA == HPL_rzero ) C[icij] = HPL_rzero; + else C[icij] *= BETA; + C[icij] += ALPHA * t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dgemm0 +( + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, + BETA, C, LDC ) + const enum HPL_TRANS TRANSA, TRANSB; + const int K, LDA, LDB, LDC, M, N; + const double ALPHA, BETA; + const double * A, * B; + double * C; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) || + ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) && + ( BETA == HPL_rone ) ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } + return; + } + + if( TRANSB == HplNoTrans ) + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } + else + { + if( TRANSA == HplNoTrans ) + { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + else + { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dgemm +( + const enum HPL_ORDER ORDER, + const enum HPL_TRANS TRANSA, + const enum HPL_TRANS TRANSB, + const int M, + const int N, + const int K, + const double ALPHA, + const double * A, + const int LDA, + const double * B, + const int LDB, + const double BETA, + double * C, + const int LDC +) +#else +void HPL_dgemm +( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) + const enum HPL_ORDER ORDER; + const enum HPL_TRANS TRANSA; + const enum HPL_TRANS TRANSB; + const int M; + const int N; + const int K; + const double ALPHA; + const double * A; + const int LDA; + 
const double * B; + const int LDB; + const double BETA; + double * C; + const int LDC; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemm performs one of the matrix-matrix operations + * + * C := alpha * op( A ) * op( B ) + beta * C + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * Alpha and beta are scalars, and A, B and C are matrices, with op(A) + * an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANSA (local input) const enum HPL_TRANS + * On entry, TRANSA specifies the form of op(A) to be used in + * the matrix-matrix operation follows: + * TRANSA==HplNoTrans : op( A ) = A, + * TRANSA==HplTrans : op( A ) = A^T, + * TRANSA==HplConjTrans : op( A ) = A^T. + * + * TRANSB (local input) const enum HPL_TRANS + * On entry, TRANSB specifies the form of op(B) to be used in + * the matrix-matrix operation follows: + * TRANSB==HplNoTrans : op( B ) = B, + * TRANSB==HplTrans : op( B ) = B^T, + * TRANSB==HplConjTrans : op( B ) = B^T. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix + * op(A) and of the matrix C. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix + * op(B) and the number of columns of the matrix C. N must be + * at least zero. + * + * K (local input) const int + * On entry, K specifies the number of columns of the matrix + * op(A) and the number of rows of the matrix op(B). K must be + * be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrices A and B + * need not be set on input. 
+ * + * A (local input) const double * + * On entry, A is an array of dimension (LDA,ka), where ka is + * k when TRANSA==HplNoTrans, and is m otherwise. Before + * entry with TRANSA==HplNoTrans, the leading m by k part of + * the array A must contain the matrix A, otherwise the leading + * k by m part of the array A must contain the matrix A. + * + * LDA (local input) const int + * On entry, LDA specifies the first dimension of A as declared + * in the calling (sub) program. When TRANSA==HplNoTrans then + * LDA must be at least max(1,m), otherwise LDA must be at least + * max(1,k). + * + * B (local input) const double * + * On entry, B is an array of dimension (LDB,kb), where kb is + * n when TRANSB==HplNoTrans, and is k otherwise. Before + * entry with TRANSB==HplNoTrans, the leading k by n part of + * the array B must contain the matrix B, otherwise the leading + * n by k part of the array B must contain the matrix B. + * + * LDB (local input) const int + * On entry, LDB specifies the first dimension of B as declared + * in the calling (sub) program. When TRANSB==HplNoTrans then + * LDB must be at least max(1,k), otherwise LDB must be at least + * max(1,n). + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When BETA is + * supplied as zero then the elements of the matrix C need + * not be set on input. + * + * C (local input/output) double * + * On entry, C is an array of dimension (LDC,n). Before entry, + * the leading m by n part of the array C must contain the + * matrix C, except when beta is zero, in which case C need not + * be set on entry. On exit, the array C is overwritten by the + * m by n matrix ( alpha*op( A )*op( B ) + beta*C ). + * + * LDC (local input) const int + * On entry, LDC specifies the first dimension of C as declared + * in the calling (sub) program. LDC must be at least + * max(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + printf("Order %d, TransA %d, TransB %d, M %d, N %d, K %d\n", ORDER, TRANSA, TRANSB, M, N, K); + cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, + C, LDC ); + } + else + { + HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, + C, LDC ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringStructPtr + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef StringCrayStyle + F77_CHAR ftransa; + F77_CHAR ftransb; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, F77K = K, + F77lda = LDA, F77ldb = LDB, F77ldc = LDC; +#else +#define F77M M +#define F77N N +#define F77K K +#define F77lda LDA +#define F77ldb LDB +#define F77ldc LDC +#endif + char ctransa, ctransb; + + if( TRANSA == HplNoTrans ) ctransa = 'N'; + else if( TRANSA == HplTrans ) ctransa = 'T'; + else ctransa = 'C'; + + if( TRANSB == HplNoTrans ) ctransb = 'N'; + else if( TRANSB == HplTrans ) ctransb = 'T'; + else ctransb = 'C'; + + if( ORDER == HplColumnMajor ) + { +#ifdef StringSunStyle + F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, 
&alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, + B, &F77ldb, &beta, C, &F77ldc ); +#endif + } + else + { +#ifdef StringSunStyle + F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructVal + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif +#ifdef StringStructPtr + ftransa.len = 1; ftransa.cp = &ctransa; + ftransb.len = 1; ftransb.cp = &ctransb; + F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, + A, &F77lda, &beta, C, &F77ldc ); +#endif + } +#endif +/* + * End of HPL_dgemm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemv.c new file mode 100644 index 000000000..6366c5a48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dgemv.c @@ -0,0 +1,326 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifndef HPL_dgemv      /* compiled only when HPL_dgemv is not already defined as a macro */
+
+#ifdef HPL_CALL_VSIPL  /* the static kernel below serves the hand-written backend only */
+
+#ifdef STDC_HEADERS
+static void HPL_dgemv0
+(
+   const enum HPL_TRANS       TRANS,
+   const int                  M,
+   const int                  N,
+   const double               ALPHA,
+   const double               * A,
+   const int                  LDA,
+   const double               * X,
+   const int                  INCX,
+   const double               BETA,
+   double                     * Y,
+   const int                  INCY
+)
+#else
+static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_TRANS       TRANS;
+   const int                  INCX, INCY, LDA, M, N;
+   const double               ALPHA, BETA;
+   const double               * A, * X;
+   double                     * Y;
+#endif
+{ /* y := ALPHA * op( A ) * x + BETA * y, with A(i,j) read at [i + j*LDA], i < M, j < N */
+/*
+ * .. Local Variables ..
+ */
+   int                        i, iaij, ix, iy, j, jaj, jx, jy;
+   register double            t0;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( ( M == 0 ) || ( N == 0 ) ||
+       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return; /* y would be left unchanged */
+
+   if( ALPHA == HPL_rzero ) { HPL_dscal( ( TRANS == HplNoTrans ? M : N ), BETA, Y, INCY ); return; } /* y has M entries for op(A)=A, N otherwise (was: always M) */
+
+   if( TRANS == HplNoTrans ) /* y (M entries) accumulates ALPHA * x(j) times column j of A */
+   {
+      HPL_dscal( M, BETA, Y, INCY );
+      for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
+      {
+         t0 = ALPHA * X[jx];
+         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
+         { Y[iy] += A[iaij] * t0; }
+      }
+   }
+   else /* any other TRANS: y(j), j < N, is the dot product of column j of A with x (M entries) */
+   {
+      for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY )
+      {
+         t0 = HPL_rzero;
+         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
+         { t0 += A[iaij] * X[ix]; }
+         if( BETA == HPL_rzero ) Y[jy]  = ALPHA * t0; /* Y is not read when BETA is zero */
+         else                    Y[jy]  = BETA * Y[jy] + ALPHA * t0;
+      }
+   }
+}
+#endif
+
+#ifdef STDC_HEADERS
+void HPL_dgemv
+(
+   const enum HPL_ORDER             ORDER,
+   const enum HPL_TRANS             TRANS,
+   const int                        M,
+   const int                        N,
+   const double                     ALPHA,
+   const double *                   A,
+   const int                        LDA,
+   const double *                   X,
+   const int                        INCX,
+   const double                     BETA,
+   double *                         Y,
+   const int                        INCY
+)
+#else
+void HPL_dgemv
+( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+   const enum HPL_ORDER             ORDER;
+   const enum HPL_TRANS             TRANS;
+   const int                        M;
+   const
int N; + const double ALPHA; + const double * A; + const int LDA; + const double * X; + const int INCX; + const double BETA; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dgemv performs one of the matrix-vector operations + * + * y := alpha * op( A ) * x + beta * y, + * + * where op( X ) is one of + * + * op( X ) = X or op( X ) = X^T. + * + * where alpha and beta are scalars, x and y are vectors and A is an m + * by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the operation to be performed as + * follows: + * TRANS = HplNoTrans y := alpha*A *x + beta*y, + * TRANS = HplTrans y := alpha*A^T*x + beta*y. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then A and X need not be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. 
+ * INCX must not be zero. + * + * BETA (local input) const double + * On entry, BETA specifies the scalar beta. When ALPHA is + * supplied as zero then Y need not be set on input. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * Before entry with BETA non-zero, the incremented array Y must + * contain the vector y. On exit, Y is overwritten by the + * updated vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } + else + { + HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), + N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA, beta = BETA; +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR ftran; +#endif +#ifdef StringStructPtr + F77_CHAR ftran; +#endif +#ifdef StringCrayStyle + F77_CHAR ftran; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + char ctran; + + if( ORDER == HplColumnMajor ) + { + ctran = ( TRANS == HplNoTrans ? 
'N' : 'T' ); + +#ifdef StringSunStyle + F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + else + { + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); +#ifdef StringSunStyle + F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructVal + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif +#ifdef StringStructPtr + ftran.len = 1; ftran.cp = &ctran; + F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, + &beta, Y, &F77incy ); +#endif + } + +#endif +/* + * End of HPL_dgemv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dger.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dger.c new file mode 100644 index 000000000..5ea702778 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dger.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dger + +#ifdef STDC_HEADERS +void HPL_dger +( + const enum HPL_ORDER ORDER, + const int M, + const int N, + const double ALPHA, + const double * X, + const int INCX, + double * Y, + const int INCY, + double * A, + const int LDA +) +#else +void HPL_dger +( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) + const enum HPL_ORDER ORDER; + const int M; + const int N; + const double ALPHA; + const double * X; + const int INCX; + double * Y; + const int INCY; + double * A; + const int LDA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dger performs the rank 1 operation + * + * A := alpha * x * y^T + A, + * + * where alpha is a scalar, x is an m-element vector, y is an n-element + * vector and A is an m by n matrix. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. 
When ALPHA is + * supplied as zero then X and Y need not be set on input. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. + * + * A (local input/output) double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. Before entry, the leading m by n part of the + * array A must contain the matrix coefficients. On exit, A is + * overwritten by the updated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m). 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ); +#endif +#ifdef HPL_CALL_VSIPL + register double t0; + int i, iaij, ix, iy, j, jaj, jx, jy; + + if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return; + + if( ORDER == HplColumnMajor ) + { + for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) + { + t0 = ALPHA * Y[jy]; + for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) + { A[iaij] += X[ix] * t0; } + } + } + else + { + for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX ) + { + t0 = ALPHA * X[jx]; + for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY ) + { A[iaij] += Y[iy] * t0; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77incx = INCX, F77incy = INCY; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77incx INCX +#define F77incy INCY +#endif + + if( ORDER == HplColumnMajor ) + { F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); } + else + { F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); } +#endif +/* + * End of HPL_dger + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dscal.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dscal.c new file mode 100644 index 000000000..7e041991f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dscal.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dscal + +#ifdef STDC_HEADERS +void HPL_dscal +( + const int N, + const double ALPHA, + double * X, + const int INCX +) +#else +void HPL_dscal +( N, ALPHA, X, INCX ) + const int N; + const double ALPHA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dscal scales the vector x by alpha. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero, then the entries of the incremented array X + * need not be set on input. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are scaled + * by the scalar alpha. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dscal( N, ALPHA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, x4, x5, x6, x7; + register const double alpha = ALPHA; + const double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( ( N > 0 ) && ( alpha != HPL_rone ) ) + { + if( alpha == HPL_rzero ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = (double *)X + nu * INCX; + + do + { + (*X) = HPL_rzero; X[incX4] = HPL_rzero; + X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero; + X[incX2] = HPL_rzero; X[incX6] = HPL_rzero; + X[incX3] = HPL_rzero; X[incX7] = HPL_rzero; X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) { *X = HPL_rzero; X += INCX; } + } + else + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + x0 *= alpha; x4 *= alpha; x1 *= alpha; x5 *= alpha; + x2 *= alpha; x6 *= alpha; x3 *= alpha; x7 *= alpha; + + (*X) = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5; + X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7; + + X += incX8; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); x0 *= alpha; *X = x0; X += INCX; } + } + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + + F77dscal( &F77N, &alpha, X, &F77incx ); +#endif +/* + * End of HPL_dscal + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dswap.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dswap.c new file mode 100644 index 000000000..eb1b8e08d --- 
/dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dswap.c @@ -0,0 +1,157 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dswap + +#ifdef STDC_HEADERS +void HPL_dswap +( + const int N, + double * X, + const int INCX, + double * Y, + const int INCY +) +#else +void HPL_dswap +( N, X, INCX, Y, INCY ) + const int N; + double * X; + const int INCX; + double * Y; + const int INCY; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dswap swaps the vectors x and y. + * + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vectors x and y. N + * must be at least zero. + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * On exit, the entries of the incremented array X are updated + * with the entries of the incremented array Y. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * Y (local input/output) double * + * On entry, Y is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. + * On exit, the entries of the incremented array Y are updated + * with the entries of the incremented array X. + * + * INCY (local input) const int + * On entry, INCY specifies the increment for the elements of Y. + * INCY must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dswap( N, X, INCX, Y, INCY ); +#endif +#ifdef HPL_CALL_VSIPL + register double x0, x1, x2, x3, y0, y1, y2, y3; + double * StX; + register int i; + int nu; + const int incX2 = 2 * INCX, incY2 = 2 * INCY, + incX3 = 3 * INCX, incY3 = 3 * INCY, + incX4 = 4 * INCX, incY4 = 4 * INCY; + + if( N > 0 ) + { + if( ( nu = ( N >> 2 ) << 2 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; + x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; + *Y = x0; *X = y0; Y[INCY ] = x1; X[INCX ] = y1; + Y[incY2] = x2; X[incX2] = y2; Y[incY3] = x3; X[incX3] = y3; + X += incX4; Y += incY4; + + } while( X != StX ); + } + + for( i = N - nu; i != 0; i-- ) + { x0 = (*X); y0 = (*Y); *Y = x0; *X = y0; X += INCX; Y += INCY; } + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; +#else +#define F77N N +#define F77incx INCX +#define F77incy INCY +#endif + F77dswap( &F77N, X, &F77incx, Y, &F77incy ); +#endif +/* + * End of HPL_dswap + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsm.c new file mode 100644 index 000000000..a336a7d29 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsm.c @@ -0,0 +1,977 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsm + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) + { + for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; + i < M; i++, iaik +=1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, 
jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; + i >= 0; i--, jai -= LDA, ibij -= 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; + k < M; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + B[ibkj] /= A[k+jak]; + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void 
HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaik, ibij, ibkj, j, jak, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; + k >= 0; k--, jak -= LDA, ibkj -= 1 ) + { + for( i = 0, iaik = jak, ibij = jbj; + i < k; i++, iaik += 1, ibij += 1 ) + { B[ibij] -= B[ibkj] * A[iaik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iaki, ibij, ibkj, j, jai, jbj, k; + register double t0; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + t0 /= A[i+jai]; + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmLUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iaki, ibij, ibkj, j, jai, jbj, k; + + for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) + { + for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) + { + t0 = ALPHA * B[ibij]; + for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) + { t0 -= A[iaki] * B[ibkj]; } + B[ibij] = t0; + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNN +( + const int M, + const int N, + 
const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; + j >= 0; j--, jaj -= LDA, jbj -= LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; + k < N; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + 
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRLTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) + { + for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; + j < N; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUNN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } + } +} + + +#ifdef STDC_HEADERS +static void 
HPL_dtrsmRUNU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; + + for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) + { + for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } + for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) + { + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= A[iakj] * B[ibik]; } + } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTN +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsmRUTU +( + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + register double t0; + int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; + + for( k = N-1, jak = (N-1)*LDA, jbk = 
(N-1)*LDB; + k >= 0; k--, jak -= LDA, jbk -= LDB ) + { + for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) + { + t0 = A[iajk]; + for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) + { B[ibij] -= t0 * B[ibik]; } + } + for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsm0 +( + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int LDA, LDB, M, N; + const double ALPHA; + const double * A; + double * B; +#endif +{ + int i, j; + + if( ( M == 0 ) || ( N == 0 ) ) return; + + if( ALPHA == HPL_rzero ) + { + for( j = 0; j < N; j++ ) + { for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } + return; + } + + if( SIDE == HplLeft ) + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } + else + { + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { 
HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); } + } + else + { + if( DIAG == HplNonUnit ) + { HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } + else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); } + } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsm +( + const enum HPL_ORDER ORDER, + const enum HPL_SIDE SIDE, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int M, + const int N, + const double ALPHA, + const double * A, + const int LDA, + double * B, + const int LDB +) +#else +void HPL_dtrsm +( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) + const enum HPL_ORDER ORDER; + const enum HPL_SIDE SIDE; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int M; + const int N; + const double ALPHA; + const double * A; + const int LDA; + double * B; + const int LDB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsm solves one of the matrix equations + * + * op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + * + * where alpha is a scalar, X and B are m by n matrices, A is a unit, or + * non-unit, upper or lower triangular matrix and op(A) is one of + * + * op( A ) = A or op( A ) = A^T. + * + * The matrix X is overwritten on B. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. 
+ * + * SIDE (local input) const enum HPL_SIDE + * On entry, SIDE specifies whether op(A) appears on the left + * or right of X as follows: + * SIDE==HplLeft op( A ) * X = alpha * B, + * SIDE==HplRight X * op( A ) = alpha * B. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the form of op(A) to be used in + * the matrix-matrix operation as follows: + * TRANS==HplNoTrans : op( A ) = A, + * TRANS==HplTrans : op( A ) = A^T, + * TRANS==HplConjTrans : op( A ) = A^T. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * M (local input) const int + * On entry, M specifies the number of rows of the matrix B. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the matrix B. + * N must be at least zero. + * + * ALPHA (local input) const double + * On entry, ALPHA specifies the scalar alpha. When ALPHA is + * supplied as zero then the elements of the matrix B need not + * be set on input. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * k, where k is m when SIDE==HplLeft and is n + * otherwise. Before entry with UPLO==HplUpper, the leading + * k by k upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced.
When UPLO==HplLower on entry, + * the leading k by k lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. + * + * B (local input/output) double * + * On entry, B points to an array of size equal to or greater + * than LDB * n. Before entry, the leading m by n part of the + * array B must contain the matrix B, except when beta is zero, + * in which case B need not be set on entry. On exit, the array + * B is overwritten by the m by n solution matrix. + * + * LDB (local input) const int + * On entry, LDB specifies the leading dimension of B as + * declared in the calling (sub) program. LDB must be at + * least MAX(1,m). + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); + } + else + { + HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), + ( UPLO == HplLower ? 
HplUpper : HplLower ), + TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); + } +#endif +#ifdef HPL_CALL_FBLAS + double alpha = ALPHA; +#ifdef StringSunStyle +#if defined( HPL_USE_F77_INTEGER_DEF ) + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fside; + F77_CHAR fuplo; + F77_CHAR ftran; + F77_CHAR fdiag; +#endif +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77M = M, F77N = N, + F77lda = LDA, F77ldb = LDB; +#else +#define F77M M +#define F77N N +#define F77lda LDA +#define F77ldb LDB +#endif + char cside, cuplo, ctran, cdiag; + + if( TRANS == HplNoTrans ) ctran = 'N'; + else if( TRANS == HplTrans ) ctran = 'T'; + else ctran = 'C'; + cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); + + if( ORDER == HplColumnMajor ) + { + cside = ( SIDE == HplRight ? 'R' : 'L' ); + cuplo = ( UPLO == HplLower ? 
'L' : 'U' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } + else + { + cside = ( SIDE == HplRight ? 'L' : 'R' ); + cuplo = ( UPLO == HplLower ? 
'U' : 'L' ); +#ifdef StringSunStyle + F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructVal + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif +#ifdef StringStructPtr + fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; + ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, + A, &F77lda, B, &F77ldb ); +#endif + } +#endif +/* + * End of HPL_dtrsm + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsv.c new file mode 100644 index 000000000..99e84f073 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_dtrsv.c @@ -0,0 +1,520 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_dtrsv + +#ifdef HPL_CALL_VSIPL + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + X[jx] /= A[jaj]; t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = jaj+1, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvLTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + t0 /= A[jaj]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvLTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int 
INCX +) +#else +static void HPL_dtrsvLTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; + register double t0; + + for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; + j >= 0; j--, jaj -= ldap1, jx -= INCX ) + { + t0 = X[jx]; + for( i = j+1, iaij = 1+jaj, ix = jx + INCX; + i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + X[jx] /= A[j+jaj]; t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUNU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUNU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; + j >= 0; j--, jaj -= LDA, jx -= INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { X[ix] -= t0 * A[iaij]; } + } +} + + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTN +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTN( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij 
+= 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + t0 /= A[iaij]; X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsvUTU +( + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsvUTU( N, A, LDA, X, INCX ) + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + int i, iaij, ix, j, jaj, jx; + register double t0; + + for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) + { + t0 = X[jx]; + for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) + { t0 -= A[iaij] * X[ix]; } + X[jx] = t0; + } +} + +#ifdef STDC_HEADERS +static void HPL_dtrsv0 +( + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int INCX, LDA, N; + const double * A; + double * X; +#endif +{ + if( N == 0 ) return; + + if( UPLO == HplUpper ) + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvUTU( N, A, LDA, X, INCX ); } + } + } + else + { + if( TRANS == HplNoTrans ) + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLNU( N, A, LDA, X, INCX ); } + } + else + { + if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N, A, LDA, X, INCX ); } + else { HPL_dtrsvLTU( N, A, LDA, X, INCX ); } + } + } +} + +#endif + +#ifdef STDC_HEADERS +void HPL_dtrsv +( + const enum HPL_ORDER ORDER, + const enum HPL_UPLO UPLO, + const enum HPL_TRANS TRANS, + const enum HPL_DIAG DIAG, + const int N, + const double * A, + const int LDA, + double * X, + const int INCX +) +#else +void HPL_dtrsv +( ORDER, UPLO, TRANS, DIAG, 
N, A, LDA, X, INCX ) + const enum HPL_ORDER ORDER; + const enum HPL_UPLO UPLO; + const enum HPL_TRANS TRANS; + const enum HPL_DIAG DIAG; + const int N; + const double * A; + const int LDA; + double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dtrsv solves one of the systems of equations + * + * A * x = b, or A^T * x = b, + * + * where b and x are n-element vectors and A is an n by n non-unit, or + * unit, upper or lower triangular matrix. + * + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Arguments + * ========= + * + * ORDER (local input) const enum HPL_ORDER + * On entry, ORDER specifies the storage format of the operands + * as follows: + * ORDER = HplRowMajor, + * ORDER = HplColumnMajor. + * + * UPLO (local input) const enum HPL_UPLO + * On entry, UPLO specifies whether the upper or lower + * triangular part of the array A is to be referenced. When + * UPLO==HplUpper, only the upper triangular part of A is to be + * referenced, otherwise only the lower triangular part of A is + * to be referenced. + * + * TRANS (local input) const enum HPL_TRANS + * On entry, TRANS specifies the equations to be solved as + * follows: + * TRANS==HplNoTrans A * x = b, + * TRANS==HplTrans A^T * x = b. + * + * DIAG (local input) const enum HPL_DIAG + * On entry, DIAG specifies whether A is unit triangular or + * not. When DIAG==HplUnit, A is assumed to be unit triangular, + * and otherwise, A is not assumed to be unit triangular. + * + * N (local input) const int + * On entry, N specifies the order of the matrix A. N must be at + * least zero. + * + * A (local input) const double * + * On entry, A points to an array of size equal to or greater + * than LDA * n. 
Before entry with UPLO==HplUpper, the leading + * n by n upper triangular part of the array A must contain the + * upper triangular matrix and the strictly lower triangular + * part of A is not referenced. When UPLO==HplLower on entry, + * the leading n by n lower triangular part of the array A must + * contain the lower triangular matrix and the strictly upper + * triangular part of A is not referenced. + * + * Note that when DIAG==HplUnit, the diagonal elements of A + * not referenced either, but are assumed to be unity. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of A as + * declared in the calling (sub) program. LDA must be at + * least MAX(1,n). + * + * X (local input/output) double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * Before entry, the incremented array X must contain the n + * element right-hand side vector b. On exit, X is overwritten + * with the solution vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); +#endif +#ifdef HPL_CALL_VSIPL + if( ORDER == HplColumnMajor ) + { + HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); + } + else + { + HPL_dtrsv0( ( UPLO == HplUpper ? HplLower : HplUpper ), + ( TRANS == HplNoTrans ? 
HplTrans : HplNoTrans ), + DIAG, N, A, LDA, X, INCX ); + } +#endif +#ifdef HPL_CALL_FBLAS +#ifdef StringSunStyle +#ifdef HPL_USE_F77_INTEGER_DEF + F77_INTEGER IONE = 1; +#else + int IONE = 1; +#endif +#endif +#ifdef StringStructVal + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringStructPtr + F77_CHAR fuplo, ftran, fdiag; +#endif +#ifdef StringCrayStyle + F77_CHAR fuplo, ftran, fdiag; +#endif + +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77lda = LDA, F77incx = INCX; +#else +#define F77N N +#define F77lda LDA +#define F77incx INCX +#endif + char cuplo, ctran, cdiag; + + if( ORDER == HplColumnMajor ) + { + cuplo = ( UPLO == HplUpper ? 'U' : 'L' ); + ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); + } + else + { + cuplo = ( UPLO == HplUpper ? 'L' : 'U' ); + ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); + } + cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' ); + +#ifdef StringSunStyle + F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx, + IONE, IONE, IONE ); +#endif +#ifdef StringCrayStyle + ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); + fuplo = HPL_C2F_CHAR( cuplo ); + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructVal + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif +#ifdef StringStructPtr + fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; + fdiag.len = 1; fdiag.cp = &cdiag; + F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx ); +#endif + +#endif +/* + * End of HPL_dtrsv + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_idamax.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_idamax.c new file mode 100644 index 000000000..5ceabdf25 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/HPL_idamax.c @@ -0,0 
+1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifndef HPL_idamax + +#ifdef STDC_HEADERS +int HPL_idamax +( + const int N, + const double * X, + const int INCX +) +#else +int HPL_idamax +( N, X, INCX ) + const int N; + const double * X; + const int INCX; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_idamax returns the index in an n-vector x of the first element + * having maximum absolute value. + * + * Arguments + * ========= + * + * N (local input) const int + * On entry, N specifies the length of the vector x. N must be + * at least zero. + * + * X (local input) const double * + * On entry, X is an incremented array of dimension at least + * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. + * + * INCX (local input) const int + * On entry, INCX specifies the increment for the elements of X. + * INCX must not be zero. 
+ * + * --------------------------------------------------------------------- + */ +#ifdef HPL_CALL_CBLAS + return( (int)(cblas_idamax( N, X, INCX )) ); +#endif +#ifdef HPL_CALL_VSIPL + register double absxi, smax = HPL_rzero, x0, x1, x2, x3, + x4, x5, x6, x7; + const double * StX; + register int imax = 0, i = 0, j; + int nu; + const int incX2 = 2 * INCX, incX3 = 3 * INCX, + incX4 = 4 * INCX, incX5 = 5 * INCX, + incX6 = 6 * INCX, incX7 = 7 * INCX, + incX8 = 8 * INCX; + + if( N > 0 ) + { + if( ( nu = ( N >> 3 ) << 3 ) != 0 ) + { + StX = X + nu * INCX; + + do + { + x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; + x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; + + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + + X += incX8; + + } while( X != StX ); + } + + for( j = N - nu; j != 0; j-- ) + { + x0 = (*X); + absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } + i += 1; + X += INCX; + } + } + return( imax ); +#endif +#ifdef HPL_CALL_FBLAS +#ifdef HPL_USE_F77_INTEGER_DEF + const F77_INTEGER F77N = N, F77incx = INCX; +#else +#define F77N N +#define F77incx INCX +#endif + int imax = 0; + + if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1; + return( imax ); +#endif +/* + * End of HPL_idamax + */ +} + +#endif diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Make.inc 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Makefile new file mode 100644 index 000000000..ed9f3d0e2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/Makefile @@ -0,0 +1,98 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h +# +## Object files ######################################################## +# +HPL_blaobj = \ + HPL_dcopy.o HPL_daxpy.o HPL_dscal.o \ + HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o \ + HPL_dger.o HPL_dgemm.o HPL_dtrsm.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_blaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dcopy.o : ../HPL_dcopy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dcopy.c +HPL_daxpy.o : ../HPL_daxpy.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_daxpy.c +HPL_dscal.o : ../HPL_dscal.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dscal.c +HPL_idamax.o : ../HPL_idamax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_idamax.c +HPL_dgemv.o : ../HPL_dgemv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemv.c +HPL_dtrsv.o : 
../HPL_dtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsv.c +HPL_dger.o : ../HPL_dger.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dger.c +HPL_dgemm.o : ../HPL_dgemm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemm.c +HPL_dtrsm.o : ../HPL_dtrsm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsm.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/blas/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1rinM.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1rinM.c new file mode 100644 index 000000000..dd03b79b1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1rinM.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, then send message to its two + * next neighbors. Otherwise, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, or + * just after the root process, then forward it to the next. Otherwise, + * inform the caller that the panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next, + size ), msgid, comm ); + } + } + else + { + prev = MModSub1( rank, size ); + if( ( size > 2 ) && + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( prev != root ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1ring.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1ring.c new file mode 100644 index 000000000..dd5eb2d12 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_1ring.c @@ -0,0 +1,216 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_1ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_1ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, prev, rank, root, + size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, probe for message. If the message is here, + * then receive it, and if I am not the last process of the ring, then + * forward it to the next. Otherwise, inform the caller that the panel + * has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank, + size ), msgid, comm ); + } + else + { + prev = MModSub1( rank, size ); + + ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid, + comm, &PANEL->status[0] ); + next = MModAdd1( rank, size ); + if( ( ierr == MPI_SUCCESS ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, + msgid, comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_1ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_1ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2rinM.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2rinM.c new file mode 100644 index 000000000..56581ea0d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2rinM.c @@ -0,0 +1,236 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2rinM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2rinM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, prev, + rank, roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process send to its two right neighbors and mid-pro- + * cess. If I am not the root process, probe for message. If the message + * is there, then receive it. If I am not the last process of both rings + * then forward it to the next. Otherwise, inform the caller that the + * panel has still not been received. 
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + if( MModAdd1( next, size ) != roo2 ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, + MModAdd1( next, size ), msgid, comm ); + } + + if( ierr == MPI_SUCCESS ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + } + else + { + prev = MModSub1( rank, size ); + if( ( prev == root ) || ( rank == roo2 ) || + ( MModSub1( prev, size ) == root ) ) partner = root; + else partner = prev; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && ( prev != root ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occured in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2rinM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2rinM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the arrays of request / status / data-types and buffers + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2ring.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2ring.c new file mode 100644 index 000000000..f0e6e2647 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_2ring.c @@ -0,0 +1,224 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +int HPL_binit_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +/* + * Create the MPI user-defined data type + */ + ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); + + return( HPL_SUCCESS ); +#endif +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF PANEL->buffers[0] +#define _M_COUNT PANEL->counts[0] +#define _M_TYPE PANEL->dtypes[0] + +#else + +#define _M_BUFF (void *)(PANEL->L2) +#define _M_COUNT PANEL->len +#define _M_TYPE MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_2ring +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_2ring( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * HPL_bcast_2ring makes one attempt at this process's part of the + * broadcast. *IFLAG is set to HPL_SUCCESS, HPL_FAILURE or + * HPL_KEEP_TESTING (no message available yet), and the same value is + * returned. A NULL PANEL or a single process column is a no-op that + * reports HPL_SUCCESS. + * + * .. Local Variables .. + */ + MPI_Comm comm; + int ierr, go, next, msgid, partner, rank, + roo2, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process sends to its right neighbor and mid-process. + * If I am not the root process, probe for message. If the message is + * there, then receive it, and if I am not the last process of both + * rings, then forward it to the next. Otherwise, inform the caller that + * the panel has still not been received.
+ */ + rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; + root = PANEL->pcol; msgid = PANEL->msgid; + next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); + roo2 = MModAdd( root, roo2, size ); + + if( rank == root ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); + if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, + comm ); + } + } + else + { + partner = MModSub1( rank, size ); + if( ( partner == root ) || ( rank == roo2 ) ) partner = root; + + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { + if( go != 0 ) + { + ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, + comm, &PANEL->status[0] ); + if( ( ierr == MPI_SUCCESS ) && + ( next != roo2 ) && ( next != root ) ) + { + ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, + comm ); + } + } + else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } + } + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS. + * If an error occurred in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_2ring +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_2ring( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +/* + * Release the MPI data type held in PANEL->dtypes[0]. When + * HPL_USE_MPI_DATATYPE is not defined there is nothing to release and + * HPL_SUCCESS is returned directly. + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_free( &PANEL->dtypes[0] ); + + return( ( ierr == MPI_SUCCESS ?
HPL_SUCCESS : HPL_FAILURE ) ); +#else + return( HPL_SUCCESS ); +#endif +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bcast.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bcast.c new file mode 100644 index 000000000..100161152 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bcast.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bcast +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast +( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bcast broadcasts the current panel. Successful completion is + * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to + * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was + * not completed, in which case this function should be called again. + * + * The work is delegated to the routine matching PANEL->algo->btopo. A + * NULL PANEL, a single process column or an unrecognized topology is a + * no-op reported as HPL_SUCCESS, both in IFLAG and in the return value. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * IFLAG (output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * occurred. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements ..
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break; + case HPL_1RING : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break; + case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break; + case HPL_2RING : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break; + case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break; + case HPL_BLONG : ierr = HPL_bcast_blong( PANEL, IFLAG ); break; +/* + * Unrecognized topology: nothing is sent, but IFLAG is still written so + * that the caller never reads it unset and it agrees with the returned + * code, as on every other path of this function. + */ + default : *IFLAG = ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_binit.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_binit.c new file mode 100644 index 000000000..3daf72b7d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_binit.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_binit +( + HPL_T_panel * PANEL +) +#else +int HPL_binit +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_binit initializes a row broadcast. Successful completion is + * indicated by the returned error code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_binit_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_binit_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_binit_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_binit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blonM.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blonM.c new file mode 100644 index 000000000..5fa221937 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blonM.c @@ -0,0 +1,445 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * HPL_binit_blonM prepares the panel for this broadcast variant by + * calling HPL_copyL; that call is skipped only when + * HPL_USE_MPI_DATATYPE is defined and HPL_COPY_L is not. It always + * returns HPL_SUCCESS, including for a NULL PANEL or a single process + * column, where nothing is done. + * .. + * .. Executable Statements ..
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S1 PANEL->buffers[I_SEND] +#define _M_COUNT_S1 PANEL->counts[I_SEND] +#define _M_TYPE_S1 PANEL->dtypes[I_SEND] + +#define _M_BUFF_S2 PANEL->buffers[I_SEND] +#define _M_COUNT_S2 PANEL->counts[I_SEND] +#define _M_TYPE_S2 PANEL->dtypes[I_SEND] + +#define _M_BUFF_R1 PANEL->buffers[I_RECV] +#define _M_COUNT_R1 PANEL->counts[I_RECV] +#define _M_TYPE_R1 PANEL->dtypes[I_RECV] + +#define _M_BUFF_R2 PANEL->buffers[I_RECV] +#define _M_COUNT_R2 PANEL->counts[I_RECV] +#define _M_TYPE_R2 PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S1 (void *)(PANEL->L2) +#define _M_COUNT_S1 PANEL->len +#define _M_TYPE_S1 MPI_DOUBLE + +#define _M_BUFF_S2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S2 lbuf +#define _M_TYPE_S2 MPI_DOUBLE + +#define _M_BUFF_R1 (void *)(PANEL->L2) +#define _M_COUNT_R1 PANEL->len +#define _M_TYPE_R1 MPI_DOUBLE + +#define _M_BUFF_R2 (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R2 lbuf +#define _M_TYPE_R2 MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blonM +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int
HPL_bcast_blonM( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * HPL_bcast_blonM broadcasts the panel in three stages: the root sends + * the whole panel to its right neighbor, the panel is then spread in + * pieces among the remaining processes (root included), and finally + * the pieces are rolled between neighbors. *IFLAG receives HPL_SUCCESS, + * HPL_FAILURE or HPL_KEEP_TESTING, which is also the value returned. + * + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf, + ibufR, ibufS, dummy=0, indx, ip2=1, k, l, + lbuf, lbufR, lbufS, mask=1, msgid, mydist, + mydist2, next, npm1, npm2, partner, prev, + rank, root, size; +/* .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: root process sends to its right neighbor, then spread + * the panel on the other npcol - 2 processes. If I am not the root + * process, probe for message received. If the message is there, then + * receive it. If I am just after the root process, return. Otherwise, + * keep spreading on those npcol - 2 processes. Otherwise, inform the + * caller that the panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + root = PANEL->pcol; msgid = PANEL->msgid; + prev = MModSub1( rank, size ); + + if( rank == root ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, 0, PANEL->len, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1, + MModAdd1( rank, size ), msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else if( prev == root ) + { +/* + * This probing mechanism causes problems when lookahead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired.
+ * + * ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] ); + */ + if( ierr == MPI_SUCCESS ) + { /* if panel is here, proceed (go keeps its initial value 1 while the probe above is disabled) */ + if( go != 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + ierr = HPL_packL( PANEL, 0, PANEL->len, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1, + root, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } + } +/* + * if I am just after the root, exit now. The message receive completed + * successfully, this guy is done. If there are only 2 processes in each + * row of processes, we are done as well. + */ + if( ( prev == root ) || ( size == 2 ) ) + { + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + return( *IFLAG ); + } +/* + * Otherwise, proceed with broadcast - Spread the panel across process + * columns + */ + npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len; + + k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + if( rank == root ) mydist2 = ( mydist = 0 ); + else mydist2 = ( mydist = MModSub( rank, root, size ) - 1 ); + + indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 ); + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); +/* + * This probing mechanism causes problems when lookahead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired.
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < npm1 ) + { + partner = MModAdd( root, partner, size ); + if( partner != root ) partner = MModAdd1( partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Send message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); + if( MModSub1( prev, size ) == root ) prev = root; + next = MModAdd1( rank, size ); + if( rank == root ) next = MModAdd1( next, size ); + + for( k = 0; k < npm2; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx =
MModAdd( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, npm1 ) ) * count; + lbufS = ( indx == npm2 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count; + lbufR = ( indx == npm2 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the
message was received and being forwarded, return HPL_SUCCESS. + * If an error occurred in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blonM +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blonM( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Nothing is pending for this variant: HPL_bwait_blonM returns + * HPL_SUCCESS on every path. + * .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blong.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blong.c new file mode 100644 index 000000000..e57f11bcc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_blong.c @@ -0,0 +1,363 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +int HPL_binit_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_binit_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * HPL_binit_blong prepares the panel for this broadcast variant by + * calling HPL_copyL; that call is skipped only when + * HPL_USE_MPI_DATATYPE is defined and HPL_COPY_L is not. It always + * returns HPL_SUCCESS, including for a NULL PANEL or a single process + * column, where nothing is done. + * .. + * .. Executable Statements ..
+ */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } +#ifdef HPL_USE_MPI_DATATYPE +#ifdef HPL_COPY_L +/* + * Copy the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif +#else +/* + * Force the copy of the panel into a contiguous buffer + */ + HPL_copyL( PANEL ); +#endif + return( HPL_SUCCESS ); +} + +#ifdef HPL_USE_MPI_DATATYPE + +#define _M_BUFF_S PANEL->buffers[I_SEND] +#define _M_COUNT_S PANEL->counts[I_SEND] +#define _M_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_BUFF_R PANEL->buffers[I_RECV] +#define _M_COUNT_R PANEL->counts[I_RECV] +#define _M_TYPE_R PANEL->dtypes[I_RECV] + +#define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] +#define _M_ROLL_COUNT_S PANEL->counts[I_SEND] +#define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] + +#define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] +#define _M_ROLL_COUNT_R PANEL->counts[I_RECV] +#define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] + +#else + +#define _M_BUFF_S (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_S lbuf +#define _M_TYPE_S MPI_DOUBLE + +#define _M_BUFF_R (void *)(PANEL->L2 + ibuf) +#define _M_COUNT_R lbuf +#define _M_TYPE_R MPI_DOUBLE + +#define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) +#define _M_ROLL_COUNT_S lbufS +#define _M_ROLL_TYPE_S MPI_DOUBLE + +#define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) +#define _M_ROLL_COUNT_R lbufR +#define _M_ROLL_TYPE_R MPI_DOUBLE + +#endif + +#ifdef STDC_HEADERS +int HPL_bcast_blong +( + HPL_T_panel * PANEL, + int * IFLAG +) +#else +int HPL_bcast_blong( PANEL, IFLAG ) + HPL_T_panel * PANEL; + int * IFLAG; +#endif +{ +/* + * HPL_bcast_blong broadcasts the panel in two stages: the panel is + * first spread in pieces across the process columns, then the pieces + * are rolled between neighboring columns. With the probe below + * disabled, *IFLAG is only ever set to HPL_SUCCESS or HPL_FAILURE, + * which is also the value returned. + * + * .. Local Variables .. + */ + MPI_Comm comm; + int COUNT, count, dummy=0, ierr=MPI_SUCCESS, + ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf, + lbufR, lbufS, mask, msgid, mydist, mydist2, + next, npm1, partner, prev, rank, root, size; +/* .. + * .. Executable Statements ..
+ */ + if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } + if( ( size = PANEL->grid->npcol ) <= 1 ) + { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } +/* + * Cast phase: If I am the root process, start spreading the panel. If + * I am not the root process, test for message receive completion. If + * the message is there, then receive it, and keep spreading in a + * blocking fashion this time. Otherwise, inform the caller that the + * panel has still not been received. + */ + comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; + mask = PANEL->grid->col_mask; ip2 = PANEL->grid->col_ip2m1; + root = PANEL->pcol; msgid = PANEL->msgid; + COUNT = PANEL->len; npm1 = size - 1; + mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2; + count = COUNT / size; count = Mmax( count, 1 ); +/* + * Spread the panel across process columns + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = COUNT - ( ibuf = indx * count ); + if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } + + partner = mydist ^ ip2; + + if( ( mydist & ip2 ) != 0 ) + { + partner = MModAdd( root, partner, size ); +/* + * This probing mechanism causes problems when lookahead is on. Too many + * messages are exchanged in this virtual topology causing a hang on + * some machines. It is currently disabled until a better understanding + * is acquired. Note that the disabled code uses a variable go that is + * not declared in this function.
+ */ +#if 0 + ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); + if( ierr == MPI_SUCCESS ) + { /* if panel is not here, return and keep testing */ + if( go == 0 ) + { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } + } +#endif + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R, + partner, msgid, comm, &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else /* Recv message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + } + else if( partner < size ) + { + partner = MModAdd( root, partner, size ); + + if( lbuf > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S, + partner, msgid, comm ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } + else /* Send message of length zero to enable probe */ + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, + partner, msgid, comm ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } + + } while( ip2 > 0 ); +/* + * Roll the pieces + */ + prev = MModSub1( rank, size ); next = MModAdd1( rank, size ); + + for( k = 0; k < npm1; k++ ) + { + l = ( k >> 1 ); +/* + * Who is sending to who and how much + */ + if( ( ( mydist + k ) & 1 ) != 0 ) + { + ibufS = ( indx = MModAdd( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ?
COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = prev; + } + else + { + ibufS = ( indx = MModSub( mydist, l, size ) ) * count; + lbufS = ( indx == npm1 ? COUNT : ibufS + count ); + lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); + + ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count; + lbufR = ( indx == npm1 ? COUNT : ibufR + count ); + lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); + + partner = next; + } +/* + * Exchange the messages + */ + if( lbufS > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, + _M_ROLL_TYPE_S, partner, msgid, comm, + &PANEL->request[0] ); + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->request[0] ); + } + + if( lbufR > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); +#endif + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, + _M_ROLL_TYPE_R, partner, msgid, comm, + &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); +#endif + } + else + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, + msgid, comm, &PANEL->status[0] ); + } + + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); +#ifdef HPL_USE_MPI_DATATYPE + if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) + ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); +#endif + } +/* + * If the message was received and being forwarded, return HPL_SUCCESS.
+ * If an error occurred in an MPI call, return HPL_FAILURE. + */ + *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); + + return( *IFLAG ); +} + +#ifdef STDC_HEADERS +int HPL_bwait_blong +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait_blong( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Nothing is pending for this variant: HPL_bwait_blong returns + * HPL_SUCCESS on every path. + * .. + * .. Executable Statements .. + */ + if( PANEL == NULL ) { return( HPL_SUCCESS ); } + if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } + + return( HPL_SUCCESS ); +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bwait.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bwait.c new file mode 100644 index 000000000..a2e0f4df8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_bwait.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_bwait +( + HPL_T_panel * PANEL +) +#else +int HPL_bwait +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_bwait waits for the row broadcast of the current panel to + * terminate. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ierr; + HPL_T_TOP top; +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); +/* + * Retrieve the selected virtual broadcast topology + */ + top = PANEL->algo->btopo; + + switch( top ) + { + case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break; + case HPL_1RING : ierr = HPL_bwait_1ring( PANEL ); break; + case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break; + case HPL_2RING : ierr = HPL_bwait_2ring( PANEL ); break; + case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break; + case HPL_BLONG : ierr = HPL_bwait_blong( PANEL ); break; + default : ierr = HPL_SUCCESS; + } + + return( ierr ); +/* + * End of HPL_bwait + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_copyL.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_copyL.c new file mode 100644 index 000000000..04f765a6b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_copyL.c @@ -0,0 +1,108 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_copyL +( + HPL_T_panel * PANEL +) +#else +void HPL_copyL +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_copyL copies the panel of columns, the L1 replicated submatrix, + * the pivot array and the info scalar into a contiguous workspace for + * later broadcast. + * + * The copy of this panel into a contiguous buffer can be enforced by + * specifying -DHPL_COPY_L in the architecture specific Makefile. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int jb, lda; +/* .. + * .. Executable Statements .. + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + jb = PANEL->jb; lda = PANEL->lda; + + if( PANEL->grid->myrow == PANEL->prow ) + { + HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + else + { + HPL_dlacpy( PANEL->mp, jb, Mptr( PANEL->A, 0, -jb, lda ), + lda, PANEL->L2, PANEL->ldl2 ); + } + } +/* + * End of HPL_copyL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_packL.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_packL.c new file mode 100644 index 000000000..8a70ef83d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_packL.c @@ -0,0 +1,245 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_packL +( + HPL_T_panel * PANEL, + const int INDEX, + const int LEN, + const int IBUF +) +#else +int HPL_packL +( PANEL, INDEX, LEN, IBUF ) + HPL_T_panel * PANEL; + const int INDEX; + const int LEN; + const int IBUF; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_packL forms the MPI data type for the panel to be broadcast. + * Successful completion is indicated by the returned error code + * MPI_SUCCESS. 
+ * + * Arguments + * ========= + * + * PANEL (input/output) HPL_T_panel * + * On entry, PANEL points to the current panel data structure + * being broadcast. + * + * INDEX (input) const int + * On entry, INDEX points to the first entry of the packed + * buffer being broadcast. + * + * LEN (input) const int + * On entry, LEN is the length of the packed buffer. + * + * IBUF (input) const int + * On entry, IBUF specifies the panel buffer/count/type entries + * that should be initialized. + * + * --------------------------------------------------------------------- + */ +#ifdef HPL_USE_MPI_DATATYPE +/* + * .. Local Variables .. + */ +#ifndef HPL_COPY_L + MPI_Datatype * type = NULL; + void * * * bufs = NULL; + double * A; + int * blen = NULL; + MPI_Aint * disp = NULL; + int curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1, + jb, jbm, jbp1, lda, len, m, m1, nbufs; +#else + int ierr; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_COPY_L +/* + * Panel + L1 + DPIV have been copied into a contiguous buffer - Create + * and commit a contiguous data type + */ + PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX); + PANEL->counts [IBUF] = 1; + + ierr = MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); +#else +/* + * Panel is not contiguous (because of LDA and also L1 + DPIV) - Create + * and commit a struct data type + */ + jbp1 = ( jb = PANEL->jb ) + 1; +/* + * Temporaries to create the type struct. 
+ */ + bufs = (void * * *)malloc( jbp1 * sizeof( void * * ) ); + blen = (int *)malloc( jbp1 * sizeof( int ) ); + disp = (MPI_Aint *)malloc( jbp1 * sizeof( MPI_Aint ) ); + type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) ); + + if( ( bufs != NULL ) && ( blen != NULL ) && + ( disp != NULL ) && ( type != NULL ) ) + { + m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow ); + if( curr != 0 ) m -= jb; + + len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m; + + if( ( m > 0 ) && ( ibuf < jbm ) ) + { +/* + * Retrieve proper pointers depending on process row and column + */ + if( PANEL->grid->mycol == PANEL->pcol ) + { + lda = PANEL->lda; + if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); } + else { A = Mptr( PANEL->A, 0, -jb, lda ); } + } + else { lda = PANEL->ldl2; A = PANEL->L2; } +/* + * Pack the first (partial) column of L + */ + m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m ); + m1 = Mmin( len, m1 ); + + bufs[nbufs] = (void *)(Mptr( A, i1, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; +/* + * Pack the remaining columns of L + */ + while( ( len > 0 ) && ( j1 < jb ) ) + { + m1 = Mmin( len, m ); + + bufs[nbufs] = (void*)(Mptr( A, 0, j1, lda )); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = m1; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + + nbufs++; len -= m1; j1++; ibuf += m1; + } + } +/* + * Pack L1, DPIV, DINFO + */ + if( len > 0 ) + { /* L1, DPIV, DINFO */ + bufs[nbufs] = (void *)(PANEL->L1 + ibuf - jbm); + type[nbufs] = MPI_DOUBLE; + blen[nbufs] = len; + if( ierr == MPI_SUCCESS ) + ierr = MPI_Get_address( bufs[nbufs], &disp[nbufs] ); + nbufs++; + } + + for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0; + + PANEL->buffers[IBUF] = (void *)(bufs[0]); PANEL->counts [IBUF] = 1; +/* + * construct the struct type + */ + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_create_struct( nbufs, blen, disp, type, + &PANEL->dtypes[IBUF] ); +/* + * release temporaries + */ + if( bufs ) free( bufs ); + if( blen ) free( blen ); + if( disp ) free( disp ); + if( type ) free( type ); +/* + * commit the type + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); + + return( ierr ); + } + else + { +/* + * Memory allocation failed -> abort + */ + HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" ); + return( MPI_SUCCESS ); /* never executed (hopefully ...) */ + } +#endif +#else + /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug + somewhere, so, just in case and until I find it ... */ + return( MPI_SUCCESS ); +#endif +/* + * End of HPL_packL + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_recv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_recv.c new file mode 100644 index 000000000..ff426891c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_recv.c @@ -0,0 +1,142 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_recv +( + double * RBUF, + int RCOUNT, + int SRC, + int RTAG, + MPI_Comm COMM +) +#else +int HPL_recv +( RBUF, RCOUNT, SRC, RTAG, COMM ) + double * RBUF; + int RCOUNT; + int SRC; + int RTAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_recv is a simple wrapper around MPI_Recv. 
Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * SRC (local input) int + * On entry, SRC specifies the rank of the sending process in + * the communication space defined by COMM. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG, + COMM, &status ); +#endif + return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_recv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_sdrv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_sdrv.c new file mode 100644 index 000000000..0b2363563 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_sdrv.c @@ -0,0 +1,239 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_sdrv +( + double * SBUF, + int SCOUNT, + int STAG, + double * RBUF, + int RCOUNT, + int RTAG, + int PARTNER, + MPI_Comm COMM +) +#else +int HPL_sdrv +( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM ) + double * SBUF; + int SCOUNT; + int STAG; + double * RBUF; + int RCOUNT; + int RTAG; + int PARTNER; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is + * to allow for some experimentation and tuning of this simple function. + * Messages of length less than or equal to zero are not sent nor + * received. Successful completion is indicated by the returned error + * code HPL_SUCCESS. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for the + * sending communication operation. 
+ * + * RBUF (local output) double * + * On entry, RBUF specifies the starting address of buffer to be + * received. + * + * RCOUNT (local input) int + * On entry, RCOUNT specifies the number of double precision + * entries in RBUF. RCOUNT must be at least zero. + * + * RTAG (local input) int + * On entry, RTAG specifies the message tag to be used for the + * receiving communication operation. + * + * PARTNER (local input) int + * On entry, PARTNER specifies the rank of the collaborative + * process in the communication space defined by COMM. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type[2]; +#endif + MPI_Request request; + MPI_Status status; + int ierr; +/* .. + * .. Executable Statements .. + */ + if( RCOUNT > 0 ) + { + if( SCOUNT > 0 ) + { +#ifdef HPL_USE_MPI_DATATYPE +/* + * Post asynchronous receive + */ + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER, + RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, + STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else +/* + * Post asynchronous receive + */ + ierr = MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &request ); +/* + * Blocking send + */ + if( ierr == MPI_SUCCESS ) + 
ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, + PARTNER, STAG, COMM ); +/* + * Wait for the receive to complete + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#endif + } + else + { +/* + * Blocking receive + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[0] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, + COMM, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[0] ); +#else + ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, + PARTNER, RTAG, COMM, &status ); +#endif + } + } + else if( SCOUNT > 0 ) + { +/* + * Blocking send + */ +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[1] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, + COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[1] ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, + STAG, COMM ); +#endif + } + else { ierr = MPI_SUCCESS; } + + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_sdrv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_send.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_send.c new file mode 100644 index 000000000..9e9868594 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/HPL_send.c @@ -0,0 +1,139 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Do not use MPI user-defined data types no matter what. This routine + * is used for small contiguous messages. + */ +#ifdef HPL_USE_MPI_DATATYPE +#undef HPL_USE_MPI_DATATYPE +#endif + +#ifdef STDC_HEADERS +int HPL_send +( + double * SBUF, + int SCOUNT, + int DEST, + int STAG, + MPI_Comm COMM +) +#else +int HPL_send +( SBUF, SCOUNT, DEST, STAG, COMM ) + double * SBUF; + int SCOUNT; + int DEST; + int STAG; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_send is a simple wrapper around MPI_Send. Its main purpose is + * to allow for some experimentation / tuning of this simple routine. + * Successful completion is indicated by the returned error code + * HPL_SUCCESS. In the case of messages of length less than or equal to + * zero, this function returns immediately. + * + * Arguments + * ========= + * + * SBUF (local input) double * + * On entry, SBUF specifies the starting address of buffer to be + * sent. + * + * SCOUNT (local input) int + * On entry, SCOUNT specifies the number of double precision + * entries in SBUF. SCOUNT must be at least zero. + * + * DEST (local input) int + * On entry, DEST specifies the rank of the receiving process in + * the communication space defined by COMM. 
+ * + * STAG (local input) int + * On entry, STAG specifies the message tag to be used for this + * communication operation. + * + * COMM (local input) MPI_Comm + * The MPI communicator identifying the communication space. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_USE_MPI_DATATYPE + MPI_Datatype type; +#endif + int ierr; +/* .. + * .. Executable Statements .. + */ + if( SCOUNT <= 0 ) return( HPL_SUCCESS ); + +#ifdef HPL_USE_MPI_DATATYPE + ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else + ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM ); +#endif + return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); +/* + * End of HPL_send + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Makefile new file mode 100644 index 000000000..529fe9aea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/Makefile @@ -0,0 +1,111 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. 
Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#
+include Make.inc
+#
+# ######################################################################
+#
+INCdep           = \
+   $(INCdir)/hpl_misc.h  $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h \
+   $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h
+#
+## Object files ########################################################
+#
+HPL_comobj       = \
+   HPL_1ring.o            HPL_1rinM.o            HPL_2ring.o            \
+   HPL_2rinM.o            HPL_blong.o            HPL_blonM.o            \
+   HPL_packL.o            HPL_copyL.o            HPL_binit.o            \
+   HPL_bcast.o            HPL_bwait.o            HPL_send.o             \
+   HPL_recv.o             HPL_sdrv.o
+#
+## Targets #############################################################
+# all, lib and clean name actions, not files; lib.grd is the stamp touched after $(HPLlib) is updated.
+.PHONY  : all lib clean
+all     : lib
+lib     : lib.grd
+#
+lib.grd : $(HPL_comobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+# Each object is compiled from the source one directory up and rebuilt when any header in INCdep changes.
+HPL_1ring.o            : ../HPL_1ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_1ring.c
+HPL_1rinM.o            : ../HPL_1rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_1rinM.c
+HPL_2ring.o            : ../HPL_2ring.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_2ring.c
+HPL_2rinM.o            : ../HPL_2rinM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_2rinM.c
+HPL_blong.o            : ../HPL_blong.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_blong.c
+HPL_blonM.o            : ../HPL_blonM.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_blonM.c
+HPL_packL.o            : ../HPL_packL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_packL.c
+HPL_copyL.o            : ../HPL_copyL.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_copyL.c
+HPL_binit.o            : ../HPL_binit.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_binit.c
+HPL_bcast.o            : ../HPL_bcast.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_bcast.c
+HPL_bwait.o            : ../HPL_bwait.c            $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_bwait.c
+HPL_send.o             : ../HPL_send.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_send.c
+HPL_recv.o             : ../HPL_recv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_recv.c
+HPL_sdrv.o             : ../HPL_sdrv.c             $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_sdrv.c
+#
+# ######################################################################
+#
+clean            :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/comm/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/Makefile
new file mode 100644
index 000000000..d3c61cb93
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/Makefile
@@ -0,0 +1,119 @@
+# /*
+#  * -- High Performance Computing Linpack Benchmark (HPL)
+#  *    Modifications Copyright (C) 2023 Intel Corporation
+#  *
+#  * -- Copyright notice and Licensing terms:
+#  *
+#  * Redistribution and use in source and binary forms, with or without
+#  * modification, are permitted provided that the following conditions
+#  * are met:
+#  *
+#  * 1. Redistributions of source code must retain the above copyright
+#  * notice, this list of conditions and the following disclaimer.
+#  *
+#  * 2. Redistributions in binary form must reproduce the above copyright
+#  * notice, this list of conditions, and the following disclaimer in the
+#  * documentation and/or other materials provided with the distribution.
+#  *
+#  * 3. All advertising materials mentioning features or use of this
+#  * software must display the following acknowledgement:
+#  * This product includes software developed at the University of
+#  * Tennessee, Knoxville, Innovative Computing Laboratory.
+#  *
+#  * 4. The name of the University, the name of the Laboratory, or the
+#  * names of its contributors may not be used to endorse or promote
+#  * products derived from this software without specific written
+#  * permission.
+# * +# * -- Disclaimer: +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# * --------------------------------------------------------------------- +# */ + +# /* +# * -- High Performance Computing Linpack Benchmark (HPL) +# * HPL - 2.3 - December 2, 2018 +# * Antoine P. Petitet +# * University of Tennessee, Knoxville +# * Innovative Computing Laboratory +# * (C) Copyright 2000-2008 All Rights Reserved +# * +# * -- Copyright notice and Licensing terms: +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions +# * are met: +# * +# * 1. Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * +# * 2. Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions, and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * +# * 3. 
All advertising materials mentioning features or use of this +# * software must display the following acknowledgement: +# * This product includes software developed at the University of +# * Tennessee, Knoxville, Innovative Computing Laboratory. +# * +# * 4. The name of the University, the name of the Laboratory, or the +# * names of its contributors may not be used to endorse or promote +# * products derived from this software without specific written +# * permission. +# * +# * -- Disclaimer: +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#  * ---------------------------------------------------------------------
+#  */
+
+# Builds the hipBLAS-backed DGEMM/DTRSM shim as libdgemm.so.1.0.1 and refreshes its soname symlinks.
+.PHONY: all clean
+all: libdgemm.so.1.0.1
+
+OBJS = cuda_dgemm.o
+.PRECIOUS: $(OBJS)
+
+# Tool and ROCm locations; the defaults are the values that used to be hard-coded in the recipes.
+MPICC       ?= mpicc
+ROCM_PATH   ?= /opt/rocm-5.1.3
+HIPBLAS_INC ?= /opt/rocm/hipblas/include
+
+DEFINES = -DMPI -g
+#DEFINES += -DUSE_FERMI_DGEMM
+#DEFINES += -DVERBOSE_PRINT
+#DEFINES += -DACML
+#DEFINES += -DGOTO
+
+%.o: %.cpp
+	$(MPICC) -O0 -c -fPIC $(DEFINES) $< -o $@ -I$(HIPBLAS_INC) -I$(ROCM_PATH)/hip/include -D__HIP_PLATFORM_AMD__
+
+libdgemm.so.1.0.1: $(OBJS)
+	$(MPICC) -O3 -shared -Wl,-soname,libdgemm.so.1 -o $@ $(OBJS) -L$(ROCM_PATH)/hipblas/lib/ -lhipblas
+	ln -sf libdgemm.so.1.0.1 libdgemm.so.1.0
+	ln -sf libdgemm.so.1.0 libdgemm.so.1
+	ln -sf libdgemm.so.1 libdgemm.so
+
+clean:
+	rm -f $(OBJS) libdgemm.so.1.0.1 libdgemm.so.1.0 libdgemm.so.1 libdgemm.so
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.cpp b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.cpp
new file mode 100644
index 000000000..c4ac764c2
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.cpp
@@ -0,0 +1,277 @@
+ /*
+  * -- High Performance Computing Linpack Benchmark (HPL)
+  *    Modifications Copyright (C) 2023 Intel Corporation
+  *
+  * -- Copyright notice and Licensing terms:
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright
+  * notice, this list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions, and the following disclaimer in the
+  * documentation and/or other materials provided with the distribution.
+  *
+  * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */
+
+
+
+#define NUMBER_OF_STREAMS 4
+#define CHUNK_SIZE 512
+#define NN 64
+#define NM 128
+#define ERRCODE(e) (-(__LINE__ * 1000 + (e)))
+//#define DEVICE_DEBUG
+//#ifdef MPI
+//#include
+//#endif
+
+
+#define _GNU_SOURCE
+
+//#define CUDA_ERROR_CHECK
+//#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
+//#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+/*
+#include
+#include
+#include
+*/
+#include "hip/hip_runtime.h"
+
+#include "mkl.h"
+#include "hipblas.h"
+
+extern "C" {
+
+    void dpcpp_dgemm
+    ( const int ORDER,
+      const int TRANSA, const int TRANSB,
+      const int M, const int N, const int K,
+      const double ALPHA, const double *A, const int LDA,
+      const double *B, const int LDB, const double BETA,
+      double *C, const int LDC);
+
+    void dpcpp_dtrsm(
+      int HPL_ORDER,
+      int HPL_SIDE,
+      int HPL_UPLO,
+      int HPL_TRANS,
+      int HPL_DIAG,
+      const int,
+      const int,
+      const double,
+      const double *,
+      const int,
+      double *,
+      const int);
+}
+// C = ALPHA*A*B + BETA*C on column-major, non-transposed operands (A is M x K, B is K x N, C is M x N).
+// ORDER, TRANSA and TRANSB are accepted for interface compatibility but are not consulted by either path.
+void dpcpp_dgemm
+( const int ORDER, const int TRANSA, const int TRANSB,
+  const int M, const int N, const int K,
+  const double ALPHA,const double *A, const int LDA,
+  const double *B, const int LDB,
+  const double BETA, double *C, const int LDC)
+{
+    if ((M==0)||(K==0)||(N==0)){
+        return;
+    }
+
+    // Products below these thresholds are computed directly with the host BLAS.
+    if ( (N) < NN || (M) < NM || (K) < 128){
+        #ifdef DEVICE_DEBUG
+        std::cout << "dgemm-Running on CPU" << std::endl;
+        #endif
+        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC);
+        return;
+    }
+
+    #ifdef DEVICE_DEBUG
+    std::cout << "dgemm-Running on GPU" << std::endl;
+    #endif
+
+    // Widen to size_t before multiplying: products such as N * LDC overflow int for large local matrices.
+    const size_t bytesA = (size_t)K * LDA * sizeof(double);
+    const size_t bytesB = (size_t)N * LDB * sizeof(double);
+    const size_t bytesC = (size_t)N * LDC * sizeof(double);
+    // NULL-initialised so the cleanup below is safe after a partially completed setup.
+    double *devPtrA = NULL, *devPtrB = NULL, *devPtrC = NULL;
+    hipblasHandle_t handle = NULL;
+
+    // Short-circuit chain: the first failing step skips every step after it.
+    const bool ok =
+           hipblasCreate(&handle) == HIPBLAS_STATUS_SUCCESS
+        && hipMalloc((void **)&devPtrA, bytesA) == hipSuccess
+        && hipMalloc((void **)&devPtrB, bytesB) == hipSuccess
+        && hipMalloc((void **)&devPtrC, bytesC) == hipSuccess
+        && hipMemcpy(devPtrA, A, bytesA, hipMemcpyHostToDevice) == hipSuccess
+        && hipMemcpy(devPtrB, B, bytesB, hipMemcpyHostToDevice) == hipSuccess
+        && hipMemcpy(devPtrC, C, bytesC, hipMemcpyHostToDevice) == hipSuccess
+        && hipblasDgemm(handle, HIPBLAS_OP_N, HIPBLAS_OP_N, M, N, K, &ALPHA, devPtrA, LDA, devPtrB, LDB, &BETA, devPtrC, LDC) == HIPBLAS_STATUS_SUCCESS
+        && hipDeviceSynchronize() == hipSuccess
+        && hipMemcpy(C, devPtrC, bytesC, hipMemcpyDeviceToHost) == hipSuccess;
+
+    // Release device resources on every path; the handle was previously never destroyed.
+    hipFree(devPtrA);
+    hipFree(devPtrB);
+    hipFree(devPtrC);
+    if (handle) hipblasDestroy(handle);
+
+    // Host C is written only by the final copy, so a failure before it can be redone on the CPU.
+    // NOTE(review): assumes a failed device-to-host copy leaves C unmodified - confirm.
+    if (!ok){
+        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC);
+    }
+}
+
+// Solves A*X = ALPHA*B in place in B, treating A as left-side, lower, non-transposed, unit-diagonal (column-major).
+// ORDER, SIDE, UPLO, TRANS and DIAG are accepted for interface compatibility but are not consulted by either path.
+void dpcpp_dtrsm
+( const int ORDER, const int SIDE,
+  const int UPLO, const int TRANS,
+  const int DIAG, const int M, const int N,
+  const double ALPHA, const double* A, const int LDA, double* B,
+  const int LDB)
+{
+    if ((M==0)||(N==0)){
+        return;
+    }
+
+    // Small triangles (M < 512) or few right-hand sides (N < 2*M) are solved with the host BLAS.
+    if ( (M) < 512 || (N) < 2*(M)){
+        #ifdef DEVICE_DEBUG
+        std::cout << "dtrsm-Running on CPU" << std::endl;
+        #endif
+        cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB);
+        return;
+    }
+
+    #ifdef DEVICE_DEBUG
+    std::cout << "dtrsm-Running on GPU" << std::endl;
+    #endif
+
+    // Widen to size_t before multiplying so large M * LDA / N * LDB products cannot overflow int.
+    const size_t bytesA = (size_t)M * LDA * sizeof(double);
+    const size_t bytesB = (size_t)N * LDB * sizeof(double);
+
+    // NULL-initialised so the cleanup below is safe after a partially completed setup.
+    double *devPtrA = NULL, *devPtrB = NULL;
+    hipblasHandle_t handle = NULL;
+
+    // Short-circuit chain: the first failing step skips every step after it.
+    const bool ok =
+           hipblasCreate(&handle) == HIPBLAS_STATUS_SUCCESS
+        && hipMalloc((void **)&devPtrA, bytesA) == hipSuccess
+        && hipMalloc((void **)&devPtrB, bytesB) == hipSuccess
+        && hipMemcpy(devPtrA, A, bytesA, hipMemcpyHostToDevice) == hipSuccess
+        && hipMemcpy(devPtrB, B, bytesB, hipMemcpyHostToDevice) == hipSuccess
+        && hipblasDtrsm(handle,HIPBLAS_SIDE_LEFT,HIPBLAS_FILL_MODE_LOWER,HIPBLAS_OP_N,HIPBLAS_DIAG_UNIT,M,N,&ALPHA,devPtrA,LDA,devPtrB,LDB) == HIPBLAS_STATUS_SUCCESS
+        && hipDeviceSynchronize() == hipSuccess
+        && hipMemcpy(B, devPtrB, bytesB, hipMemcpyDeviceToHost) == hipSuccess;
+
+    // Release device resources on every path; the handle was previously never destroyed.
+    hipFree(devPtrA);
+    hipFree(devPtrB);
+    if (handle) hipblasDestroy(handle);
+
+    // Host B is written only by the final copy, so a failure before it can be redone on the CPU.
+    // NOTE(review): assumes a failed device-to-host copy leaves B unmodified - confirm.
+    if (!ok){
+        cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, M, N, ALPHA, A, LDA, B, LDB);
+    }
+
+
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.h b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.h
new file mode 100644
index 000000000..8b9052fba
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/cuda_dgemm.h
@@ -0,0 +1,149 @@
+ /*
+  * -- High Performance Computing Linpack Benchmark (HPL)
+  *    Modifications Copyright (C) 2023 Intel Corporation
+  *
+  * -- Copyright notice and Licensing terms:
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright
+  * notice, this list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright
+  * notice, this list of conditions, and the following disclaimer in the
+  * documentation and/or other materials provided with the distribution.
+  *
+  * 3. All advertising materials mentioning features or use of this
+  * software must display the following acknowledgement:
+  * This product includes software developed at the University of
+  * Tennessee, Knoxville, Innovative Computing Laboratory.
+  *
+  * 4. The name of the University, the name of the Laboratory, or the
+  * names of its contributors may not be used to endorse or promote
+  * products derived from this software without specific written
+  * permission.
+  *
+  * -- Disclaimer:
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+
+#define NUMBER_OF_STREAMS 2
+
+#include
+#include
+#include
+
+class DeviceManager;
+static DeviceManager *instance[2];  // lazily filled by DeviceManager::getInstance()
+
+// Owns one default-selected SYCL device and the queues created on it.
+class DeviceManager{
+    cl::sycl::device *m_pDevice = nullptr;
+    cl::sycl::queue queues[NUMBER_OF_STREAMS];
+
+    DeviceManager(){
+        try{
+            m_pDevice = new cl::sycl::device(cl::sycl::default_selector());
+        }catch(...){
+            std::cout << "ERROR: failed to create sycl device.\n";
+            throw;  // without a device the queue construction below would dereference an unset pointer
+        }
+
+        // Asynchronous SYCL errors are only logged; other exception types propagate out of the handler.
+        auto exception_handler = [] (cl::sycl::exception_list exceptions) {
+            for (std::exception_ptr const& e : exceptions) {
+                try {
+                    std::rethrow_exception(e);
+                } catch(cl::sycl::exception const& e) {
+                    std::cout << "Caught asynchronous SYCL exception during GEMM:\n"
+                    << e.what() << std::endl;
+                }
+            }
+        };
+
+        queues[0] = cl::sycl::queue(*m_pDevice, exception_handler);
+        queues[1] = cl::sycl::queue(*m_pDevice, exception_handler);
+    }
+    public:
+    // Returns the manager for slot mpi_id, creating it on first use; nullptr when mpi_id has no slot.
+    static DeviceManager* getInstance(int mpi_id){
+        const int slots = static_cast<int>(sizeof(instance) / sizeof(instance[0]));
+        if(mpi_id < 0 || mpi_id >= slots){
+            std::cout << "ERROR: no device slot for " << mpi_id << "\n";
+            return nullptr;
+        }
+        if(!instance[mpi_id]){
+            std::cout << "Creating device for " << mpi_id << "\n";
+            instance[mpi_id] = new DeviceManager();
+        }
+        return instance[mpi_id];
+    }
+
+    cl::sycl::device &getDevice(){ return *m_pDevice;}
+    cl::sycl::queue *getQueues(){ return queues;}
+
+    static void display_device_properties(cl::sycl::device const &dev);
+};
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1
new file mode 120000
index 000000000..ab21c8005
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1
@@ -0,0 +1 @@
+libdgemm.so.1.0
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0
new file mode 120000
index 000000000..d08629732
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0
@@ -0,0 +1 @@
+libdgemm.so.1.0.1
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0.1 b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0.1
new file mode 100755
index 000000000..6a9f7501f
Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/cuda/libdgemm.so.1.0.1 differ
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_all_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_all_reduce.c
new file mode 100644
index 000000000..776f48504
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_all_reduce.c
@@ -0,0 +1,114 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_all_reduce
+(
+   void *                           BUFFER,
+   const int                        COUNT,
+   const HPL_T_TYPE                 DTYPE,
+   const HPL_T_OP                   OP,
+   MPI_Comm                         COMM
+)
+#else
+int HPL_all_reduce
+( BUFFER, COUNT, DTYPE, OP, COMM )
+   void *                           BUFFER;
+   const int                        COUNT;
+   const HPL_T_TYPE                 DTYPE;
+   const HPL_T_OP                   OP;
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_all_reduce combines BUFFER across all processes of a group: it
+ * reduces onto rank 0 with HPL_reduce, then rebroadcasts the result.
+ *
+ * Arguments
+ * =========
+ *
+ * BUFFER  (local input/global output)   void *
+ *         On entry,  BUFFER  points to  the  buffer to be combined.  On
+ *         exit, this array contains the combined data and  is identical
+ *         on all processes in the group.
+ *
+ * COUNT   (global input)                const int
+ *         On entry,  COUNT  indicates the number of entries in  BUFFER.
+ *         COUNT must be at least zero.
+ *
+ * DTYPE   (global input)                const HPL_T_TYPE
+ *         On entry,  DTYPE  specifies the type of the buffers operands.
+ *
+ * OP      (global input)                const HPL_T_OP
+ *         On entry, OP is a pointer to the local combine function.
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        ierr;
+/* ..
+ * .. Executable Statements ..
+ */
+   ierr = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM );
+   if( ierr == MPI_SUCCESS ) ierr = HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM );
+   return( ierr );
+/*
+ * End of HPL_all_reduce
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_barrier.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_barrier.c
new file mode 100644
index 000000000..9a5d9b10a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_barrier.c
@@ -0,0 +1,90 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgement:
+ * This product includes software developed at the University of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the University, the name of the Laboratory, or the
+ * names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific written
+ * permission.
+ *
+ * -- Disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+int HPL_barrier
+(
+   MPI_Comm                         COMM
+)
+#else
+int HPL_barrier
+( COMM )
+   MPI_Comm                         COMM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_barrier blocks the caller until all process members have called
+ * it.  The call returns at any process only after all group members
+ * have entered the call.
+ *
+ * Arguments
+ * =========
+ *
+ * COMM    (global/local input)          MPI_Comm
+ *         The MPI communicator identifying the process collection.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   int                        token = 0;
+/* ..
+ * .. Executable Statements ..
+ */
+   return( HPL_broadcast( (void *)(&token), 1, HPL_INT, 0, COMM ) );   /* one-integer broadcast from rank 0 */
+/*
+ * End of HPL_barrier
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_broadcast.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_broadcast.c
new file mode 100644
index 000000000..42d962864
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_broadcast.c
@@ -0,0 +1,147 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgement:
+ * This product includes software developed at the University of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the University, the name of the Laboratory, or the
+ * names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific written
+ * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_broadcast +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_broadcast +( BUFFER, COUNT, DTYPE, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_broadcast broadcasts a message from the process with rank ROOT to + * all processes in the group. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be broadcast. On + * exit, this array contains the broadcast data and is identical + * on all processes in the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the source process. 
+ * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; + MPI_Status status; +/* .. + * .. Executable Statements .. + */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr ); + mpierr = MPI_Comm_rank( COMM, &rank ); + + kk = size - 1; + while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist = MModSub( rank, ROOT, size ); + + do + { + mask ^= ip2; + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + ip2 >>= 1; + } while( ip2 ); + + return( hplerr ); +/* + * End of HPL_broadcast + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_exit.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_exit.c new file mode 100644 index 000000000..f0d00b065 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_exit.c @@ -0,0 +1,109 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_exit +( + HPL_T_grid * GRID +) +#else +int HPL_grid_exit +( GRID ) + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_exit marks the process grid object for deallocation. The + * returned error code MPI_SUCCESS indicates successful completion. + * Other error codes are (MPI) implementation dependent. + * + * Arguments + * ========= + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid to be released. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int hplerr = MPI_SUCCESS, mpierr; +/* .. + * .. Executable Statements .. 
+ */ + if( GRID->all_comm != MPI_COMM_NULL ) + { + mpierr = MPI_Comm_free( &(GRID->row_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->col_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + mpierr = MPI_Comm_free( &(GRID->all_comm) ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + + GRID->order = HPL_COLUMN_MAJOR; + + GRID->iam = GRID->myrow = GRID->mycol = -1; + GRID->nprow = GRID->npcol = GRID->nprocs = -1; + + GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1; + GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1; + + return( hplerr ); +/* + * End of HPL_grid_exit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_info.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_info.c new file mode 100644 index 000000000..95c5a7315 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_info.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_info +( + const HPL_T_grid * GRID, + int * NPROW, + int * NPCOL, + int * MYROW, + int * MYCOL +) +#else +int HPL_grid_info +( GRID, NPROW, NPCOL, MYROW, MYCOL ) + const HPL_T_grid * GRID; + int * NPROW; + int * NPCOL; + int * MYROW; + int * MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_info returns the grid shape and the coordinates in the grid + * of the calling process. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. 
+ * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * NPROW (global output) int * + * On exit, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global output) int * + * On exit, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. + * + * MYROW (global output) int * + * On exit, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (global output) int * + * On exit, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + *NPROW = GRID->nprow; *NPCOL = GRID->npcol; + *MYROW = GRID->myrow; *MYCOL = GRID->mycol; + return( MPI_SUCCESS ); +/* + * End of HPL_grid_info + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_init.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_init.c new file mode 100644 index 000000000..52111ac52 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_grid_init.c @@ -0,0 +1,184 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_grid_init +( + MPI_Comm COMM, + const HPL_T_ORDER ORDER, + const int NPROW, + const int NPCOL, + HPL_T_grid * GRID +) +#else +int HPL_grid_init +( COMM, ORDER, NPROW, NPCOL, GRID ) + MPI_Comm COMM; + const HPL_T_ORDER ORDER; + const int NPROW; + const int NPCOL; + HPL_T_grid * GRID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_grid_init creates a NPROW x NPCOL process grid using column- or + * row-major ordering from an initial collection of processes identified + * by an MPI communicator. Successful completion is indicated by the + * returned error code MPI_SUCCESS. Other error codes depend on the MPI + * implementation. The coordinates of processes that are not part of the + * grid are set to values outside of [0..NPROW) x [0..NPCOL). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * On entry, COMM is the MPI communicator identifying the + * initial collection of processes out of which the grid is + * formed. + * + * ORDER (global input) const HPL_T_ORDER + * On entry, ORDER specifies how the processes should be ordered + * in the grid as follows: + * ORDER = HPL_ROW_MAJOR row-major ordering; + * ORDER = HPL_COLUMN_MAJOR column-major ordering; + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid to be created. NPROW must be at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid to be created. NPCOL must be at least one. + * + * GRID (local input/output) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information to be initialized. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int hdim, hplerr=MPI_SUCCESS, ierr, ip2, k, + mask, mycol, myrow, nprocs, rank, size; +/* .. + * .. Executable Statements .. + */ + MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size ); +/* + * Abort if illegal process grid + */ + nprocs = NPROW * NPCOL; + if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) ) + { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); } +/* + * Row- or column-major ordering of the processes + */ + if( ORDER == HPL_ROW_MAJOR ) + { + GRID->order = HPL_ROW_MAJOR; + myrow = rank / NPCOL; mycol = rank - myrow * NPCOL; + } + else + { + GRID->order = HPL_COLUMN_MAJOR; + mycol = rank / NPROW; myrow = rank - mycol * NPROW; + } + GRID->iam = rank; GRID->myrow = myrow; GRID->mycol = mycol; + GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs; +/* + * row_ip2 : largest power of two <= nprow; + * row_hdim : row_ip2 procs hypercube dim; + * row_ip2m1 : largest power of two <= nprow-1; + * row_mask : row_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPROW; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->row_ip2 = ip2; GRID->row_hdim = hdim; + + mask = ip2 = 1; k = NPROW - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->row_ip2m1 = ip2; GRID->row_mask = mask; +/* + * col_ip2 : largest power of two <= npcol; + * col_hdim : col_ip2 procs hypercube dim; + * col_ip2m1 : largest power of two <= npcol-1; + * col_mask : col_ip2m1 procs hypercube mask; + */ + hdim = 0; ip2 = 1; k = NPCOL; + while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } + GRID->col_ip2 = ip2; GRID->col_hdim = hdim; + + mask = ip2 = 1; k = NPCOL - 1; + while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + GRID->col_ip2m1 = ip2; GRID->col_mask = mask; +/* + * All communicator, leave if I am not part of this grid. Creation of the + * row- and column communicators. + */ + ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 
0 : MPI_UNDEFINED ), + rank, &(GRID->all_comm) ); + if( GRID->all_comm == MPI_COMM_NULL ) return( ierr ); + + ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) ); + if( ierr != MPI_SUCCESS ) hplerr = ierr; + + return( hplerr ); +/* + * End of HPL_grid_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_max.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_max.c new file mode 100644 index 000000000..002aabe01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_max.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_max +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_max +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_max combines (max) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. 
+ * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); + } +/* + * End of HPL_max + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_min.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_min.c new file mode 100644 index 000000000..a99e5e58a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_min.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_min +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_min +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_min combines (min) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. 
+ * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. + */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); + } +/* + * End of HPL_min + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_pnum.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_pnum.c new file mode 100644 index 000000000..c80885b9a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_pnum.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pnum +( + const HPL_T_grid * GRID, + const int MYROW, + const int MYCOL +) +#else +int HPL_pnum +( GRID, MYROW, MYCOL ) + const HPL_T_grid * GRID; + const int MYROW; + const int MYCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pnum determines the rank of a process as a function of its + * coordinates in the grid. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. 
+ * + * MYROW (local input) const int + * On entry, MYROW specifies the row coordinate of the process + * whose rank is to be determined. MYROW must be greater than or + * equal to zero and less than NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies the column coordinate of the + * process whose rank is to be determined. MYCOL must be greater + * than or equal to zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + if( GRID->order == HPL_ROW_MAJOR ) + return( MYROW * GRID->npcol + MYCOL ); + else + return( MYCOL * GRID->nprow + MYROW ); +/* + * End of HPL_pnum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_reduce.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_reduce.c new file mode 100644 index 000000000..417c21163 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_reduce.c @@ -0,0 +1,179 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_reduce +( + void * BUFFER, + const int COUNT, + const HPL_T_TYPE DTYPE, + const HPL_T_OP OP, + const int ROOT, + MPI_Comm COMM +) +#else +int HPL_reduce +( BUFFER, COUNT, DTYPE, OP, ROOT, COMM ) + void * BUFFER; + const int COUNT; + const HPL_T_TYPE DTYPE; + const HPL_T_OP OP; + const int ROOT; + MPI_Comm COMM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_reduce performs a global reduce operation across all processes of + * a group. 
Note that the input buffer is used as workarray and in all + * processes but the accumulating process corrupting the original data. + * + * Arguments + * ========= + * + * BUFFER (local input/output) void * + * On entry, BUFFER points to the buffer to be reduced. On + * exit, and in process of rank ROOT this array contains the + * reduced data. This buffer is also used as workspace during + * the operation in the other processes of the group. + * + * COUNT (global input) const int + * On entry, COUNT indicates the number of entries in BUFFER. + * COUNT must be at least zero. + * + * DTYPE (global input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * OP (global input) const HPL_T_OP + * On entry, OP is a pointer to the local combine function. + * + * ROOT (global input) const int + * On entry, ROOT is the coordinate of the accumulating process. + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Status status; + void * buffer = NULL; + int hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0, + mpierr, mydist, partner, rank, size, + tag = MSGID_BEGIN_COLL; +/* .. + * .. Executable Statements .. 
+ */ + if( COUNT <= 0 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_size( COMM, &size ); + if( size == 1 ) return( MPI_SUCCESS ); + mpierr = MPI_Comm_rank( COMM, &rank ); + i = size - 1; while( i > 1 ) { i >>= 1; d++; } + + if( DTYPE == HPL_INT ) + buffer = (void *)( (int *) malloc( (size_t)(COUNT) * + sizeof( int ) ) ); + else + buffer = (void *)( (double *)malloc( (size_t)(COUNT) * + sizeof( double ) ) ); + + if( !( buffer ) ) + { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); } + + if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 ) + { + do + { + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + MModAdd( ROOT, ip2, size ), tag, COMM, + &status ); + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + OP( COUNT, buffer, BUFFER, DTYPE ); + ip2 <<= 1; d--; + } while( d ); + } + else + { + do + { + if( ( mydist & mask ) == 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM ); + } + else if( partner < size ) + { + partner = MModAdd( ROOT, partner, size ); + mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), + partner, tag, COMM, &status ); + OP( COUNT, buffer, BUFFER, DTYPE ); + } + if( mpierr != MPI_SUCCESS ) hplerr = mpierr; + } + mask ^= ip2; ip2 <<= 1; d--; + } while( d ); + } + if( buffer ) free( buffer ); + + return( hplerr ); +/* + * End of HPL_reduce + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_sum.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_sum.c new file mode 100644 index 000000000..34cf87210 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/HPL_sum.c @@ -0,0 +1,118 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_sum +( + const int N, + const void * IN, + void * INOUT, + const HPL_T_TYPE DTYPE +) +#else +void HPL_sum +( N, IN, INOUT, DTYPE ) + const int N; + const void * IN; + void * INOUT; + const HPL_T_TYPE DTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_sum combines (sum) two buffers. + * + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the length of the buffers to be + * combined. N must be at least zero. + * + * IN (input) const void * + * On entry, IN points to the input-only buffer to be combined. + * + * INOUT (input/output) void * + * On entry, INOUT points to the input-output buffer to be + * combined. On exit, the entries of this array contains the + * combined results. + * + * DTYPE (input) const HPL_T_TYPE + * On entry, DTYPE specifies the type of the buffers operands. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register int i; +/* .. + * .. Executable Statements .. 
+ */ + if( DTYPE == HPL_INT ) + { + const int * a = (const int *)(IN); + int * b = (int *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } + else + { + const double * a = (const double *)(IN); + double * b = (double *)(INOUT); + for( i = 0; i < N; i++ ) b[i] += a[i]; + } +/* + * End of HPL_sum + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Makefile new file mode 100644 index 000000000..51549d817 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/Makefile @@ -0,0 +1,103 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h +# +## Object files ######################################################## +# +HPL_griobj = \ + HPL_grid_init.o HPL_pnum.o HPL_grid_info.o \ + HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o \ + HPL_all_reduce.o HPL_barrier.o HPL_min.o \ + HPL_max.o HPL_sum.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_griobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_grid_init.o : ../HPL_grid_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_init.c +HPL_pnum.o : ../HPL_pnum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pnum.c +HPL_grid_info.o : ../HPL_grid_info.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_info.c +HPL_grid_exit.o : ../HPL_grid_exit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_exit.c +HPL_broadcast.o : ../HPL_broadcast.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_broadcast.c +HPL_reduce.o : ../HPL_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_reduce.c +HPL_all_reduce.o : ../HPL_all_reduce.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_all_reduce.c +HPL_barrier.o : ../HPL_barrier.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_barrier.c +HPL_min.o : ../HPL_min.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_min.c +HPL_max.o : ../HPL_max.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_max.c +HPL_sum.o : ../HPL_sum.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_sum.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/grid/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_disp.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_disp.c new file mode 100644 index 000000000..757dad242 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_disp.c @@ -0,0 +1,97 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_disp +( + HPL_T_panel * * PANEL +) +#else +int HPL_pdpanel_disp +( PANEL ) + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_disp deallocates the panel structure and resources and + * stores the error code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to be deallocated. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int mpierr; +/* .. + * .. Executable Statements .. 
+ */ +/* + * Deallocate the panel resources and panel structure + */ + mpierr = HPL_pdpanel_free( *PANEL ); + if( *PANEL ) free( *PANEL ); + *PANEL = NULL; + + return( mpierr ); +/* + * End of HPL_pdpanel_disp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_free.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_free.c new file mode 100644 index 000000000..38b5b0d97 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_free.c @@ -0,0 +1,104 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_pdpanel_free +( + HPL_T_panel * PANEL +) +#else +int HPL_pdpanel_free +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_free deallocates the panel resources and stores the error + * code returned by the panel factorization. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the panel data structure from + * which the resources should be deallocated. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( PANEL->pmat->info == 0 ) PANEL->pmat->info = *(PANEL->DINFO); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE ); + (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE ); + if( PANEL->grid->nprow > 1 ) + (void) vsip_blockrelease_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Destroy blocks + */ + vsip_blockdestroy_d( PANEL->L1block ); + vsip_blockdestroy_d( PANEL->L2block ); + if( PANEL->grid->nprow > 1 ) + vsip_blockdestroy_d( PANEL->Ublock ); +#endif + + if( PANEL->WORK ) free( PANEL->WORK ); + if( PANEL->IWORK ) free( PANEL->IWORK ); + + return( MPI_SUCCESS ); +/* + * End of HPL_pdpanel_free + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_init.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_init.c new file mode 100644 index 000000000..9e35c7fb4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_init.c @@ -0,0 +1,348 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ +#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... 
*/ +#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ +#endif +#endif + +#ifdef STDC_HEADERS +void HPL_pdpanel_init +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * PANEL +) +#else +void HPL_pdpanel_init +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_init initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies is the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + size_t dalign; + int icurcol, icurrow, ii, itmp1, jj, lwork, + ml2, mp, mycol, myrow, nb, npcol, nprow, + nq, nu; +/* .. + * .. Executable Statements .. + */ + PANEL->grid = GRID; /* ptr to the process grid */ + PANEL->algo = ALGO; /* ptr to the algo parameters */ + PANEL->pmat = A; /* ptr to the local array info */ + + myrow = GRID->myrow; mycol = GRID->mycol; + nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; + + HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, + nprow, npcol, &ii, &jj, &icurrow, &icurcol ); + mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow ); + nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol ); + /* ptr to trailing part of A */ + PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld ); +/* + * Workspace pointers are initialized to NULL. 
+ */ + PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; + PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; + PANEL->IWORK = NULL; +/* + * Local lengths, indexes process coordinates + */ + PANEL->nb = nb; /* distribution blocking factor */ + PANEL->jb = JB; /* panel width */ + PANEL->m = M; /* global # of rows of trailing part of A */ + PANEL->n = N; /* global # of cols of trailing part of A */ + PANEL->ia = IA; /* global row index of trailing part of A */ + PANEL->ja = JA; /* global col index of trailing part of A */ + PANEL->mp = mp; /* local # of rows of trailing part of A */ + PANEL->nq = nq; /* local # of cols of trailing part of A */ + PANEL->ii = ii; /* local row index of trailing part of A */ + PANEL->jj = jj; /* local col index of trailing part of A */ + PANEL->lda = A->ld; /* local leading dim of array A */ + PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */ + PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */ + PANEL->msgid = TAG; /* message id to be used for panel bcast */ +/* + * Initialize ldl2 and len to temporary dummy values and Update tag for + * next panel + */ + PANEL->ldl2 = 0; /* local leading dim of array L2 */ + PANEL->len = 0; /* length of the buffer to broadcast */ +/* + * Figure out the exact amount of workspace needed by the factorization + * and the update - Allocate that space - Finish the panel data structu- + * re initialization. + * + * L1: JB x JB in all processes + * DPIV: JB in all processes + * DINFO: 1 in all processes + * + * We make sure that those three arrays are contiguous in memory for the + * later panel broadcast. We also choose to put this amount of space + * right after L2 (when it exist) so that one can receive a contiguous + * buffer. 
+ */ + dalign = ALGO->align * sizeof( double ); + + if( npcol == 1 ) /* P x 1 process grid */ + { /* space for L1, DPIV, DINFO */ + lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 ); + if( nprow > 1 ) /* space for U */ + { nu = nq - JB; lwork += JB * Mmax( 0, nu ); } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Always re-use A in + * the only process column + */ + PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); + } + else + { /* space for L2, L1, DPIV */ + ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 ); + PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 ); +#ifdef HPL_COPY_L + lwork = ALGO->align + PANEL->len; +#else + lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); +#endif + if( nprow > 1 ) /* space for U */ + { + nu = ( mycol == icurcol ? nq - JB : nq ); + lwork += JB * Mmax( 0, nu ); + } + + if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * + sizeof( double ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_init", + "Memory allocation failed" ); + } +/* + * Initialize the pointers of the panel structure - Re-use A in the cur- + * rent process column when HPL_COPY_L is not defined. + */ +#ifdef HPL_COPY_L + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; +#else + if( mycol == icurcol ) + { + PANEL->L2 = PANEL->A + ( myrow == icurrow ? 
JB : 0 ); + PANEL->ldl2 = A->ld; + PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); + } + else + { + PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); + PANEL->ldl2 = Mmax( 1, ml2 ); + PANEL->L1 = PANEL->L2 + ml2 * JB; + } +#endif + PANEL->DPIV = PANEL->L1 + JB * JB; + PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; + PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); + } +#ifdef HPL_CALL_VSIPL + PANEL->Ablock = A->block; +/* + * Create blocks and bind them to the data pointers + */ + PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), + (vsip_length)(JB*JB), VSIP_MEM_NONE ); + PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), + (vsip_length)(PANEL->ldl2*JB), + VSIP_MEM_NONE ); + if( nprow > 1 ) + { + nu = ( mycol == icurcol ? nq - JB : nq ); + PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), + (vsip_length)(JB * Mmax( 0, nu )), + VSIP_MEM_NONE ); + } + else { PANEL->Ublock = A->block; } +#endif +/* + * If nprow is 1, we just allocate an array of JB integers for the swap. + * When nprow > 1, we allocate the space for the index arrays immediate- + * ly. The exact size of this array depends on the swapping routine that + * will be used, so we allocate the maximum: + * + * IWORK[0] is of size at most 1 + + * IPL is of size at most 1 + + * IPID is of size at most 4 * JB + + * + * For HPL_pdlaswp00: + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * llen is of size at most NPROW + + * llen_sv is of size at most NPROW. + * + * For HPL_pdlaswp01: + * ipA is of size ar most 1 + + * lindxA is of size at most 2 * JB + + * lindxAU is of size at most 2 * JB + + * iplen is of size at most NPROW + 1 + + * ipmap is of size at most NPROW + + * ipmapm1 is of size at most NPROW + + * permU is of size at most JB + + * iwork is of size at most MAX( 2*JB, NPROW+1 ). + * + * that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1)) + * = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ). 
+ * + * We use the fist entry of this to work array to indicate whether the + * the local index arrays have already been computed, and if yes, by + * which function: + * IWORK[0] = -1: no index arrays have been computed so far; + * IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays; + * IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays; + * This allows to save some redundant and useless computations. + */ + if( nprow == 1 ) { lwork = JB; } + else + { + itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); + lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; + } + + PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + + if( PANEL->IWORK == NULL ) + { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } + /* Initialize the first entry of the workarray */ + *(PANEL->IWORK) = -1; +/* + * End of HPL_pdpanel_init + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_new.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_new.c new file mode 100644 index 000000000..1dbd8a18f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/HPL_pdpanel_new.c @@ -0,0 +1,152 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanel_new +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int M, + const int N, + const int JB, + HPL_T_pmat * A, + const int IA, + const int JA, + const int TAG, + HPL_T_panel * * PANEL +) +#else +void HPL_pdpanel_new +( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int M; + const int N; + const int JB; + HPL_T_pmat * A; + const int IA; + const int JA; + const int TAG; + HPL_T_panel * * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanel_new creates and initializes a panel data structure. + * + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * M (local input) const int + * On entry, M specifies the global number of rows of the panel. + * M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the global number of columns of the + * panel and trailing submatrix. N must be at least zero. + * + * JB (global input) const int + * On entry, JB specifies the number of columns of the panel. + * JB must be at least zero. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * IA (global input) const int + * On entry, IA is the global row index identifying the panel + * and trailing submatrix. IA must be at least zero. + * + * JA (global input) const int + * On entry, JA is the global column index identifying the panel + * and trailing submatrix. JA must be at least zero. + * + * TAG (global input) const int + * On entry, TAG is the row broadcast message id.
+ * + * PANEL (local input/output) HPL_T_panel * * + * On entry, PANEL points to the address of the panel data + * structure to create and initialize. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * p = NULL; +/* .. + * .. Executable Statements .. + */ +/* + * Allocate the panel structure - Check for enough memory + */ + if( !( p = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) ) ) ) + { + HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" ); + } + + HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, p ); + *PANEL = p; +/* + * End of HPL_pdpanel_new + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Make.inc @@ -0,0 +1 @@ +../../../Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Makefile new file mode 100644 index 000000000..804749cc2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/Makefile @@ -0,0 +1,90 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1.
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_panobj = \ + HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o \ + HPL_pdpanel_free.o +# +## Targets ############################################################# +# all, lib and clean name actions, not files: never treat them as up to date +.PHONY : all lib clean +all : lib +lib : lib.grd +# +lib.grd : $(HPL_panobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_new.c +HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_init.c +HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_disp.c +HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_free.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/panel/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp00N.c new file mode 100644 index 000000000..7ad5a1a99 --- /dev/null +++
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp00N.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor (DEPTH must equal 2**LOG2_DEPTH: the column loop steps by DEPTH but blocks N and strides A with LOG2_DEPTH shifts) + */ +#ifndef HPL_LASWP00N_DEPTH +#define HPL_LASWP00N_DEPTH 32 +#define HPL_LASWP00N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp00N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp00N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp00N performs a series of local row interchanges on a matrix + * A. One row interchange is initiated for rows 0 through M-1 of A; a row i with IPIV[i] == i is left untouched. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A to be + * interchanged. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. + * N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N) to which + * the row interchanges will be applied. On exit, the permuted + * matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M).
+ * + * IPIV (local input) const int * + * On entry, IPIV is an array of size M that contains the + * pivoting information. For k in [0..M), IPIV[k] = l (used directly as a row offset from A) + * implies that local rows k and l are to be interchanged. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + register double r; + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP00N_LOG2_DEPTH ); + int ip, nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH ) + << HPL_LASWP00N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#if ( HPL_LASWP00N_DEPTH > 1 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 2 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 4 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 8 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif +#if ( HPL_LASWP00N_DEPTH > 16 ) + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +
r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; + r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + if( i != ( ip = IPIV[i] ) ) + { + a0 = A + i; a1 = A + ip; + for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA ) + { r = *a0; *a0 = *a1; *a1 = r; } + } + } + } +/* + * End of HPL_dlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01N.c new file mode 100644 index 000000000..786d1eff4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01N.c @@ -0,0 +1,209 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01N_DEPTH +#define HPL_LASWP01N_DEPTH 32 +#define HPL_LASWP01N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01N copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destination of those rows are specified by LINDXAU. A + * non-negative value of LINDXAU indicates that the array destination is U, + * and A otherwise. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). The rows + * of A specified by LINDXA are copied within this array U at + * the positions indicated by non-negative values of LINDXAU.
+ * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A + * or copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U where the rows of A should be + * copied at. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A non-negative + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U at the position LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved at the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP01N_LOG2_DEPTH ); + int lda1, nu, nr; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) << + HPL_LASWP01N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A + (size_t)(-LINDXAU[i]); lda1 = LDA; } /* negate before widening: -LINDXAU[i] is the row offset in A */ + + *a1 = *a0; a1 += lda1; a0 += LDA; +#if ( HPL_LASWP01N_DEPTH > 1 ) + *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 2 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 4 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 8 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif +#if ( HPL_LASWP01N_DEPTH > 16 ) + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; + *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } + else { a1 = A + (size_t)(-LINDXAU[i]); lda1 = LDA; } + for( j = 0; j < nr; j++, a1 += lda1, a0
+= LDA ) { *a1 = *a0; } + } + } +/* + * End of HPL_dlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01T.c new file mode 100644 index 000000000..429cfb6f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp01T.c @@ -0,0 +1,252 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP01T_DEPTH +#define HPL_LASWP01T_DEPTH 32 +#define HPL_LASWP01T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp01T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp01T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp01T copies scattered rows of A into itself and into an + * array U. The row offsets in A of the source rows are specified by + * LINDXA. The destination of those rows are specified by LINDXAU. A + * non-negative value of LINDXAU indicates that the array destination is U, + * and A otherwise. Rows of A are stored as columns in U. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * moved within A or copied into U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * moved within A or copied into U. N must be at least zero.
+ * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be moved within A or + * copied into U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,M). The rows + * of A specified by LINDXA are copied within this array U at + * the positions indicated by non-negative values of LINDXAU. The + * rows of A are stored as columns in U. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be moved within A + * or copied into U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U where the rows of A should be + * copied at. This array also contains the local row offsets in + * A where some of the rows of A should be moved to. A non-negative + * value of LINDXAU[i] indicates that the row LINDXA[i] of A + * should be copied into U at the position LINDXAU[i]; otherwise + * the row LINDXA[i] of A should be moved at the position + * -LINDXAU[i] within A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * a0, * a1; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP01T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH ); + int nu, nr; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) << + HPL_LASWP01T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + a1[ 0] = *a0; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + a1[ 1] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA; + a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA; + a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA; + a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA; + a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA; + a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA; + a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA; + a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA; + a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA; + a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA; + a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA; + a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA; +#endif + } + else + { + a1 = A + (size_t)(-LINDXAU[i]); /* negate before widening: -LINDXAU[i] is the row offset in A */ + + *a1 = *a0; a1 += LDA; a0 += LDA; +#if ( HPL_LASWP01T_DEPTH > 1 ) + *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 2 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 4 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 8 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 =
*a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif +#if ( HPL_LASWP01T_DEPTH > 16 ) + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; + *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; +#endif + } + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + + if( LINDXAU[i] >= 0 ) + { + a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; } + } + else + { + a1 = A + (size_t)(-LINDXAU[i]); + for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; } + } + } + } +/* + * End of HPL_dlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp02N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp02N.c new file mode 100644 index 000000000..45c2f5f1f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp02N.c @@ -0,0 +1,205 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP02N_DEPTH +#define HPL_LASWP02N_DEPTH 32 +#define HPL_LASWP02N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp02N +( + const int M, + const int N, + const double * A, + const int LDA, + double * W0, + double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp02N +( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M; + const int N; + const double * A; + const int LDA; + double * W0; + double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp02N packs scattered rows of an array A into workspace W. + * The row offsets in A are specified by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * copied into W. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of rows of A that should be + * copied into W. N must be at least zero. + * + * A (local input) const double * + * On entry, A points to an array of dimension (LDA,N). The rows + * of this array specified by LINDXA should be copied into W. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * W0 (local input/output) double * + * On exit, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local output) double * + * On entry, W is an array of size (LDW,M). On exit, W contains + * the rows LINDXA[i] for i in [0..M) of A stored contiguously + * in W(:,i). + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. 
+ * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied into W. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U that should be copied into A and + * replaced by the rows of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * A0 = A, * a0; + double * w0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP02N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + for( i = 0; i < M; i++ ) + *(W0+(size_t)(i)*(size_t)(LDW)) = (double)(LINDXAU[i]); + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH ) << + HPL_LASWP02N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW); + + w0[ 0] = *a0; a0 += LDA; +#if ( HPL_LASWP02N_DEPTH > 1 ) + w0[ 1] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 2 ) + w0[ 2] = *a0; a0 += LDA; w0[ 3] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 4 ) + w0[ 4] = *a0; a0 += LDA; w0[ 5] = *a0; a0 += LDA; + w0[ 6] = *a0; a0 += LDA; w0[ 7] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 8 ) + w0[ 8] = *a0; a0 += LDA; w0[ 9] = *a0; a0 += LDA; + w0[10] = *a0; a0 += LDA; w0[11] = *a0; a0 += LDA; + w0[12] = *a0; a0 += LDA; w0[13] = *a0; a0 += LDA; + w0[14] = *a0; a0 += LDA; w0[15] = *a0; a0 += LDA; +#endif +#if ( HPL_LASWP02N_DEPTH > 16 ) + w0[16] = *a0; a0 += LDA; w0[17] = *a0; a0 += LDA; + w0[18] = *a0; a0 += LDA; w0[19] = *a0; a0 += LDA; + w0[20] = *a0; a0 += LDA; w0[21] = *a0; a0 += LDA; + w0[22] = *a0; a0 += LDA; w0[23] = *a0; a0 += LDA; + w0[24] = *a0; a0 += 
LDA; w0[25] = *a0; a0 += LDA; + w0[26] = *a0; a0 += LDA; w0[27] = *a0; a0 += LDA; + w0[28] = *a0; a0 += LDA; w0[29] = *a0; a0 += LDA; + w0[30] = *a0; a0 += LDA; w0[31] = *a0; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A0 + (size_t)(LINDXA[i]); w0 = W + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; } + } + } +/* + * End of HPL_dlaswp02N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03N.c new file mode 100644 index 000000000..760732a8d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03N.c @@ -0,0 +1,194 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP03N_DEPTH +#define HPL_LASWP03N_DEPTH 32 +#define HPL_LASWP03N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp03N +( + const int M, + const int N, + double * U, + const int LDU, + const double * W0, + const double * W, + const int LDW +) +#else +void HPL_dlaswp03N +( M, N, U, LDU, W0, W, LDW ) + const int M; + const int N; + double * U; + const int LDU; + const double * W0; + const double * W; + const int LDW; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp03N copies columns of W into rows of an array U. The + * destination in U of these columns contained in W is stored within W0. 
+ * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of W stored + * contiguously that should be copied into U. M must be at least + * zero. + * + * N (local input) const int + * On entry, N specifies the length of columns of W stored + * contiguously that should be copied into U. N must be at least + * zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). Columns + * of W are copied as rows within this array U at the positions + * specified in W0. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M), that contains data + * to be copied into U. For i in [0..M), entries W(:,i) should + * be copied into the row or column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * u0; + const int incU = (int)( (unsigned int)(LDU) << + HPL_LASWP03N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. 
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH ) << + HPL_LASWP03N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *u0 = w0[ 0]; u0 += LDU; +#if ( HPL_LASWP03N_DEPTH > 1 ) + *u0 = w0[ 1]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 2 ) + *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 4 ) + *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU; + *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 8 ) + *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU; + *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU; + *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU; + *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU; +#endif +#if ( HPL_LASWP03N_DEPTH > 16 ) + *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU; + *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU; + *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU; + *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU; + *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU; + *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU; + *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU; + *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; } + } + } +/* + * End of HPL_dlaswp03N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03T.c new file mode 100644 index 000000000..fece692ce --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp03T.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP03T_DEPTH +#define HPL_LASWP03T_DEPTH 32 +#define HPL_LASWP03T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp03T +( + const int M, + const int N, + double * U, + const int LDU, + const double * W0, + const double * W, + const int LDW +) +#else +void HPL_dlaswp03T +( M, N, U, LDU, W0, W, LDW ) + const int M; + const int N; + double * U; + const int LDU; + const double * W0; + const double * W; + const int LDW; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp03T copies columns of W into an array U. The destination + * in U of these columns contained in W is stored within W0. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of W stored + * contiguously that should be copied into U. M must be at least + * zero. + * + * N (local input) const int + * On entry, N specifies the length of columns of W stored + * contiguously that should be copied into U. N must be at least + * zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,M). Columns + * of W are copied within the array U at the positions specified + * in W0. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. 
+ * LDU must be at least MAX(1,N). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M), that contains data + * to be copied into U. For i in [0..M), entries W(:,i) should + * be copied into the row or column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * u0; + const int incU = ( 1 << HPL_LASWP03T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) << + HPL_LASWP03T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; + j += HPL_LASWP03T_DEPTH, U += incU, w += HPL_LASWP03T_DEPTH ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); + w0 = w + (size_t)(i) * (size_t)(LDW); + + u0[ 0] = w0[ 0]; +#if ( HPL_LASWP03T_DEPTH > 1 ) + u0[ 1] = w0[ 1]; +#endif +#if ( HPL_LASWP03T_DEPTH > 2 ) + u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3]; +#endif +#if ( HPL_LASWP03T_DEPTH > 4 ) + u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7]; +#endif +#if ( HPL_LASWP03T_DEPTH > 8 ) + u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11]; + u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15]; +#endif +#if ( HPL_LASWP03T_DEPTH > 16 ) + u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19]; + u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23]; + u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27]; + u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; 
u0[31] = w0[31]; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; } + } + } +/* + * End of HPL_dlaswp03T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04N.c new file mode 100644 index 000000000..4f9c490a5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04N.c @@ -0,0 +1,285 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP04N_DEPTH +#define HPL_LASWP04N_DEPTH 32 +#define HPL_LASWP04N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp04N +( + const int M0, + const int M1, + const int N, + double * U, + const int LDU, + double * A, + const int LDA, + const double * W0, + const double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp04N +( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M0; + const int M1; + const int N; + double * U; + const int LDU; + double * A; + const int LDA; + const double * W0; + const double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U + * with columns of W. In addition M1 - M0 columns of W are copied into + * rows of U. 
+ * + * Arguments + * ========= + * + * M0 (local input) const int + * On entry, M0 specifies the number of rows of U that should be + * copied into A and replaced by columns of W. M0 must be at + * least zero. + * + * M1 (local input) const int + * On entry, M1 specifies the number of columns of W that should + * be copied into rows of U. M1 must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of U that should + * be copied into A. N must be at least zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows that are to be copied into A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M1). + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M0). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M0+M1), that contains + * data to be copied into U. For i in [M0..M0+M1), the entries + * W(:,i) are copied into the row W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M0 containing the + * local row indexes A into which rows of U are copied. 
+ * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M0 that contains + * the local row indexes of U that should be copied into A and + * replaced by the columns of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP04N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP04N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH ) << + HPL_LASWP04N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH, A += incA, U += incU, + w += HPL_LASWP04N_DEPTH ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U + (size_t)(LINDXAU[i]); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *a0 = *u0; *u0 = w0[ 0]; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP04N_DEPTH > 1 ) + *a0 = *u0; *u0 = w0[ 1]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 2 ) + *a0 = *u0; *u0 = w0[ 2]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 3]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 4 ) + *a0 = *u0; *u0 = w0[ 4]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 5]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 6]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 7]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 8 ) + *a0 = *u0; *u0 = w0[ 8]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[ 9]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[10]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[11]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[12]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[13]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[14]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[15]; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 16 ) + *a0 = 
*u0; *u0 = w0[16]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[17]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[18]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[19]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[20]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[21]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[22]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[23]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[24]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[25]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[26]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[27]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[28]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[29]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[30]; a0 += LDA; u0 += LDU; + *a0 = *u0; *u0 = w0[31]; a0 += LDA; u0 += LDU; +#endif + } + + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))); + w0 = w + (size_t)(i) * (size_t)(LDW); + + *u0 = w0[ 0]; u0 += LDU; +#if ( HPL_LASWP04N_DEPTH > 1 ) + *u0 = w0[ 1]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 2 ) + *u0 = w0[ 2]; u0 += LDU; *u0 = w0[ 3]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 4 ) + *u0 = w0[ 4]; u0 += LDU; *u0 = w0[ 5]; u0 += LDU; + *u0 = w0[ 6]; u0 += LDU; *u0 = w0[ 7]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 8 ) + *u0 = w0[ 8]; u0 += LDU; *u0 = w0[ 9]; u0 += LDU; + *u0 = w0[10]; u0 += LDU; *u0 = w0[11]; u0 += LDU; + *u0 = w0[12]; u0 += LDU; *u0 = w0[13]; u0 += LDU; + *u0 = w0[14]; u0 += LDU; *u0 = w0[15]; u0 += LDU; +#endif +#if ( HPL_LASWP04N_DEPTH > 16 ) + *u0 = w0[16]; u0 += LDU; *u0 = w0[17]; u0 += LDU; + *u0 = w0[18]; u0 += LDU; *u0 = w0[19]; u0 += LDU; + *u0 = w0[20]; u0 += LDU; *u0 = w0[21]; u0 += LDU; + *u0 = w0[22]; u0 += LDU; *u0 = w0[23]; u0 += LDU; + *u0 = w0[24]; u0 += LDU; *u0 = w0[25]; u0 += LDU; + *u0 = w0[26]; u0 += LDU; *u0 = w0[27]; u0 += LDU; + *u0 = w0[28]; u0 += LDU; *u0 = w0[29]; u0 += LDU; + *u0 = w0[30]; u0 += LDU; *u0 = w0[31]; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < 
M0; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U + (size_t)(LINDXAU[i]); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { *a0 = *u0; *u0 = w0[j]; } + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))); + w0 = w + (size_t)(i) * (size_t)(LDW); + for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; } + } + } +/* + * End of HPL_dlaswp04N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04T.c new file mode 100644 index 000000000..9cbb4c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp04T.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP04T_DEPTH +#define HPL_LASWP04T_DEPTH 32 +#define HPL_LASWP04T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp04T +( + const int M0, + const int M1, + const int N, + double * U, + const int LDU, + double * A, + const int LDA, + const double * W0, + const double * W, + const int LDW, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp04T +( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU ) + const int M0; + const int M1; + const int N; + double * U; + const int LDU; + double * A; + const int LDA; + const double * W0; + const double * W; + const int LDW; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those + * columns of U with columns of W. In addition M1 - M0 columns of W are + * copied into U. + * + * Arguments + * ========= + * + * M0 (local input) const int + * On entry, M0 specifies the number of columns of U that should + * be copied into A and replaced by columns of W. M0 must be at + * least zero. + * + * M1 (local input) const int + * On entry, M1 specifies the number of columns of W that will + * be copied into U. M1 must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that + * will be copied into rows of A. N must be at least zero. + * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N).
On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M0). + * + * W0 (local input) const double * + * On entry, W0 is an array of size (M-1)*LDW+1, that contains + * the destination offset in U where the columns of W should be + * copied. + * + * W (local input) const double * + * On entry, W is an array of size (LDW,M0+M1), that contains + * data to be copied into U. For i in [M0..M1), the entries + * W(:,i) are copied into the column W0(i*LDW) of U. + * + * LDW (local input) const int + * On entry, LDW specifies the leading dimension of the array W. + * LDW must be at least MAX(1,N+1). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M0 containing the + * local row indexes of A into which columns of U are copied. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M0 that contains + * the local column indexes of U that should be copied into A + * and replaced by the columns of W. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * w = W, * w0; + double * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP04T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP04T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH ) << + HPL_LASWP04T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA, U += incU, + w += HPL_LASWP04T_DEPTH ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + LINDXA[i]; u0 = U + LINDXAU[i] * LDU; w0 = w + i * LDW; + + *a0 = u0[ 0]; u0[ 0] = w0[ 0]; a0 += LDA; +#if ( HPL_LASWP04T_DEPTH > 1 ) + *a0 = u0[ 1]; u0[ 1] = w0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 2 ) + *a0 = u0[ 2]; u0[ 2] = w0[ 2]; a0 += LDA; + *a0 = u0[ 3]; u0[ 3] = w0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 4 ) + *a0 = u0[ 4]; u0[ 4] = w0[ 4]; a0 += LDA; + *a0 = u0[ 5]; u0[ 5] = w0[ 5]; a0 += LDA; + *a0 = u0[ 6]; u0[ 6] = w0[ 6]; a0 += LDA; + *a0 = u0[ 7]; u0[ 7] = w0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 8 ) + *a0 = u0[ 8]; u0[ 8] = w0[ 8]; a0 += LDA; + *a0 = u0[ 9]; u0[ 9] = w0[ 9]; a0 += LDA; + *a0 = u0[10]; u0[10] = w0[10]; a0 += LDA; + *a0 = u0[11]; u0[11] = w0[11]; a0 += LDA; + *a0 = u0[12]; u0[12] = w0[12]; a0 += LDA; + *a0 = u0[13]; u0[13] = w0[13]; a0 += LDA; + *a0 = u0[14]; u0[14] = w0[14]; a0 += LDA; + *a0 = u0[15]; u0[15] = w0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP04T_DEPTH > 16 ) + *a0 = u0[16]; u0[16] = w0[16]; a0 += LDA; + *a0 = u0[17]; u0[17] = w0[17]; a0 += LDA; + *a0 = u0[18]; u0[18] = w0[18]; a0 += LDA; + *a0 = u0[19]; u0[19] = w0[19]; a0 += LDA; + *a0 = u0[20]; u0[20] = w0[20]; a0 += LDA; + *a0 = u0[21]; u0[21] = w0[21]; a0 += LDA; + *a0 = u0[22]; u0[22] = w0[22]; a0 += LDA; + *a0 = u0[23]; u0[23] = w0[23]; a0 += LDA; + *a0 = u0[24]; u0[24] = w0[24]; a0 += LDA; + *a0 = u0[25]; u0[25] = w0[25]; a0 += LDA; + *a0 = u0[26]; u0[26] = w0[26]; a0 += LDA; + *a0 = u0[27]; u0[27] = w0[27]; a0 += LDA; + *a0 = u0[28]; u0[28] = w0[28]; a0 += LDA; + *a0 = u0[29]; u0[29] = w0[29]; a0 += LDA; + *a0 = u0[30]; u0[30] = w0[30]; a0 += LDA; + *a0 = u0[31]; u0[31] = w0[31]; a0 += LDA; +#endif + } + for(
i = M0; i < M1; i++ ) + { + u0 = U + (int)(*(W0+i*LDW)) * LDU; w0 = w + i * LDW; + + u0[ 0] = w0[ 0]; +#if ( HPL_LASWP04T_DEPTH > 1 ) + u0[ 1] = w0[ 1]; +#endif +#if ( HPL_LASWP04T_DEPTH > 2 ) + u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3]; +#endif +#if ( HPL_LASWP04T_DEPTH > 4 ) + u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7]; +#endif +#if ( HPL_LASWP04T_DEPTH > 8 ) + u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11]; + u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15]; +#endif +#if ( HPL_LASWP04T_DEPTH > 16 ) + u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19]; + u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23]; + u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27]; + u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31]; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M0; i++ ) + { + a0 = A + LINDXA[i]; u0 = U + LINDXAU[i] * LDU; w0 = w + i * LDW; + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; u0[j] = w0[j]; } + } + for( i = M0; i < M1; i++ ) + { + u0 = U + (int)(*(W0+i*LDW)) * LDU; w0 = w + i * LDW; + for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; } + } + } +/* + * End of HPL_dlaswp04T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05N.c new file mode 100644 index 000000000..3edcf91a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05N.c @@ -0,0 +1,195 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP05N_DEPTH +#define HPL_LASWP05N_DEPTH 32 +#define HPL_LASWP05N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05N +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05N +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05N copies rows of U of global offset LINDXAU into rows of + * A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of U that should be + * copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of U that should + * be copied into A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U indicated by LINDXAU. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A.
+ * LDA must be at least MAX(1,M). + * + * U (local input) const double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows that are to be copied into A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied from U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local row indexes of U that should be copied in A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP05N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH ) << + HPL_LASWP05N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + + *a0 = *u0; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP05N_DEPTH > 1 ) + *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 2 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 4 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 8 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP05N_DEPTH > 16 ) + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; + *a0 = *u0; a0 += LDA; u0 += LDU; *a0 = *u0; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(LINDXAU[i]); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; } + } + } +/* + * End of HPL_dlaswp05N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05T.c
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05T.c new file mode 100644 index 000000000..0adaa102d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp05T.c @@ -0,0 +1,196 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP05T_DEPTH +#define HPL_LASWP05T_DEPTH 32 +#define HPL_LASWP05T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp05T +( + const int M, + const int N, + double * A, + const int LDA, + const double * U, + const int LDU, + const int * LINDXA, + const int * LINDXAU +) +#else +void HPL_dlaswp05T +( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) + const int M; + const int N; + double * A; + const int LDA; + const double * U; + const int LDU; + const int * LINDXA; + const int * LINDXAU; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp05T copies columns of U of global offset LINDXAU into rows + * of A at positions indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the columns of U that will + * be copied into rows of A. N must be at least zero. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U indicated by LINDXAU.
+ * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * U (local input) const double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns that are to be copied into rows of + * A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be copied from U. + * + * LINDXAU (local input) const int * + * On entry, LINDXAU is an array of dimension M that contains + * the local column indexes of U that should be copied in A. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + const double * U0 = U, * u0; + double * a0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP05T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) << + HPL_LASWP05T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + + *a0 = u0[ 0]; a0 += LDA; +#if ( HPL_LASWP05T_DEPTH > 1 ) + *a0 = u0[ 1]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 2 ) + *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 4 ) + *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA; + *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 8 ) + *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA; + *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA; + *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA; + *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA; +#endif +#if ( HPL_LASWP05T_DEPTH > 16 ) + *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA; + *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA; + *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA; + *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA; + *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA; + *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA; + *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA; + *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[ i]); + u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; } + } + } +/* + * End of HPL_dlaswp05T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06N.c new file mode 100644 index 000000000..a74bae75c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06N.c @@ -0,0 +1,206 @@ +/* + * -- High
Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06N_DEPTH +#define HPL_LASWP06N_DEPTH 32 +#define HPL_LASWP06N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06N +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06N +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06N swaps rows of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with rows of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with rows of U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * rows of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M).
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,N). This + * array contains the rows of U that are to be swapped with rows + * of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,M). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06N_LOG2_DEPTH ), + incU = (int)( (unsigned int)(LDU) << + HPL_LASWP06N_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) << + HPL_LASWP06N_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#if ( HPL_LASWP06N_DEPTH > 1 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 2 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 4 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 8 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 =
r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif +#if ( HPL_LASWP06N_DEPTH > 16 ) + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; + r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; +#endif + } + } + + if( nr ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); + for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) + { r = *a0; *a0 = *u0; *u0 = r; } + } + } +/* + * End of HPL_dlaswp06N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06T.c new file mode 100644 index 000000000..fb53c2a31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp06T.c @@ -0,0 +1,207 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP06T_DEPTH +#define HPL_LASWP06T_DEPTH 32 +#define HPL_LASWP06T_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp06T +( + const int M, + const int N, + double * A, + const int LDA, + double * U, + const int LDU, + const int * LINDXA +) +#else +void HPL_dlaswp06T +( M, N, A, LDA, U, LDU, LINDXA ) + const int M; + const int N; + double * A; + const int LDA; + double * U; + const int LDU; + const int * LINDXA; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp06T swaps columns of U with rows of A at positions + * indicated by LINDXA. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of A that should be + * swapped with columns of U. M must be at least zero. + * + * N (local input) const int + * On entry, N specifies the length of the rows of A that should + * be swapped with columns of U. N must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * the rows of this array specified by LINDXA are replaced by + * columns of U. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M).
+ * + * U (local input/output) double * + * On entry, U points to an array of dimension (LDU,*). This + * array contains the columns of U that are to be swapped with + * rows of A. + * + * LDU (local input) const int + * On entry, LDU specifies the leading dimension of the array U. + * LDU must be at least MAX(1,N). + * + * LINDXA (local input) const int * + * On entry, LINDXA is an array of dimension M that contains the + * local row indexes of A that should be swapped with U. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * U0 = U, * a0, * u0; + const int incA = (int)( (unsigned int)(LDA) << + HPL_LASWP06T_LOG2_DEPTH ), + incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH ); + int nr, nu; + register int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) << + HPL_LASWP06T_LOG2_DEPTH ) ); + + for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + + r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA; +#if ( HPL_LASWP06T_DEPTH > 1 ) + r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 2 ) + r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA; + r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 4 ) + r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA; + r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA; + r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA; + r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 8 ) + r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA; + r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA; + r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA; + r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA; + r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA; + r = *a0; *a0 = u0[13]; u0[13] = r; a0 +=
LDA; + r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA; + r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA; +#endif +#if ( HPL_LASWP06T_DEPTH > 16 ) + r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA; + r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA; + r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA; + r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA; + r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA; + r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA; + r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA; + r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA; + r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA; + r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA; + r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA; + r = *a0; *a0 = u0[27]; u0[27] = r; a0 += LDA; + r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA; + r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA; + r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA; + r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA; +#endif + } + } + + if( nr > 0 ) + { + for( i = 0; i < M; i++ ) + { + a0 = A + (size_t)(LINDXA[i]); + u0 = U0 + (size_t)(i) * (size_t)(LDU); + for( j = 0; j < nr; j++, a0 += LDA ) + { r = *a0; *a0 = u0[j]; u0[j] = r; } + } + } +/* + * End of HPL_dlaswp06T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp10N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp10N.c new file mode 100644 index 000000000..7dbf934f2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_dlaswp10N.c @@ -0,0 +1,186 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LASWP10N_DEPTH +#define HPL_LASWP10N_DEPTH 32 +#define HPL_LASWP10N_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlaswp10N +( + const int M, + const int N, + double * A, + const int LDA, + const int * IPIV +) +#else +void HPL_dlaswp10N +( M, N, A, LDA, IPIV ) + const int M; + const int N; + double * A; + const int LDA; + const int * IPIV; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlaswp10N performs a sequence of local column interchanges on a + * matrix A. One column interchange is initiated for columns 0 through + * N-1 of A. + * + * Arguments + * ========= + * + * M (local input) const int + * On entry, M specifies the number of rows of the array A. M + * must be at least zero. + * + * N (local input) const int + * On entry, N specifies the number of columns of the array A. N + * must be at least zero. + * + * A (local input/output) double * + * On entry, A points to an array of dimension (LDA,N). This + * array contains the columns onto which the interchanges should + * be applied. On exit, A contains the permuted matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least MAX(1,M). + * + * IPIV (local input) const int * + * For k in [0..N), column k is swapped with column IPIV[k]. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double r; + double * a0, * a1; + const int incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH ); + int jp, mr, mu; + register int i, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; + + mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH ) + << HPL_LASWP10N_LOG2_DEPTH ) ); + + for( j = 0; j < N; j++ ) + { + if( j != ( jp = IPIV[j] ) ) + { + a0 = A + j * LDA; a1 = A + jp * LDA; + + for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA ) + { + r = *a0; *a0 = *a1; *a1 = r; +#if ( HPL_LASWP10N_DEPTH > 1 ) + r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 2 ) + r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r; + r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 4 ) + r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r; + r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r; + r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r; + r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 8 ) + r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r; + r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r; + r = a0[10]; a0[10] = a1[10]; a1[10] = r; + r = a0[11]; a0[11] = a1[11]; a1[11] = r; + r = a0[12]; a0[12] = a1[12]; a1[12] = r; + r = a0[13]; a0[13] = a1[13]; a1[13] = r; + r = a0[14]; a0[14] = a1[14]; a1[14] = r; + r = a0[15]; a0[15] = a1[15]; a1[15] = r; +#endif +#if ( HPL_LASWP10N_DEPTH > 16 ) + r = a0[16]; a0[16] = a1[16]; a1[16] = r; + r = a0[17]; a0[17] = a1[17]; a1[17] = r; + r = a0[18]; a0[18] = a1[18]; a1[18] = r; + r = a0[19]; a0[19] = a1[19]; a1[19] = r; + r = a0[20]; a0[20] = a1[20]; a1[20] = r; + r = a0[21]; a0[21] = a1[21]; a1[21] = r; + r = a0[22]; a0[22] = a1[22]; a1[22] = r; + r = a0[23]; a0[23] = a1[23]; a1[23] = r; + r = a0[24]; a0[24] = a1[24]; a1[24] = r; + r = a0[25]; a0[25] = a1[25]; a1[25] = r; + r = a0[26]; a0[26] = a1[26]; a1[26] = r; + r = a0[27]; a0[27] = a1[27]; a1[27] = r; + r = a0[28]; a0[28] = a1[28]; a1[28] = r; + r = a0[29]; a0[29] = a1[29]; a1[29] = r; + r = a0[30]; a0[30] = a1[30]; a1[30] = r; + r = a0[31]; a0[31] = a1[31]; a1[31] = r; +#endif + } + + for( i = 0; i < mr; i++ ) + { r = a0[i]; a0[i] = a1[i]; a1[i] = r; } + } + } +/* + * End of 
HPL_dlaswp10N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2l.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2l.c new file mode 100644 index 000000000..e1b5bbfac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2l.c @@ -0,0 +1,151 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2l +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2l +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2l computes the local index of a matrix entry pointed to by + * the global index IG. This local returned index is the same in all + * processes. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated, in which case this routine returns IG in all + * processes. Otherwise, the value of SRCPROC is ignored. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( IG ); +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + return( NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? 
IG - INB : IG ) ); +/* + * End of HPL_indxg2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2lp.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2lp.c new file mode 100644 index 000000000..74662f9d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2lp.c @@ -0,0 +1,176 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_indxg2lp +( + int * IL, + int * PROC, + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +void HPL_indxg2lp +( IL, PROC, IG, INB, NB, SRCPROC, NPROCS ) + int * IL; + int * PROC; + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2lp computes the local index of a matrix entry pointed to by + * the global index IG as well as the process coordinate which possesses + * this entry. The local returned index is the same in all processes. + * + * Arguments + * ========= + * + * IL (output) int * + * On exit, IL specifies the local index corresponding to IG. IL + * is at least zero. + * + * PROC (output) int * + * On exit, PROC is the coordinate of the process owning the + * entry specified by the global index IG. PROC is at least zero + * and less than NPROCS. + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A.
NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, if SRCPROC = -1, the data is not distributed but + * replicated and IL is set to IG. Otherwise, SRCPROC is the + * coordinate of the process owning the first row or column. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + *IL = IG; + *PROC = SRCPROC; + } + else + { +/* + * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, + * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC + * with 0 <= MYROC < NPROCS. The local index to be returned depends on + * whether IG resides in the process owning the first partial block of + * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, + * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. + * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is + * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, + * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that + * j=l and thus (j+1)*NPROCS > i+1. + */ + j = ( i = ( IG - INB ) / NB ) / NPROCS; +/* + * IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC and take + * the NPROCS modulo (definition of the block-cyclic data distribution).
+ */ + *PROC = SRCPROC + 1 + i; + *PROC = MPosMod( *PROC, NPROCS ); +/* + * When IG resides in the process owning the first partial block of size + * INB (MYROC = 0), then the result IL can be written as: + * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. + * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, + * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore + * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. + * + * Otherwise when MYROC >= 1, the result IL can be written as: + * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. + * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, + * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e + * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. + */ + *IL = NB * (j - i) + + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ); + } +/* + * End of HPL_indxg2lp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2p.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2p.c new file mode 100644 index 000000000..d0e75f516 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxg2p.c @@ -0,0 +1,128 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxg2p +( + const int IG, + const int INB, + const int NB, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxg2p +( IG, INB, NB, SRCPROC, NPROCS ) + const int IG; + const int INB; + const int NB; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxg2p computes the process coordinate which possesses the entry + * of a matrix specified by a global index IG. + * + * Arguments + * ========= + * + * IG (input) const int + * On entry, IG specifies the global index of the matrix entry. + * IG must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int proc; +/* .. + * .. Executable Statements .. + */ + if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * IG belongs to the first block, or the data is not distributed, or + * there is just one process in this dimension of the grid. + */ + return( SRCPROC ); +/* + * Otherwise, IG is in block 1 + ( IG - INB ) / NB. Add this to SRCPROC + * and take the NPROCS modulo (definition of the block-cyclic data dis- + * tribution).
+ */ + proc = SRCPROC + 1 + ( IG - INB ) / NB; + return( MPosMod( proc, NPROCS ) ); +/* + * End of HPL_indxg2p + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxl2g.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxl2g.c new file mode 100644 index 000000000..7f139425a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_indxl2g.c @@ -0,0 +1,164 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_indxl2g +( + const int IL, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_indxl2g +( IL, INB, NB, PROC, SRCPROC, NPROCS ) + const int IL; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_indxl2g computes the global index of a matrix entry pointed to + * by the local index IL of the process indicated by PROC. + * + * Arguments + * ========= + * + * IL (input) const int + * On entry, IL specifies the local index of the matrix entry. + * IL must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. 
+ * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local array row or column is to be determined. PROC must be + * at least zero and strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) + { +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( IL ); + } + else if( PROC == SRCPROC ) + { +/* + * If I am SRCPROC, my first block is of size INB + */ + if( IL < INB ) +/* + * If IL belongs to the first block, the local and global indexes are + * equal. + */ + return ( IL ); +/* + * The number of entire blocks before the one IL belongs to is + * ( IL - INB ) / NB + 1. In the other NPROCS-1 processes, there are + * thus NB*( ( IL-INB )/NB + 1 ) entries, that are globally before the + * global entry corresponding to IL. + */ + return( ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) + IL ); + } + else if( PROC < SRCPROC ) + { +/* + * Otherwise, the process of coordinate MOD(SRCPROC+1, NPROCS) owns the + * second block. Let IPROC = PROC-SRCPROC-1+NPROCS be the number of pro- + * cesses between this process and PROC not included when going from + * left to right on the process line with possible wrap around. These + * IPROC processes have one more NB block than the other processes, who + * own IL / NB blocks of size NB. 
+ */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1+NPROCS )+IL+INB ); + } + else + { +/* + * Same reasoning as above with IPROC = PROC - SRCPROC - 1. + */ + return( NB*( (NPROCS-1)*(IL/NB)+PROC-SRCPROC-1 )+IL+INB ); + } +/* + * End of HPL_indxl2g + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_infog2l.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_infog2l.c new file mode 100644 index 000000000..2580f2ad4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_infog2l.c @@ -0,0 +1,382 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_infog2l +( + int I, + int J, + const int IMB, + const int MB, + const int INB, + const int NB, + const int RSRC, + const int CSRC, + const int MYROW, + const int MYCOL, + const int NPROW, + const int NPCOL, + int * II, + int * JJ, + int * PROW, + int * PCOL +) +#else +void HPL_infog2l +( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL ) + int I; + int J; + const int IMB; + const int MB; + const int INB; + const int NB; + const int RSRC; + const int CSRC; + const int MYROW; + const int MYCOL; + const int NPROW; + const int NPCOL; + int * II; + int * JJ; + int * PROW; + int * PCOL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_infog2l computes the starting local index II, JJ corresponding to + * the submatrix starting globally at the entry pointed by I, J. This + * routine returns the coordinates in the grid of the process owning the + * matrix entry of global indexes I, J, namely PROW and PCOL. 
+ * + * Arguments + * ========= + * + * I (global input) int + * On entry, I specifies the global row index of the matrix + * entry. I must be at least zero. + * + * J (global input) int + * On entry, J specifies the global column index of the matrix + * entry. J must be at least zero. + * + * IMB (global input) const int + * On entry, IMB specifies the size of the first row block of + * the global matrix. IMB must be at least one. + * + * MB (global input) const int + * On entry, MB specifies the blocking factor used to partition + * and distribute the rows of the matrix A. MB must be larger + * than one. + * + * INB (global input) const int + * On entry, INB specifies the size of the first column block of + * the global matrix. INB must be at least one. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the columns of the matrix A. NB must be larger + * than one. + * + * RSRC (global input) const int + * On entry, RSRC specifies the row coordinate of the process + * that possesses the row I. RSRC must be at least zero and + * strictly less than NPROW. + * + * CSRC (global input) const int + * On entry, CSRC specifies the column coordinate of the process + * that possesses the column J. CSRC must be at least zero and + * strictly less than NPCOL. + * + * MYROW (local input) const int + * On entry, MYROW specifies my row process coordinate in the + * grid. MYROW is greater than or equal to zero and less than + * NPROW. + * + * MYCOL (local input) const int + * On entry, MYCOL specifies my column process coordinate in the + * grid. MYCOL is greater than or equal to zero and less than + * NPCOL. + * + * NPROW (global input) const int + * On entry, NPROW specifies the number of process rows in the + * grid. NPROW is at least one. + * + * NPCOL (global input) const int + * On entry, NPCOL specifies the number of process columns in + * the grid. NPCOL is at least one. 
+ * + * II (local output) int * + * On exit, II specifies the local starting row index of the + * submatrix. On exit, II is at least 0. + * + * JJ (local output) int * + * On exit, JJ specifies the local starting column index of the + * submatrix. On exit, JJ is at least 0. + * + * PROW (global output) int * + * On exit, PROW is the row coordinate of the process owning the + * entry specified by the global index I. PROW is at least zero + * and less than NPROW. + * + * PCOL (global output) int * + * On exit, PCOL is the column coordinate of the process owning + * the entry specified by the global index J. PCOL is at least + * zero and less than NPCOL. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc; +/* .. + * .. Executable Statements .. + */ + imb = IMB; + *PROW = RSRC; + + if( ( *PROW == -1 ) || ( NPROW == 1 ) ) + { +/* + * The data is not distributed, or there is just one process row in the + * grid. + */ + *II = I; + } + else if( I < imb ) + { +/* + * I refers to an entry in the first block of rows + */ + *II = ( MYROW == *PROW ? I : 0 ); + } + else + { + mb = MB; + rsrc = *PROW; +/* + * The discussion goes as follows: compute my distance from the source + * process so that within this process coordinate system, the source + * process is the process such that mydist = 0, or equivalently + * MYROW == rsrc. + * + * Find out the global coordinate of the block I belongs to (nblocks), + * as well as the minimum local number of blocks that every process has. + * + * when mydist < nblocks-ilocblk*NPROCS, I own ilocblk + 1 full blocks, + * when mydist > nblocks-ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks-ilocblk*NPROCS, I own ilocblk full blocks + * but not I, or I own ilocblk + 1 blocks and the entry I refers to. 
+ */ + if( MYROW == rsrc ) + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I - imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only + * three possible cases: + * + * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I do not own + * I, in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this + * case cannot happen when ilocblk is zero, since nblocks is at + * least one. + * + * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in + * which case I and II can respectively be written as IMB + + * (nblocks-1)*MB + IL and IMB + (ilocblk-1) * MB + IL. That is + * II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen + * when ilocblk is zero, since nblocks is at least one. + * + * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns + * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. + * Note that when ilocblk is zero, II is just IMB. + */ + if( nblocks < NPROW ) + { + *II = imb; + } + else + { + ilocblk = nblocks / NPROW; + if( ilocblk * NPROW >= nblocks ) + { + *II = ( ( MYROW == *PROW ) ? + I + ( ilocblk - nblocks ) * mb : + imb + ( ilocblk - 1 ) * mb ); + } + else + { + *II = imb + ilocblk * mb; + } + } + } + else + { +/* + * I refers to an entry that is not in the first block, find out which + * process has it. + */ + nblocks = ( I -= imb ) / mb + 1; + *PROW += nblocks; + *PROW -= ( *PROW / NPROW ) * NPROW; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; +/* + * When mydist < nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks + * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
+ * When mydist>=nblocks-ilocblk*NPROW and I do not own I, I own ilocblk + * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk + * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB. + */ + if( nblocks < NPROW ) + { + mydist -= nblocks; + *II = ( ( mydist < 0 ) ? mb : + ( ( MYROW == *PROW ) ? + I + ( 1 - nblocks ) * mb : 0 ) ); + } + else + { + ilocblk = nblocks / NPROW; + mydist -= nblocks - ilocblk * NPROW; + *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : + ( ( MYROW == *PROW ) ? + ( ilocblk - nblocks + 1 ) * mb + I : + ilocblk * mb ) ); + } + } + } +/* + * Idem for the columns + */ + inb = INB; + *PCOL = CSRC; + + if( ( *PCOL == -1 ) || ( NPCOL == 1 ) ) + { + *JJ = J; + } + else if( J < inb ) + { + *JJ = ( MYCOL == *PCOL ? J : 0 ); + } + else + { + nb = NB; + csrc = *PCOL; + + if( MYCOL == csrc ) + { + nblocks = ( J - inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( nblocks < NPCOL ) + { + *JJ = inb; + } + else + { + ilocblk = nblocks / NPCOL; + if( ilocblk * NPCOL >= nblocks ) + { + *JJ = ( ( MYCOL == *PCOL ) ? + J + ( ilocblk - nblocks ) * nb : + inb + ( ilocblk - 1 ) * nb ); + } + else + { + *JJ = inb + ilocblk * nb; + } + } + } + else + { + nblocks = ( J -= inb ) / nb + 1; + *PCOL += nblocks; + *PCOL -= ( *PCOL / NPCOL ) * NPCOL; + + if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; + + if( nblocks < NPCOL ) + { + mydist -= nblocks; + *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? + J + ( 1 - nblocks )*nb : 0 ) ); + } + else + { + ilocblk = nblocks / NPCOL; + mydist -= nblocks - ilocblk * NPCOL; + *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : + ( ( MYCOL == *PCOL ) ? 
+ ( ilocblk - nblocks + 1 ) * nb + J : + ilocblk * nb ) ); + } + } + } +/* + * End of HPL_infog2l + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numroc.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numroc.c new file mode 100644 index 000000000..39cd736d3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numroc.c @@ -0,0 +1,120 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numroc +( + const int N, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numroc +( N, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numroc returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index 0. + * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. 
+ * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + return( HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS ) ); +/* + * End of HPL_numroc + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numrocI.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numrocI.c new file mode 100644 index 000000000..70f3497de --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_numrocI.c @@ -0,0 +1,243 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int HPL_numrocI +( + const int N, + const int I, + const int INB, + const int NB, + const int PROC, + const int SRCPROC, + const int NPROCS +) +#else +int HPL_numrocI +( N, I, INB, NB, PROC, SRCPROC, NPROCS ) + const int N; + const int I; + const int INB; + const int NB; + const int PROC; + const int SRCPROC; + const int NPROCS; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_numrocI returns the local number of matrix rows/columns process + * PROC will get if we give out N rows/columns starting from global + * index I. 
+ * + * Arguments + * ========= + * + * N (input) const int + * On entry, N specifies the number of rows/columns being dealt + * out. N must be at least zero. + * + * I (input) const int + * On entry, I specifies the global index of the matrix entry. + * I must be at least zero. + * + * INB (input) const int + * On entry, INB specifies the size of the first block of the + * global matrix. INB must be at least one. + * + * NB (input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * PROC (input) const int + * On entry, PROC specifies the coordinate of the process whose + * local portion is determined. PROC must be at least zero and + * strictly less than NPROCS. + * + * SRCPROC (input) const int + * On entry, SRCPROC specifies the coordinate of the process + * that possesses the first row or column of the matrix. SRCPROC + * must be at least zero and strictly less than NPROCS. + * + * NPROCS (input) const int + * On entry, NPROCS specifies the total number of process rows + * or columns over which the matrix is distributed. NPROCS must + * be at least one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int ilocblk, inb, mydist, nblocks, srcproc; +/* .. + * .. Executable Statements .. + */ + if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) +/* + * The data is not distributed, or there is just one process in this di- + * mension of the grid. + */ + return( N ); +/* + * Compute coordinate of process owning I and corresponding INB + */ + srcproc = SRCPROC; + + if( ( inb = INB - I ) <= 0 ) + { +/* + * I is not in the first block, find out which process has it and update + * the size of first block + */ + srcproc += ( nblocks = (-inb) / NB + 1 ); + srcproc -= ( srcproc / NPROCS ) * NPROCS; + inb += nblocks * NB; + } +/* + * Now everything is just like N, I=0, INB, NB, srcproc, NPROCS.
The + * discussion goes as follows: compute my distance from the source pro- + * cess so that within this process coordinate system, the source pro- + * cess is the process such that mydist = 0, or PROC == srcproc. + * + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. Then remark that + * + * when mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks, + * when mydist > nblocks - ilocblk*NPROCS, I own ilocblk full blocks, + * when mydist = nblocks - ilocblk*NPROCS, either the last block is not + * full and I own it, or the last block is full and I am the first pro- + * cess owning only ilocblk full blocks. + */ + if( PROC == srcproc ) + { +/* + * I am the source process, i.e. I own I (mydist=0). When N <= INB, the + * answer is simply N. + */ + if( N <= inb ) return( N ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries. + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only + * two possible cases: + * + * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di- + * vides the global number of full blocks, then the source process + * srcproc owns one more block than the other processes; and N can + * be rewritten as N = INB + (nblocks-1) * NB + LNB with LNB >= 0 + * size of the last block. Similarly, the local value Np correspon- + * ding to N can be written as Np = INB + (ilocblk-1) * NB + LNB = + * N + ( ilocblk-1 - (nblocks-1) )*NB. Note that this case cannot + * happen when ilocblk is zero, since nblocks is at least one. + * + * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only + * owns full blocks, and therefore Np = INB + ilocblk * NB. Note + * that when ilocblk is zero, Np is just INB. + */ + if( nblocks < NPROCS ) return( inb ); + + ilocblk = nblocks / NPROCS; + return( ( nblocks - ilocblk * NPROCS ) ? 
inb + ilocblk * NB : + N + ( ilocblk - nblocks ) * NB ); + } + else + { +/* + * I am not the source process. When N <= INB, the answer is simply 0. + */ + if( N <= inb ) return( 0 ); +/* + * Find out how many full blocks are globally (nblocks) and locally + * (ilocblk) in those N entries + */ + nblocks = ( N - inb ) / NB + 1; +/* + * Compute my distance from the source process so that within this pro- + * cess coordinate system, the source process is the process such that + * mydist=0. + */ + if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS; +/* + * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks + * of size NB since I am not the source process, + * + * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks + * of size NB since I am not the source process, + * + * when mydist = nblocks - ilocblk*NPROCS, + * either the last block is not full and I own it, in which case + * N = INB + (nblocks - 1)*NB + LNB with LNB the size of the last + * block such that NB > LNB > 0; the local value Np corresponding to + * N is given by Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB; + * or the last block is full and I am the first process owning only + * ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and + * Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB. + */ + if( nblocks < NPROCS ) + return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 : + N - inb + NB * ( 1 - nblocks ) ) ); + + ilocblk = nblocks / NPROCS; + mydist -= nblocks - ilocblk * NPROCS; + return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : + ( ( mydist > 0 ) ? 
ilocblk * NB : + N - inb + NB * ( ilocblk - nblocks + 1 ) ) ); + } +/* + * End of HPL_numrocI + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pabort.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pabort.c new file mode 100644 index 000000000..268975fc1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pabort.c @@ -0,0 +1,137 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pabort +( + int LINE, + const char * SRNAME, + const char * FORM, + ... +) +#else +void HPL_pabort( va_alist ) +va_dcl +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pabort displays an error message on stderr and halts execution. + * + * + * Arguments + * ========= + * + * LINE (local input) int + * On entry, LINE specifies the line number in the file where + * the error has occurred. When LINE is not a positive line + * number, it is ignored. + * + * SRNAME (local input) const char * + * On entry, SRNAME should be the name of the routine calling + * this error handler. + * + * FORM (local input) const char * + * On entry, FORM specifies the format, i.e., how the subsequent + * arguments are converted for output. + * + * (local input) ... + * On entry, ... is the list of arguments to be printed within + * the format string. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables ..
+ */ + va_list argptr; + int rank; + char cline[128]; +#ifndef STDC_HEADERS + int LINE; + char * FORM, * SRNAME; +#endif +/* .. + * .. Executable Statements .. + */ +#ifdef STDC_HEADERS + va_start( argptr, FORM ); +#else + va_start( argptr ); + LINE = va_arg( argptr, int ); + SRNAME = va_arg( argptr, char * ); + FORM = va_arg( argptr, char * ); +#endif + (void) vsnprintf( cline, sizeof( cline ), FORM, argptr ); /* bounded: a long message is truncated instead of overrunning cline */ + va_end( argptr ); + + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +/* + * Display an error message + */ + if( LINE <= 0 ) + HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "in function", + SRNAME, cline ); + else + HPL_fprintf( stderr, + "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n", + "HPL ERROR", "from process #", rank, "on line", LINE, + "of function", SRNAME, cline ); + + MPI_Abort( MPI_COMM_WORLD, -1 ); + exit( -1 ); +/* + * End of HPL_pabort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlamch.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlamch.c new file mode 100644 index 000000000..73cf649da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlamch.c @@ -0,0 +1,143 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_pdlamch +( + MPI_Comm COMM, + const HPL_T_MACH CMACH +) +#else +double HPL_pdlamch +( COMM, CMACH ) + MPI_Comm COMM; + const HPL_T_MACH CMACH; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlamch determines machine-specific arithmetic constants such as + * the relative machine precision (eps), the safe minimum (sfmin) such that + * 1/sfmin does not overflow, the base of the machine (base), the precision + * (prec), the number of (base) digits in the mantissa (t), whether + * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum + * exponent before (gradual) underflow (emin), the underflow threshold + * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the + * overflow threshold (rmax) - (base**emax)*(1-eps). + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection. + * + * CMACH (global input) const HPL_T_MACH + * Specifies the value to be returned by HPL_pdlamch + * = HPL_MACH_EPS, HPL_pdlamch := eps (default) + * = HPL_MACH_SFMIN, HPL_pdlamch := sfmin + * = HPL_MACH_BASE, HPL_pdlamch := base + * = HPL_MACH_PREC, HPL_pdlamch := eps*base + * = HPL_MACH_MLEN, HPL_pdlamch := t + * = HPL_MACH_RND, HPL_pdlamch := rnd + * = HPL_MACH_EMIN, HPL_pdlamch := emin + * = HPL_MACH_RMIN, HPL_pdlamch := rmin + * = HPL_MACH_EMAX, HPL_pdlamch := emax + * = HPL_MACH_RMAX, HPL_pdlamch := rmax + * + * where + * + * eps = relative machine precision, + * sfmin = safe minimum, + * base = base of the machine, + * prec = eps*base, + * t = number of digits in the mantissa, + * rnd = 1.0 if rounding occurs in addition, + * emin = minimum exponent before underflow, + * rmin = underflow threshold, + * emax = largest exponent before overflow, + * rmax = overflow threshold.
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double param; +/* .. + * .. Executable Statements .. + */ + param = HPL_dlamch( CMACH ); + + switch( CMACH ) + { + case HPL_MACH_EPS : + case HPL_MACH_SFMIN : + case HPL_MACH_EMIN : + case HPL_MACH_RMIN : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_max, COMM ); + break; + case HPL_MACH_EMAX : + case HPL_MACH_RMAX : + (void) HPL_all_reduce( (void *)(&param), 1, HPL_DOUBLE, + HPL_min, COMM ); + break; + default : + break; + } + + return( param ); +/* + * End of HPL_pdlamch + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlange.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlange.c new file mode 100644 index 000000000..40bdcc36b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlange.c @@ -0,0 +1,242 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3.
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+double HPL_pdlange
+(
+   const HPL_T_grid * GRID,
+   const HPL_T_NORM NORM,
+   const int M,
+   const int N,
+   const int NB,
+   const double * A,
+   const int LDA
+)
+#else
+double HPL_pdlange
+( GRID, NORM, M, N, NB, A, LDA )
+   const HPL_T_grid * GRID;
+   const HPL_T_NORM NORM;
+   const int M;
+   const int N;
+   const int NB;
+   const double * A;
+   const int LDA;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlange returns the value of the one norm, or the infinity norm,
+ * or the element of largest absolute value of a distributed matrix A:
+ *
+ *
+ *    max(abs(A(i,j))) when NORM = HPL_NORM_A,
+ *    norm1(A),        when NORM = HPL_NORM_1,
+ *    normI(A),        when NORM = HPL_NORM_I,
+ *
+ * where norm1 denotes the one norm of a matrix (maximum column sum) and
+ * normI denotes the infinity norm of a matrix (maximum row sum). Note
+ * that max(abs(A(i,j))) is not a matrix norm.
+ *
+ * When min(M,N) is zero the function returns HPL_rzero right away,
+ * before any reduction or broadcast is issued. In every other case the
+ * function ends with HPL_broadcast on GRID->all_comm (root argument 0)
+ * and returns v0 afterwards. If NORM is none of the three values listed
+ * above, no norm branch runs and v0 still holds its initial HPL_rzero
+ * when that broadcast is issued.
+ * NOTE(review): that every process ends up with the same value relies
+ * on HPL_reduce / HPL_broadcast semantics defined elsewhere -- confirm.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * NORM    (global input)                const HPL_T_NORM
+ *         On entry,  NORM  specifies the  value to be  returned by this
+ *         function as described above.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies  the number  of rows of the matrix A.
+ *         M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On entry,  N specifies the number of columns of the matrix A.
+ *         N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 const double *
+ *         On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+ *         that contains the local pieces of the distributed matrix A.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ *
+ * mp and nq are filled in by Mnumroc from (M, NB, myrow, nprow) and
+ * (N, NB, mycol, npcol) and bound the local row and column loops below
+ * (presumably LocP(M) and LocQ(N) -- confirm against Mnumroc).
+ * work is a scratch vector holding per-column sums (one norm) or
+ * per-row sums (infinity norm); it is freed inside the branch that
+ * allocated it.
+ */
+   double s, v0=HPL_rzero, * work = NULL;
+   MPI_Comm Acomm, Ccomm, Rcomm;
+   int ii, jj, mp, mycol, myrow, npcol, nprow,
+       nq;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Rcomm = GRID->row_comm; Ccomm = GRID->col_comm;
+   Acomm = GRID->all_comm;
+
+   Mnumroc( mp, M, NB, NB, myrow, 0, nprow );
+   Mnumroc( nq, N, NB, NB, mycol, 0, npcol );
+
+   if( Mmin( M, N ) == 0 ) { return( v0 ); }
+   else if( NORM == HPL_NORM_A )
+   {
+/*
+ * max( abs( A ) )
+ */
+      if( ( nq > 0 ) && ( mp > 0 ) )
+      {
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { v0 = Mmax( v0, Mabs( *A ) ); A++; }
+            A += LDA - mp; /* mp entries consumed above; total step per column is LDA */
+         }
+      }
+      (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                         Acomm );
+   }
+   else if( NORM == HPL_NORM_1 )
+   {
+/*
+ * Find norm_1( A ).
+ *
+ * work[jj] receives the sum of abs() over the mp local entries of
+ * local column jj.
+ */
+      if( nq > 0 )
+      {
+         work = (double*)malloc( (size_t)(nq) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            s = HPL_rzero;
+            for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; }
+            work[jj] = s; A += LDA - mp; /* step to the start of the next column */
+         }
+/*
+ * Find sum of global matrix columns, store on row 0 of process grid
+ */
+         (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum,
+                            0, Ccomm );
+/*
+ * Find maximum sum of columns for 1-norm
+ */
+         if( myrow == 0 )
+         { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in row 0, store result in process (0,0)
+ */
+      if( myrow == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0,
+                            Rcomm );
+   }
+   else if( NORM == HPL_NORM_I )
+   {
+/*
+ * Find norm_inf( A )
+ *
+ * work[ii] accumulates abs() of local row ii across the nq local
+ * columns, so it is zeroed before the column sweep.
+ */
+      if( mp > 0 )
+      {
+         work = (double*)malloc( (size_t)(mp) * sizeof( double ) );
+         if( work == NULL )
+         { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); }
+
+         for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; }
+
+         for( jj = 0; jj < nq; jj++ )
+         {
+            for( ii = 0; ii < mp; ii++ )
+            { work[ii] += Mabs( *A ); A++; }
+            A += LDA - mp; /* step to the start of the next column */
+         }
+/*
+ * Find sum of global matrix rows, store on column 0 of process grid
+ */
+         (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum,
+                            0, Rcomm );
+/*
+ * Find maximum sum of rows for inf-norm
+ */
+         if( mycol == 0 )
+         { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); }
+         if( work ) free( work );
+      }
+/*
+ * Find max in column 0, store result in process (0,0)
+ */
+      if( mycol == 0 )
+         (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max,
+                            0, Ccomm );
+   }
+/*
+ * Broadcast answer to every process in the grid
+ */
+   (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm );
+
+   return( v0 );
+/*
+ * End of HPL_pdlange
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlaprnt.c
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlaprnt.c new file mode 100644 index 000000000..f32667cf3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pdlaprnt.c @@ -0,0 +1,191 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pdlaprnt
+(
+   const HPL_T_grid * GRID,
+   const int M,
+   const int N,
+   const int NB,
+   double * A,
+   const int LDA,
+   const int IAROW,
+   const int IACOL,
+   const char * CMATNM
+)
+#else
+void HPL_pdlaprnt
+( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM )
+   const HPL_T_grid * GRID;
+   const int M;
+   const int N;
+   const int NB;
+   double * A;
+   const int LDA;
+   const int IAROW;
+   const int IACOL;
+   const char * CMATNM;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pdlaprnt prints  to standard error a distributed matrix A.  The
+ * local pieces of A are sent to the process of coordinates (0,0) in
+ * the grid and then printed.
+ *
+ * The matrix is walked one global column at a time and, within a
+ * column, in segments of at most NB rows.  A segment that lives on
+ * process (0,0) is printed in place; any other segment is sent by its
+ * owner and received by process (0,0) into an NB-entry buffer before
+ * being printed.  If that buffer cannot be allocated and the matrix is
+ * not empty, the routine calls HPL_pabort instead of receiving into a
+ * NULL pointer.
+ *
+ * NOTE(review): the message tag is 9000+(j+h)*M+i; for large M and N
+ * this may exceed the tag range the MPI library supports -- confirm
+ * before using this routine on large matrices.
+ *
+ * Arguments
+ * =========
+ *
+ * GRID    (local input)                 const HPL_T_grid *
+ *         On entry,  GRID  points  to the data structure containing the
+ *         process grid information.
+ *
+ * M       (global input)                const int
+ *         On entry,  M  specifies the number of rows of the coefficient
+ *         matrix A. M must be at least zero.
+ *
+ * N       (global input)                const int
+ *         On  entry,   N   specifies  the  number  of  columns  of  the
+ *         coefficient matrix A. N must be at least zero.
+ *
+ * NB      (global input)                const int
+ *         On entry,  NB specifies the blocking factor used to partition
+ *         and distribute the matrix. NB must be larger than one.
+ *
+ * A       (local input)                 double *
+ *         On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+ *         This array contains the coefficient matrix to be printed.
+ *
+ * LDA     (local input)                 const int
+ *         On entry, LDA specifies the leading dimension of the array A.
+ *         LDA must be at least max(1,LocP(M)).
+ *
+ * IAROW   (global input)                const int
+ *         On entry,  IAROW  specifies the row process coordinate owning
+ *         the  first row of A.  IAROW  must be  larger than or equal to
+ *         zero and less than NPROW.
+ *
+ * IACOL   (global input)                const int
+ *         On entry,  IACOL  specifies  the  column  process  coordinate
+ *         owning the  first column  of A. IACOL  must be larger than or
+ *         equal to zero and less than NPCOL.
+ *
+ * CMATNM  (global input)                const char *
+ *         On entry, CMATNM is the name of the matrix to be printed.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   MPI_Comm Acomm;
+   double * buf = NULL;
+   int h, i, ib, icurcol=IACOL, icurrow=IAROW,
+       ii=0, j, jb, jj=0, mycol, myrow, npcol,
+       nprow, src;
+/* ..
+ * .. Executable Statements ..
+ */
+   (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol );
+   Acomm = GRID->all_comm;
+/*
+ * Only process (0,0) receives and prints, so only it needs the buffer.
+ * A failed allocation is fatal as soon as there is something to print
+ * (same check as the one used in HPL_pdlange); with an empty matrix the
+ * buffer is never touched, so a NULL result is tolerated there.
+ */
+   if( ( myrow == 0 ) && ( mycol == 0 ) )
+   {
+      buf = (double*)malloc( (size_t)(NB) * sizeof( double ) );
+      if( ( buf == NULL ) && ( M > 0 ) && ( N > 0 ) )
+      { HPL_pabort( __LINE__, "HPL_pdlaprnt", "Memory allocation failed" ); }
+   }
+
+   for( j = 0; j < N; j += NB )
+   {
+      jb = N-j; jb = Mmin( jb, NB );
+      for( h = 0; h < jb; h++ )
+      {
+         (void) HPL_barrier( Acomm );
+
+         for( i = 0; i < M; i += NB )
+         {
+            ib = M-i; ib = Mmin( ib, NB );
+            if( ( icurrow == 0 ) && ( icurcol == 0 ) )
+            {
+/*
+ * The segment is owned by process (0,0): print it straight from A.
+ */
+               if( ( myrow == 0 ) && ( mycol == 0 ) )
+                  HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1,
+                               j+h+1, LDA, CMATNM );
+            }
+            else
+            {
+/*
+ * The segment is owned elsewhere: the owner sends it, (0,0) receives
+ * it into buf and prints it.
+ */
+               if( ( myrow == icurrow ) && ( mycol == icurcol ) )
+               {
+                  (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0,
+                                   9000+(j+h)*M+i, Acomm );
+               }
+               else if( ( myrow == 0 ) && ( mycol == 0 ) )
+               {
+                  src = HPL_pnum( GRID, icurrow, icurcol );
+                  (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i,
+                                   Acomm );
+                  if (buf != NULL)
+                     HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM );
+               }
+            }
+            if( myrow == icurrow ) ii += ib;
+            icurrow = MModAdd1( icurrow, nprow );
+            (void) HPL_barrier( Acomm );
+         }
+         ii = 0; icurrow = IAROW; /* restart the row walk for the next column */
+      }
+      if( mycol == icurcol ) jj += jb;
+      icurcol = MModAdd1( icurcol, npcol );
+      (void) HPL_barrier( Acomm );
+   }
+   if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf );
+/*
+ * End of HPL_pdlaprnt
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pwarn.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pwarn.c
new file mode 100644
index 000000000..a9f666f89
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/HPL_pwarn.c
@@ -0,0 +1,139 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_pwarn
+(
+   FILE * STREAM,
+   int LINE,
+   const char * SRNAME,
+   const char * FORM,
+   ...
+)
+#else
+void HPL_pwarn( va_alist )
+va_dcl
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_pwarn displays an error message.
+ *
+ * The message is formatted into a fixed 128-byte buffer; text that does
+ * not fit is truncated rather than written past the end of the buffer.
+ * The output is prefixed with the rank of the calling process in
+ * MPI_COMM_WORLD.
+ *
+ * Arguments
+ * =========
+ *
+ * STREAM  (local input)                 FILE *
+ *         On entry, STREAM specifies the output stream.
+ *
+ * LINE    (local input)                 int
+ *         On entry,  LINE  specifies the line  number in the file where
+ *         the  error  has  occurred.  When  LINE  is not a positive line
+ *         number, it is ignored.
+ *
+ * SRNAME  (local input)                 const char *
+ *         On entry, SRNAME  should  be the name of the routine  calling
+ *         this error handler.
+ *
+ * FORM    (local input)                 const char *
+ *         On entry, FORM specifies the format, i.e., how the subsequent
+ *         arguments are converted for output.
+ *
+ *         (local input)                 ...
+ *         On entry,  ...  is the list of arguments to be printed within
+ *         the format string.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   va_list argptr;
+   int rank;
+   char cline[128];
+#ifndef STDC_HEADERS
+   FILE * STREAM;
+   int LINE;
+   char * FORM, * SRNAME;
+#endif
+/* ..
+ * .. Executable Statements ..
+ */
+#ifdef STDC_HEADERS
+   va_start( argptr, FORM );
+#else
+   va_start( argptr );
+   STREAM = va_arg( argptr, FILE * );
+   LINE = va_arg( argptr, int );
+   SRNAME = va_arg( argptr, char * );
+   FORM = va_arg( argptr, char * );
+#endif
+/*
+ * Bounded formatting: an over-long message is truncated to fit cline
+ * (always NUL-terminated) instead of overflowing the stack buffer.
+ */
+   (void) vsnprintf( cline, sizeof( cline ), FORM, argptr );
+   va_end( argptr );
+
+   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+/*
+ * Display an error message
+ */
+   if( LINE <= 0 )
+      HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "in function",
+                   SRNAME, cline );
+   else
+      HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n",
+                   "HPL ERROR", "from process #", rank, "on line", LINE,
+                   "of function", SRNAME, cline );
+/*
+ * End of HPL_pwarn
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Make.inc
new file mode 120000
index 000000000..ae55370b0
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Make.inc
@@ -0,0 +1 @@
+/home/kate/hip/hpl-2.3/Make.intel64
\ No newline at end of file
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Makefile
new file mode 100644
index 000000000..ea93cd150
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/Makefile
@@ -0,0 +1,137 @@
+#
+# -- High Performance Computing Linpack Benchmark (HPL)
+#    HPL - 2.3 - December 2, 2018
+#    Antoine P. Petitet
+#    University of Tennessee, Knoxville
+#    Innovative Computing Laboratory
+#    (C) Copyright 2000-2008 All Rights Reserved
+#
+# -- Copyright notice and Licensing terms:
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1.
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ######################################################################
+#
+# Builds the objects of the parent directory (../HPL_*.c) and archives
+# them into $(HPLlib).  Variables that are not assigned in this file
+# (INCdir, HPLlib, CCFLAGS, ARCHIVER, ARFLAGS, RANLIB, TOUCH, ...) are
+# expected to be provided by Make.inc.
+#
+include Make.inc
+#
+# ######################################################################
+#
+# Headers every object below is rebuilt against.
+#
+INCdep = \
+   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \
+   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_pauxil.h
+#
+## Object files ########################################################
+#
+HPL_pauobj = \
+   HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o \
+   HPL_indxl2g.o HPL_infog2l.o HPL_numroc.o \
+   HPL_numrocI.o HPL_dlaswp00N.o HPL_dlaswp10N.o \
+   HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o \
+   HPL_dlaswp03N.o HPL_dlaswp03T.o HPL_dlaswp04N.o \
+   HPL_dlaswp04T.o HPL_dlaswp05N.o HPL_dlaswp05T.o \
+   HPL_dlaswp06N.o HPL_dlaswp06T.o HPL_pwarn.o \
+   HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o \
+   HPL_pdlange.o
+#
+## Targets #############################################################
+#
+# all, lib and clean name actions, not files: declare them phony so a
+# stray file called "all", "lib" or "clean" cannot mask them.
+#
+.PHONY : all lib clean
+#
+all : lib
+#
+lib : lib.grd
+#
+# lib.grd is a stamp file: it is touched once the archive has been
+# updated, so the archive step reruns only when an object is newer.
+#
+lib.grd : $(HPL_pauobj)
+	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
+	$(RANLIB) $(HPLlib)
+	$(TOUCH) lib.grd
+#
+# ######################################################################
+#
+# One explicit compile rule per object; sources live one level up.
+#
+HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2l.c
+HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2lp.c
+HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2p.c
+HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxl2g.c
+HPL_infog2l.o : ../HPL_infog2l.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_infog2l.c
+HPL_numroc.o : ../HPL_numroc.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_numroc.c
+HPL_numrocI.o : ../HPL_numrocI.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_numrocI.c
+HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp00N.c
+HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp10N.c
+HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01N.c
+HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01T.c
+HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp02N.c
+HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03N.c
+HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03T.c
+HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04N.c
+HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04T.c
+HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05N.c
+HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05T.c
+HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06N.c
+HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06T.c
+HPL_pwarn.o : ../HPL_pwarn.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pwarn.c
+HPL_pabort.o : ../HPL_pabort.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pabort.c
+HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaprnt.c
+HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlamch.c
+HPL_pdlange.o : ../HPL_pdlange.c $(INCdep)
+	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlange.c
+#
+# ######################################################################
+#
+# Remove the objects and the lib.grd stamp built in this directory.
+#
+clean :
+	$(RM) *.o *.grd
+#
+# ######################################################################
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pauxil/intel64/lib.grd
new file mode 100644
index 000000000..e69de29bb
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocmax.c
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocmax.c new file mode 100644 index 000000000..644641412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocmax.c @@ -0,0 +1,149 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------
+ */
+/*
+ * Include files
+ */
+#include "hpl.h"
+
+#ifdef STDC_HEADERS
+void HPL_dlocmax
+(
+   HPL_T_panel * PANEL,
+   const int N,
+   const int II,
+   const int JJ,
+   double * WORK
+)
+#else
+void HPL_dlocmax
+( PANEL, N, II, JJ, WORK )
+   HPL_T_panel * PANEL;
+   const int N;
+   const int II;
+   const int JJ;
+   double * WORK;
+#endif
+{
+/*
+ * Purpose
+ * =======
+ *
+ * HPL_dlocmax locates the pivot candidate in the local part of the
+ * current panel column and reports it through WORK[0:3]:
+ *
+ *    WORK[0]  the selected entry A[ilindx], where ilindx is the index
+ *             returned by HPL_idamax (the stored value is the entry
+ *             itself, sign included),
+ *    WORK[1]  its local row index ilindx,
+ *    WORK[2]  its global row index, obtained with Mindxl2g,
+ *    WORK[3]  the row coordinate of this process.
+ *
+ * When N is less than 1 there is nothing to search: WORK[0:2] are set
+ * to zero and WORK[3] is set to the number of process rows.
+ *
+ * Arguments
+ * =========
+ *
+ * PANEL   (local input/output)          HPL_T_panel *
+ *         On entry,  PANEL  points to the data structure containing the
+ *         panel information.
+ *
+ * N       (local input)                 const int
+ *         On entry,  N specifies the local number of rows of the column
+ *         of A on which we operate.
+ *
+ * II      (local input)                 const int
+ *         On entry, II  specifies the row offset where the column to be
+ *         operated on starts with respect to the panel.
+ *
+ * JJ      (local input)                 const int
+ *         On entry, JJ  specifies the column offset where the column to
+ *         be operated on starts with respect to the panel.
+ *
+ * WORK    (local workspace)             double *
+ *         On entry, WORK  is a workarray of size at least 4.  On exit,
+ *         it holds the four values listed under Purpose.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   double * A;
+   int igindx, ilindx, kk, myrow, nb, nprow;
+/* ..
+ * .. Executable Statements ..
+ */
+   if( N <= 0 )
+   {
+/*
+ * No local row of A: report a process row coordinate equal to the
+ * number of process rows, so that this "ghost" entry will never be
+ * used, even if the current column of A holds only zeros.
+ */
+      WORK[0] = HPL_rzero;
+      WORK[1] = HPL_rzero;
+      WORK[2] = HPL_rzero;
+      WORK[3] = (double)(PANEL->grid->nprow);
+      return;
+   }
+/*
+ * Gather the grid and blocking parameters, then point at the start of
+ * the column to be searched.
+ */
+   myrow = PANEL->grid->myrow;
+   nprow = PANEL->grid->nprow;
+   nb = PANEL->nb;
+   A = Mptr( PANEL->A, II, JJ, PANEL->lda );
+/*
+ * Local index of the selected entry, and the matching global row index.
+ */
+   ilindx = HPL_idamax( N, A, 1 );
+   kk = PANEL->ii + II + ilindx;
+   Mindxl2g( igindx, kk, nb, nb, myrow, 0, nprow );
+/*
+ * Pack the result.
+ */
+   WORK[0] = A[ilindx];
+   WORK[1] = (double)(ilindx);
+   WORK[2] = (double)(igindx);
+   WORK[3] = (double)(myrow);
+/*
+ * End of HPL_dlocmax
+ */
+}
diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpN.c
new file mode 100644
index 000000000..a3919500a
--- /dev/null
+++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpN.c
@@ -0,0 +1,436 @@
+/*
+ * -- High Performance Computing Linpack Benchmark (HPL)
+ *    HPL - 2.3 - December 2, 2018
+ *    Antoine P. Petitet
+ *    University of Tennessee, Knoxville
+ *    Innovative Computing Laboratory
+ *    (C) Copyright 2000-2008 All Rights Reserved
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgement:
+ * This product includes software developed at the University of
+ * Tennessee, Knoxville, Innovative Computing Laboratory.
+ *
+ * 4. The name of the University, the name of the Laboratory, or the
+ * names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific written
+ * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpN +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpN +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpN performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. 
+ * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, JJ, 0, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda; + 
*L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda; + *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda ) + { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current of A into L1. + */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A. 
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = *A1 = Wmx[ 0]; L += n0; A1 += lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = *A1 = Wmx[ 1]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = *A1 = Wmx[ 2]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 3]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = *A1 = Wmx[ 4]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 5]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 6]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 7]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = *A1 = Wmx[ 8]; L += n0; A1 += lda; + *L = *A1 = Wmx[ 9]; L += n0; A1 += lda; + *L = *A1 = Wmx[10]; L += n0; A1 += lda; + *L = *A1 = Wmx[11]; L += n0; A1 += lda; + *L = *A1 = Wmx[12]; L += n0; A1 += lda; + *L = *A1 = Wmx[13]; L += n0; A1 += lda; + *L = *A1 = Wmx[14]; L += n0; A1 += lda; + *L = *A1 = Wmx[15]; L += n0; A1 += lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = *A1 = Wmx[16]; L += n0; A1 += lda; + *L = *A1 = Wmx[17]; L += n0; A1 += lda; + *L = *A1 = Wmx[18]; L += n0; A1 += lda; + *L = *A1 = Wmx[19]; L += n0; A1 += lda; + *L = *A1 = Wmx[20]; L += n0; A1 += lda; + *L = *A1 = Wmx[21]; L += n0; A1 += lda; + *L = *A1 = Wmx[22]; L += n0; A1 += lda; + *L = *A1 = Wmx[23]; L += n0; A1 += lda; + *L = *A1 = Wmx[24]; L += n0; A1 += lda; + *L = *A1 = Wmx[25]; L += n0; A1 += lda; + *L = *A1 = Wmx[26]; L += n0; A1 += lda; + *L = *A1 = Wmx[27]; L += n0; A1 += lda; + *L = *A1 = Wmx[28]; L += n0; A1 += lda; + *L = *A1 = Wmx[29]; L += n0; A1 += lda; + *L = *A1 = Wmx[30]; L += n0; A1 += lda; + *L = *A1 = Wmx[31]; L += n0; A1 += lda; +#endif + } + + for( i = 0; i < nr; i++, L += n0, A1 += lda ) + { *L = *A1 = Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1. 
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH ) + { + *L = Wmx[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wmx[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0; + *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0; + *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0; + *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0; + *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0; + *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0; + *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0; + *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0; + *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0; + *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0; + *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0; + *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0; +#endif + } + for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. 
+ */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. 
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *L = Wr0[ 0]; L+=n0; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *L = Wr0[ 1]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0; + *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0; + *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0; + *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0; + *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0; + *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0; + *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0; + *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0; + *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0; + *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0; + *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0; + *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0; +#endif + } + + for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; } +/* + * set INFO. + */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpT.c new file mode 100644 index 000000000..89b86e35a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_dlocswpT.c @@ -0,0 +1,406 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * Define default value for unrolling factor + */ +#ifndef HPL_LOCSWP_DEPTH +#define HPL_LOCSWP_DEPTH 32 +#define HPL_LOCSWP_LOG2_DEPTH 5 +#endif + +#ifdef STDC_HEADERS +void HPL_dlocswpT +( + HPL_T_panel * PANEL, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_dlocswpT +( PANEL, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dlocswpT performs the local swapping operations within a panel. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * WORK[0] contains the local maximum absolute value scalar, + * WORK[1] contains the corresponding local row index, WORK[2] + * contains the corresponding global row index, and WORK[3] is + * the coordinate of process owning this max. The N0 length max + * row is stored in WORK[4:4+N0-1]; Note that this is also the + * JJth row (or column) of L1. The remaining part of this array + * is used as workspace. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax; + double * A1, * A2, * L, * Wr0, * Wmx; + int ilindx, lda, myrow, n0, nr, nu; + register int i; +/* .. + * .. Executable Statements ..
+ */ + myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda; + + Wr0 = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0]; /* Wmx = &WORK[4], Wr0 = &WORK[4+n0] */ + nu = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) + << HPL_LOCSWP_LOG2_DEPTH ); + nr = n0 - nu; /* nu = n0 rounded down to a multiple of 2^HPL_LOCSWP_LOG2_DEPTH, nr = remainder for the cleanup loops */ +/* + * Replicated swap and copy of the current (new) row of A into L1 + */ + L = Mptr( PANEL->L1, 0, JJ, n0 ); +/* + * If the pivot is non-zero ... + */ + if( gmax != HPL_rzero ) + { +/* + * and if I own the current row of A ... + */ + if( myrow == PANEL->prow ) + { +/* + * and if I also own the row to be swapped with the current row of A ... + */ + if( myrow == (int)(WORK[3]) ) + { +/* + * and if the current row of A is not to be swapped with itself ... + */ + if( ( ilindx = (int)(WORK[1]) ) != 0 ) + { +/* + * then copy the max row into L1 and locally swap the 2 rows of A. + */ + A1 = Mptr( PANEL->A, II, 0, lda ); + A2 = Mptr( A1, ilindx, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH, + L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda; + L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda; + L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda; + L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda; + L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda; + L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda; + L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda; + L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda; + L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda; + L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda; + L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda; + L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
+#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda; + L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda; + L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda; + L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda; + L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda; + L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda; + L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda; + L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda; + L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda; + L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda; + L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda; + L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda; + L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda; + L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda; + L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda; + L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda, A2 += lda ) + { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; } + } + else + { +/* + * otherwise the current row of A is swapped with itself, so just copy + * the current row of A into L1.
+ */ + *Mptr( PANEL->A, II, JJ, lda ) = gmax; + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; + L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[12]=Wmx[12]; + L[ 9]=Wmx[ 9]; L[13]=Wmx[13]; + L[10]=Wmx[10]; L[14]=Wmx[14]; + L[11]=Wmx[11]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[20]=Wmx[20]; + L[17]=Wmx[17]; L[21]=Wmx[21]; + L[18]=Wmx[18]; L[22]=Wmx[22]; + L[19]=Wmx[19]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[28]=Wmx[28]; + L[25]=Wmx[25]; L[29]=Wmx[29]; + L[26]=Wmx[26]; L[30]=Wmx[30]; + L[27]=Wmx[27]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } + } + } + else + { +/* + * otherwise, the row to be swapped with the current row of A is in Wmx, + * so copy Wmx into L1 and A.
+ */ + A1 = Mptr( PANEL->A, II, 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=*A1=Wmx[ 0]; A1+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=*A1=Wmx[ 1]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=*A1=Wmx[ 2]; A1+=lda; L[ 3]=*A1=Wmx[ 3]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=*A1=Wmx[ 4]; A1+=lda; L[ 5]=*A1=Wmx[ 5]; A1+=lda; + L[ 6]=*A1=Wmx[ 6]; A1+=lda; L[ 7]=*A1=Wmx[ 7]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=*A1=Wmx[ 8]; A1+=lda; L[ 9]=*A1=Wmx[ 9]; A1+=lda; + L[10]=*A1=Wmx[10]; A1+=lda; L[11]=*A1=Wmx[11]; A1+=lda; + L[12]=*A1=Wmx[12]; A1+=lda; L[13]=*A1=Wmx[13]; A1+=lda; + L[14]=*A1=Wmx[14]; A1+=lda; L[15]=*A1=Wmx[15]; A1+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=*A1=Wmx[16]; A1+=lda; L[17]=*A1=Wmx[17]; A1+=lda; + L[18]=*A1=Wmx[18]; A1+=lda; L[19]=*A1=Wmx[19]; A1+=lda; + L[20]=*A1=Wmx[20]; A1+=lda; L[21]=*A1=Wmx[21]; A1+=lda; + L[22]=*A1=Wmx[22]; A1+=lda; L[23]=*A1=Wmx[23]; A1+=lda; + L[24]=*A1=Wmx[24]; A1+=lda; L[25]=*A1=Wmx[25]; A1+=lda; + L[26]=*A1=Wmx[26]; A1+=lda; L[27]=*A1=Wmx[27]; A1+=lda; + L[28]=*A1=Wmx[28]; A1+=lda; L[29]=*A1=Wmx[29]; A1+=lda; + L[30]=*A1=Wmx[30]; A1+=lda; L[31]=*A1=Wmx[31]; A1+=lda; +#endif + } + + for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; } + } + } + else + { +/* + * otherwise I do not own the current row of A, so copy the max row Wmx + * into L1.
+ */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wmx[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wmx[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5]; L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9]; L[10]=Wmx[10]; L[11]=Wmx[11]; + L[12]=Wmx[12]; L[13]=Wmx[13]; L[14]=Wmx[14]; L[15]=Wmx[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wmx[16]; L[17]=Wmx[17]; L[18]=Wmx[18]; L[19]=Wmx[19]; + L[20]=Wmx[20]; L[21]=Wmx[21]; L[22]=Wmx[22]; L[23]=Wmx[23]; + L[24]=Wmx[24]; L[25]=Wmx[25]; L[26]=Wmx[26]; L[27]=Wmx[27]; + L[28]=Wmx[28]; L[29]=Wmx[29]; L[30]=Wmx[30]; L[31]=Wmx[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; } +/* + * and if I own the max row, overwrite it with the current row Wr0. + */ + if( myrow == (int)(WORK[3]) ) + { + A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda ); + + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH ) + { + *A2 = Wr0[ 0]; A2+=lda; +#if ( HPL_LOCSWP_DEPTH > 1 ) + *A2 = Wr0[ 1]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda; + *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda; + *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda; + *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda; + *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda; + *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda; + *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda; + *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda; + *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda; + *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda; + *A2 = Wr0[28];
A2+=lda; *A2 = Wr0[29]; A2+=lda; + *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda; +#endif + } + for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; } + } + } + } + else + { +/* + * Otherwise the max element in the current column is zero, simply copy + * the current row Wr0 into L1. The matrix is singular. + */ + for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH, + Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH ) + { + L[ 0]=Wr0[ 0]; +#if ( HPL_LOCSWP_DEPTH > 1 ) + L[ 1]=Wr0[ 1]; +#endif +#if ( HPL_LOCSWP_DEPTH > 2 ) + L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3]; +#endif +#if ( HPL_LOCSWP_DEPTH > 4 ) + L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5]; L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7]; +#endif +#if ( HPL_LOCSWP_DEPTH > 8 ) + L[ 8]=Wr0[ 8]; L[12]=Wr0[12]; L[ 9]=Wr0[ 9]; L[13]=Wr0[13]; + L[10]=Wr0[10]; L[14]=Wr0[14]; L[11]=Wr0[11]; L[15]=Wr0[15]; +#endif +#if ( HPL_LOCSWP_DEPTH > 16 ) + L[16]=Wr0[16]; L[20]=Wr0[20]; L[17]=Wr0[17]; L[21]=Wr0[21]; + L[18]=Wr0[18]; L[22]=Wr0[22]; L[19]=Wr0[19]; L[23]=Wr0[23]; + L[24]=Wr0[24]; L[28]=Wr0[28]; L[25]=Wr0[25]; L[29]=Wr0[29]; + L[26]=Wr0[26]; L[30]=Wr0[30]; L[27]=Wr0[27]; L[31]=Wr0[31]; +#endif + } + for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; } +/* + * Set INFO. + */ + if( *(PANEL->DINFO) == 0.0 ) + *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1); + } +/* + * End of HPL_dlocswpT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdfact.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdfact.c new file mode 100644 index 000000000..1d99c6e14 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdfact.c @@ -0,0 +1,141 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdfact +( + HPL_T_panel * PANEL +) +#else +void HPL_pdfact +( PANEL ) + HPL_T_panel * PANEL; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdfact recursively factorizes a 1-dimensional panel of columns. + * The RPFACT function pointer specifies the recursive algorithm to be + * used, either Crout, Left- or Right looking. NBMIN allows to vary the + * recursive stopping criterion in terms of the number of columns in the + * panel, and NDIV allows to specify the number of subpanels each panel + * should be divided into. Usually a value of 2 will be chosen. Finally + * PFACT is a function pointer specifying the non-recursive algorithm + * to be used on at most NBMIN columns. One can also choose here between + * Crout, Left- or Right looking. Empirical tests seem to indicate that + * values of 4 or 8 for NBMIN give the best results. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual.
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + void * vptr = NULL; + int align, jb; +/* .. + * .. Executable Statements ..
+ */ + jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb; + + if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return; /* n and ja are already updated above; only processes with mycol == pcol and jb > 0 go on to factor */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif + align = PANEL->algo->align; + vptr = (void *)malloc( ( (size_t)(align) + + (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) * + sizeof(double) ); /* room for align + 2*(4+2*jb) doubles */ + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); } +/* + * Factor the panel - Update the panel pointers + */ + PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr, + ((size_t)(align) * sizeof(double) ) ) ); + if( vptr ) free( vptr ); + + PANEL->A = Mptr( PANEL->A, 0, jb, PANEL->lda ); + PANEL->nq -= jb; PANEL->jj += jb; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_RPFACT ); +#endif +/* + * End of HPL_pdfact + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdmxswp.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdmxswp.c new file mode 100644 index 000000000..b14452197 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdmxswp.c @@ -0,0 +1,311 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmxswp +( + HPL_T_panel * PANEL, + const int M, + const int II, + const int JJ, + double * WORK +) +#else +void HPL_pdmxswp +( PANEL, M, II, JJ, WORK ) + HPL_T_panel * PANEL; + const int M; + const int II; + const int JJ; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmxswp swaps and broadcasts the absolute value max row using + * bi-directional exchange. The buffer is partially set by HPL_dlocmax. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by + * + * log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + * + * where lat and bdwth are the latency and bandwidth of the network for + * double precision real elements. Communication only occurs in one + * process column. Mono-directional links will cause the communication + * cost to double. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of the matrix + * column on which this function operates. + * + * II (local input) const int + * On entry, II specifies the row offset where the column to be + * operated on starts with respect to the panel. + * + * JJ (local input) const int + * On entry, JJ specifies the column offset where the column to + * be operated on starts with respect to the panel. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2 * (4+2*N0). + * It is assumed that HPL_dlocmax was called prior to this + * routine to initialize the first four entries of this array. + * On exit, the N0 length max row is stored in WORK[4:4+N0-1]; + * Note that this is also the JJth row (or column) of L1. The + * remaining part is used as a temporary array. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double gmax, tmp1; + double * A0, * Wmx, * Wwork; + HPL_T_grid * grid; + MPI_Comm comm; + unsigned int hdim, ip2, ip2_, ipow, k, mask; + int Np2, cnt_, cnt0, i, icurrow, lda, mydist, + mydis_, myrow, n0, nprow, partner, rcnt, + root, scnt, size_; +/* .. 
+ * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif + grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow; +/* + * ip2 : the smallest power of two less than or equal to nprow; + * hdim : dimension of the hypercube made of those ip2 processes; + * Np2 : logical flag indicating whether or not nprow is a power of 2; + */ + comm = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2); + hdim = (unsigned int)(grid->row_hdim); n0 = PANEL->jb; + icurrow = PANEL->prow; Np2 = (int)( ( size_ = nprow - ip2 ) != 0 ); + mydist = MModSub( myrow, icurrow, nprow ); +/* + * Set up pointers in workspace: WORK and Wwork point to the beginning + * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row + * owning the local (before combine) and global (after combine) absolute + * value max. A0 points to the copy of the current row of the matrix. + */ + cnt0 = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0; + Wwork = WORK + cnt0; +/* + * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is (int)(WORK[1]) (row + * with max in current column). If I am the current process row, pack in + * addition the current row of A in A0[0:N0-1]. If I do not own any row + * of A, then zero out Wmx[0:N0-1]. + */ + if( M > 0 ) + { + lda = PANEL->lda; + HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda, + Wmx, 1 ); + if( myrow == icurrow ) + { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); } + } + else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; } +/* + * Combine the results (bi-directional exchange): the process coordina- + * tes are relative to icurrow, this allows to reduce the communication + * volume when nprow is not a power of 2. + * + * When nprow is not a power of 2: proc[i-ip2] receives local data from + * proc[i] for all i in [ip2..nprow). In addition, proc[0] (icurrow) + * sends to proc[ip2] the current row of A for later broadcast in procs + * [ip2..nprow). 
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + if( mydist == (int)(ip2) ) + (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + if( mydist == 0 ) + (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_, + MSGID_BEGIN_PFACT, MModAdd( partner, + icurrow, nprow ), comm ); + else + (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); } + } + } + + if( mydist < (int)(ip2) ) + { +/* + * power of 2 part of the processes collection: processes [0..ip2) are + * combining (binary exchange); proc[0] has two rows to send, but one to + * receive. At every step k in [0..hdim) of the algorithm, a process + * pair exchanging 2 rows is such that myrow >> k+1 is 0. Among those + * processes the ones that are sending one more row than what they are + * receiving are such that myrow >> k is equal to 0. + */ + k = 0; ipow = 1; + + while( k < hdim ) + { + if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 ) + { + if( ( (unsigned int)(mydist) >> k ) == 0 ) + { scnt = cnt0; rcnt = cnt_; } + else + { scnt = cnt_; rcnt = cnt0; } + } + else { scnt = rcnt = cnt_; } + + partner = (int)( (unsigned int)(mydist) ^ ipow ); + (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt, + MSGID_BEGIN_PFACT, MModAdd( partner, icurrow, + nprow ), comm ); + + tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] ); + if( ( tmp1 > gmax ) || + ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) ) + { + HPL_dcopy( ( rcnt == cnt0 ? 
cnt0 : cnt_ ), Wwork, 1, + WORK, 1 ); + } + else if( rcnt == cnt0 ) + { HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 ); } + + ipow <<= 1; k++; + } + } + else if( size_ > 1 ) + { +/* + * proc[ip2] broadcast current row of A to procs [ip2+1..nprow). + */ + k = (unsigned int)(size_) - 1; ip2_ = mask = 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else if( partner < size_ ) + { + (void) HPL_send( A0, n0, MModAdd( root, partner, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } + ip2_ >>= 1; + } while( ip2_ > 0 ); + } +/* + * If nprow is not a power of 2, for all i in [ip2..nprow), proc[i-ip2] + * sends the pivot row to proc[i] along with the first four entries of + * the WORK array. 
+ */ + if( ( Np2 != 0 ) && + ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) ) + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + else + { + (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow, + nprow ), MSGID_BEGIN_PFACT, comm ); + } + } +/* + * Save the global pivot index in pivot array + */ + (PANEL->DPIV)[JJ] = WORK[2]; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_MXSWP ); +#endif +/* + * End of HPL_pdmxswp + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrN.c new file mode 100644 index 000000000..4ea170b73 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrN.c @@ -0,0 +1,270 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in no-transpose form (i.e. just like the input + * matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. 
On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj, jj+1, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, Nm1 ); + Xv1 = vsip_msubview_d( Xv0, jj, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Xv0, jj, jj+1, 1, Nm1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone, + Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj, + ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 ); +#endif + 
if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update dia- + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk+1, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + vsip_mdestroy_d( Yv1 ); + vsip_mdestroy_d( Xv1 ); + vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF, + jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ), + 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of 
HPL_pdpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrT.c new file mode 100644 index 000000000..50ed300aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpancrT.c @@ -0,0 +1,267 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpancrT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Crout variant of the usual + * one-dimensional algorithm. The lower triangular N0-by-N0 upper block + * of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda, + m=M, n0; +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Compute row (column) jj of L1 + */ + if( kk > 0 ) + { + L1ptr = Mptr( L1, jj+1, jj, n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Xv0, jj+1, ICOFF, Nm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj, kk, 1 ); + Yv1 = vsip_msubview_d( Xv0, jj+1, jj, Nm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone, + Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF, + jj, n0 ), 1, HPL_rone, L1ptr, 1 ); +#endif + if( curr != 0 ) + HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda ); + } +/* + * Scale current column by its absolute value max entry - Update dia- + * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1) + * and find local absolute value max in that column (Only one pass + * through cache for each 
current column). This sequence of operations + * could benefit from a specialized blocked implementation. + */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk+1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1, ICOFF, + n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; kk++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllN.c new file mode 100644 index 000000000..fa471198d --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllN.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements .. 
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, 1 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, 1 ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, 1, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllT.c new file mode 100644 index 000000000..a6e1b67bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanllT.c @@ -0,0 +1,244 @@ +/* + * -- 
High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanllT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Left-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, + m=M, n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column and initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 > 0 ) + { +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + + L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF; + HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans, HplUnit, kk, + Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 ); +/* + * Scale current column by its absolute value max entry - Update and + * find local absolute value max in next column (Only one pass through + * cache for each next column). This sequence of operations could bene- + * fit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); + Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk ); + Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, + HPL_rone, Yv1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, + Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0, + HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); +#endif + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + if( curr != 0 ) + { + HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 ); + ii = iip1; iip1++; m = Mm1; Mm1--; + } + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); + +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Xv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlN.c new file mode 100644 index 000000000..0a3b9a542 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlN.c @@ -0,0 +1,250 @@ +/* + * -- 
High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlN factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in no-transpose form (i.e. just like the + * input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); +#ifdef HPL_CALL_VSIPL + if( Nm1 > 1 ) + { +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); + } +#else + if( Nm1 > 1 ) + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda ); +#endif +/* + * Same thing as above but with worse data access on y (A += x * y^T) + * + * if( Nm1 > 1 ) ) + * HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + * Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ), + * lda ); + */ + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpN( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlT.c 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlT.c new file mode 100644 index 000000000..68c1afc02 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdpanrlT.c @@ -0,0 +1,244 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdpanrlT factorizes a panel of columns that is a sub-array of a + * larger one-dimensional panel A using the Right-looking variant of the + * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper + * block of the panel is stored in transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Note that one iteration of the main loop is unrolled. The local + * computation of the absolute value max of the next column is performed + * just after its update by the current column. This allows to bring the + * current column only once through cache at each step. The current + * implementation does not perform any blocking for this sequence of + * BLAS operations, however the design allows for plugging in an optimal + * (machine-specific) specialized BLAS-like kernel. This idea has been + * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Acur, * Anxt, * L1; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; +#endif + int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M, + n0; +/* .. + * .. Executable Statements ..
+ */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + Nm1 = N - 1; jj = ICOFF; + if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } + else { ii = 0; iip1 = ii; Mm1 = m; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); +#endif +/* + * Find local absolute value max in first column - initialize WORK[0:3] + */ + HPL_dlocmax( PANEL, m, ii, jj, WORK ); + + while( Nm1 >= 1 ) + { + Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); +/* + * Swap and broadcast the current row + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); +/* + * Scale current column by its absolute value max entry - Update trai- + * ling sub-matrix and find local absolute value max in next column (On- + * ly one pass through cache for each current column). This sequence of + * operations could benefit from a specialized blocked implementation. 
+ */ + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); + HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 ); + HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); + + if( Nm1 > 1 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, + Mm1, Nm1-1 ); + Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, + Mm1, 1 ); + Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); + + vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Yv1 ); + (void) vsip_mdestroy_d( Xv1 ); + (void) vsip_mdestroy_d( Av1 ); +#else + HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, + Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ), + lda ); +#endif + } + if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } + + Nm1--; jj++; + } +/* + * Swap and broadcast last row - Scale last column by its absolute value + * max entry + */ + HPL_pdmxswp( PANEL, m, ii, jj, WORK ); + HPL_dlocswpT( PANEL, ii, jj, WORK ); + if( WORK[0] != HPL_rzero ) + HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); +#ifdef HPL_CALL_VSIPL +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Yv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PFACT ); +#endif +/* + * End of HPL_pdpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrN.c new file mode 100644 index 000000000..348d7ebe6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrN.c @@ -0,0 +1,282 @@ +/* + * -- High 
Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrN recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution.
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the 
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrN( PANEL, m, jb, ioff, WORK ); + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + Av2 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n, + jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0, + Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrT.c 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrT.c new file mode 100644 index 000000000..a1ecfac2c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpancrT.c @@ -0,0 +1,282 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpancrT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpancrT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpancrT recursively factorizes a panel of columns using the + * recursive Crout variant of the usual one-dimensional algorithm. + * The lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. 
On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Local update - Factor current panel - Replicated update and solve + */ +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_TRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the 
matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj, + -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, + jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), + lda ); +#endif + HPL_pdrpancrT( PANEL, m, jb, ioff, WORK ); + + if( n > 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj ); + Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, + VSIP_MAT_NTRANS, HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb, + jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); +#endif + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, + n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + } +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpancrT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllN.c 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllN.c new file mode 100644 index 000000000..4dbc13b44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllN recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, + jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ), + n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( 
vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllN( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllT.c new file mode 100644 index 000000000..887caeb87 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanllT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanllT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanllT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanllT recursively factorizes a panel of columns using the + * recursive Left-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Replicated solve - Local update - Factor current panel + */ + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr, + jj, 0, n0 ), n0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, + m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jj ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Av2 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, + jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, + Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj, lda ), lda ); +#endif + HPL_pdrpanllT( PANEL, m, jb, ioff, WORK ); +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + ii += jb; m -= jb; + } + jj += jb; jb = Mmin( n, nb 
); + + } while( n > 0 ); +/* + * End of HPL_pdrpanllT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlN.c new file mode 100644 index 000000000..22f105cf4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlN.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlN +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlN +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlN recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * no-transpose form (i.e. just like the input matrix itself). + * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. 
The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. + */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. 
+ */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. + */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, n ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) 
vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, + Mptr( A, 0, ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlT.c new file mode 100644 index 000000000..a77301b9b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/HPL_pdrpanrlT.c @@ -0,0 +1,240 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdrpanrlT +( + HPL_T_panel * PANEL, + const int M, + const int N, + const int ICOFF, + double * WORK +) +#else +void HPL_pdrpanrlT +( PANEL, M, N, ICOFF, WORK ) + HPL_T_panel * PANEL; + const int M; + const int N; + const int ICOFF; + double * WORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdrpanrlT recursively factorizes a panel of columns using the + * recursive Right-looking variant of the one-dimensional algorithm. The + * lower triangular N0-by-N0 upper block of the panel is stored in + * transpose form. 
+ * + * Bi-directional exchange is used to perform the swap::broadcast + * operations at once for one column in the panel. This results in a + * lower number of slightly larger messages than usual. On P processes + * and assuming bi-directional links, the running time of this function + * can be approximated by (when N is equal to N0): + * + * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + * N0^2 * ( M - N0/3 ) * gam2-3 + * + * where M is the local number of rows of the panel, lat and bdwth are + * the latency and bandwidth of the network for double precision real + * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS + * rate of execution. The recursive algorithm allows indeed to almost + * achieve Level 3 BLAS performance in the panel factorization. On a + * large number of modern machines, this operation is however latency + * bound, meaning that its cost can be estimated by only the latency + * portion N0 * log_2(P) * lat. Mono-directional links will double this + * communication cost. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * M (local input) const int + * On entry, M specifies the local number of rows of sub(A). + * + * N (local input) const int + * On entry, N specifies the local number of columns of sub(A). + * + * ICOFF (global input) const int + * On entry, ICOFF specifies the row and column offset of sub(A) + * in A. + * + * WORK (local workspace) double * + * On entry, WORK is a workarray of size at least 2*(4+2*N0). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * Aptr, * L1, * L1ptr; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; +#endif + int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, + nbdiv, nbmin; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= ( nbmin = PANEL->algo->nbmin ) ) + { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } +/* + * Find new recursive blocking factor. To avoid an infinite loop, one + * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. + * First, we compute nblocks: the number of blocks of size NBMIN in N, + * including the last one that may be smaller. nblocks is thus larger + * than or equal to one, since N >= NBMIN. + * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal + * to one as well. For NDIV >= 2, we are guaranteed that the quan- + * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and + * greater than or equal to NBMIN. + */ + nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; + nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; + + A = PANEL->A; lda = PANEL->lda; + L1 = PANEL->L1; n0 = PANEL->jb; + L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); + curr = (int)( PANEL->grid->myrow == PANEL->prow ); + + if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); + else Aptr = Mptr( A, 0, ICOFF, lda ); +/* + * The triangular solve is replicated in every process row. The panel + * factorization is such that the first rows of A are accumulated in + * every process row during the (panel) swapping phase. We ensure this + * way a minimum amount of communication during the entire panel facto- + * rization. 
+ */ + do + { + n -= jb; ioff = ICOFF + jj; +/* + * Factor current panel - Replicated solve - Local update + */ + HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK ); + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), + n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); + if( curr != 0 ) { ii += jb; m -= jb; } +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); +/* + * Create the matrix subviews + */ + if( curr != 0 ) + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, + m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, + m, N ); + } + else + { + Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); + Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); + } + Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); + + vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, + HPL_rone, Av2 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); + (void) vsip_mdestroy_d( Av2 ); + (void) vsip_mdestroy_d( Av1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n, + jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, + Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone, + Mptr( Aptr, ii, jj+jb, lda ), lda ); +#endif +/* + * Copy back upper part of A in current process row - Go the next block + */ + if( curr != 0 ) + { + HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, + Mptr( A, 0, 
ioff, lda ), lda ); + } + jj += jb; jb = Mmin( n, nb ); + + } while( n > 0 ); +/* + * End of HPL_pdrpanrlT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Makefile new file mode 100644 index 000000000..bf4634d31 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/Makefile @@ -0,0 +1,118 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. 
The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pfact.h +# +## Object files ######################################################## +# +HPL_pfaobj = \ + HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o \ + HPL_pdmxswp.o HPL_pdpancrN.o HPL_pdpancrT.o \ + HPL_pdpanllN.o HPL_pdpanllT.o HPL_pdpanrlN.o \ + HPL_pdpanrlT.o HPL_pdrpanllN.o HPL_pdrpanllT.o \ + HPL_pdrpancrN.o HPL_pdrpancrT.o HPL_pdrpanrlN.o \ + HPL_pdrpanrlT.o HPL_pdfact.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pfaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocmax.c +HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpN.c +HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpT.c +HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmxswp.c +HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrN.c +HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrT.c +HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllN.c +HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllT.c +HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlN.c +HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlT.c +HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) 
../HPL_pdrpanllN.c +HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllT.c +HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrN.c +HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrT.c +HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlN.c +HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlT.c +HPL_pdfact.o : ../HPL_pdfact.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdfact.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pfact/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_equil.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_equil.c new file mode 100644 index 000000000..b917a6525 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_equil.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_equil +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_TRANS TRANS, + const int N, + double * U, + const int LDU, + int * IPLEN, + const int * IPMAP, + const int * IPMAPM1, + int * IWORK +) +#else +void HPL_equil +( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_TRANS TRANS; + const int N; + double * U; + const int LDU; + int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_equil equilibrates the local pieces of U, so that on exit to + * this function, pieces of U contained in every process row are of the + * same size. This phase makes the rolling phase optimal. In addition, + * this function probes for the column panel L and forwards it when + * possible. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be equilibrated) information. + * + * TRANS (global input) const enum HPL_TRANS + * On entry, TRANS specifies whether U is stored in transposed + * or non-transposed form. + * + * N (local input) const int + * On entry, N specifies the number of rows or columns of U. N + * must be at least 0. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. 
 + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]) when U is stored in + * non-transposed form, and MAX(1,N) otherwise. + * + * IPLEN (global input) int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension NPROW+1. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, ip, ipU, ipcur, iprow, iptgt, lastrow, + left, npm1, nprow, ll, llU, llcur, lltgt, + right, slen, smax, smin; +/* .. + * .. Executable Statements .. + */ + if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; +/* + * If the current distribution of the pieces of U is already optimal for + * the rolling phase, then return immediately. The optimal distribution + * is such that ip processes have smax items and the remaining processes + * only have smin items. Another way to check this is to verify that all + * differences IPLEN[i+1] - IPLEN[i] are either smin or smax.
+ */ + smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow; + ip = slen - nprow * ( smin = slen / nprow ); + + iprow = 0; + do + { + ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++; + } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) ); + + if( iprow == nprow ) return; +/* + * Now, we are sure the distribution of the pieces of U is not optimal + * with respect to the rolling phase, thus perform equilibration. Go + * through the list of processes: Processes that have rows that do not + * belong to them with respect to the optimal mapping spread them in a + * logarithmic fashion. To simplify a little bit the implementation, and + * mainly the packing, a source process row spreads its data to its left + * first, and then to its right. + */ + IWORK[nprow] = slen; + + for( iprow = 0; iprow < nprow; iprow++ ) + { + llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] ); + if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax; } + else { lltgt = smin; iptgt = iprow * smin + ip; } + + left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU ); +/* + * If I have something to spread to either the left or the right + */ + if( ( llU > 0 ) && ( left || right ) ) + { /* Figure out how much every other process should have */ + + ipcur = ipU; llcur = llU; + + for( i = 0; i < nprow; i++ ) + { + if( i < ip ) { lltgt = smax; iptgt = i * smax; } + else { lltgt = smin; iptgt = i * smin + ip; } + lastrow = iptgt + lltgt - 1; + + if( ( lastrow >= ipcur ) && ( llcur > 0 ) ) + { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; } + else { ll = 0; } + + IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur; + } +/* + * Equilibration phase + */ + if( TRANS == HplNoTrans ) + { + if( left ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + else + { + if( left ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft, N, 
U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + + if( right ) + { + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, + iprow, IWORK, IPMAP, IPMAPM1 ); + } + } + } + } +/* + * Finally update IPLEN with the indexes corresponding to the new dis- + * tribution of U - IPLEN[nprow] remained unchanged. + */ + for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip ); +/* + * End of HPL_equil + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_logsort.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_logsort.c new file mode 100644 index 000000000..0715159bd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_logsort.c @@ -0,0 +1,185 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_logsort +( + const int NPROCS, + const int ICURROC, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_logsort +( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 ) + const int NPROCS; + const int ICURROC; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_logsort computes an array IPMAP and its inverse IPMAPM1 that + * contain the logarithmic sorted processes id with respect to the local + * number of rows of U that they own. This is necessary to ensure that + * the logarithmic spreading of U is optimal in terms of number of steps + * and communication volume as well. In other words, the largest pieces + * of U will be sent a minimal number of times.
+ * + * Arguments + * ========= + * + * NPROCS (global input) const int + * On entry, NPROCS specifies the number of process rows in the + * process grid. NPROCS is at least one. + * + * ICURROC (global input) const int + * On entry, ICURROC is the source process row. + * + * IPLEN (global input/output) int * + * On entry, IPLEN is an array of dimension NPROCS+1, such that + * IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, + * that process i-1 has. On exit, IPLEN[i] is the number of + * rows of U in the processes before process IPMAP[i] after the + * sort, with the convention that IPLEN[NPROCS] is the total + * number of rows of the panel. In other words, IPLEN[i+1] - + * IPLEN[i] is the number of rows of A that should be moved to + * the process IPMAP[i]. IPLEN is such that the number of rows + * of the source process row is IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROCS. On exit, + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myroc] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROCS. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROCS) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dist, i, ip, iplen_i, iplen_j, itmp, j, k; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the logarithmic distance between process j and process 0, as + * well as the maximum logarithmic distance. IPMAPM1 is workarray here. 
+ */ + for( j = 0, dist = 0; j < NPROCS; j++ ) + { + IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0; + do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); + IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp; + } +/* + * Shift IPLEN[1..NPROCS] of ICURROC places, so that IPLEN[1] is now + * what used to be IPLEN[ICURROC+1]. Initialize IPMAP, so that IPMAP[0] + * is ICURROC. + */ + for( j = 0; j < ICURROC; j++ ) + { + for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i]; + IPLEN[NPROCS] = itmp; + } +/* + * logarithmic sort + */ + for( k = 1; k <= dist; k++ ) + { + for( j = 1; j < NPROCS; j++ ) + { + if( IPMAPM1[j] == k ) + { + for( i = 2; i < NPROCS; i++ ) + { + if( k < IPMAPM1[i] ) + { + iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1]; + + if( iplen_j < iplen_i ) + { + IPLEN[j+1] = iplen_i; IPLEN[i+1] = iplen_j; + itmp = IPMAP[j]; IPMAP[j] = IPMAP[i]; + IPMAP[i] = itmp; + } + } + } + } + } + } +/* + * Compute IPLEN and IPMAPM1 (the inverse of IPMAP) + */ + IPLEN[0] = 0; + + for( i = 0; i < NPROCS; i++ ) + { + IPMAPM1[ IPMAP[i] ] = i; + IPLEN[i+1] += IPLEN[i]; + } +/* + * End of HPL_logsort + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv.c new file mode 100644 index 000000000..ced74269e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv.c @@ -0,0 +1,116 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with or without look-ahead. The lower triangular factor is left + * unpivoted and the pivots are not returned. The right hand side is the + * N+1 column of the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. 
+ */ + if( A->n <= 0 ) return; + + A->info = 0; + + if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) ) + { + HPL_pdgesv0( GRID, ALGO, A ); + } + else + { + HPL_pdgesvK2( GRID, ALGO, A ); + } +/* + * Solve upper triangular system + */ + if( A->info == 0 ) HPL_pdtrsv( GRID, A ); +/* + * End of HPL_pdgesv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv0.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv0.c new file mode 100644 index 000000000..d79b6fa55 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesv0.c @@ -0,0 +1,167 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesv0 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesv0 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesv0 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * without look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. 
+ * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, j, jb, n, nb, tag=MSGID_BEGIN_FACT, + test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + if( ( N = A->n ) <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + + HPL_pdupdate = ALGO->upfun; nb = A->nb; +/* + * Allocate a panel list of length 1 - Allocate panel[0] resources + */ + panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); } + + HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag, + &panel[0] ); +/* + * Loop over the columns of A + */ + for( j = 0; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && GRID->mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Release panel resources - re-initialize panel data structure + */ + (void) HPL_pdpanel_free( panel[0] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] ); +/* + * Factor and broadcast current panel - update + */ + HPL_pdfact( panel[0] ); + (void) HPL_binit( panel[0] ); + do + { (void) HPL_bcast( panel[0], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[0] ); + HPL_pdupdate( NULL, NULL, panel[0], -1 ); +/* + * Update message id for next factorization + */ + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Release panel resources and panel list + */ + (void) HPL_pdpanel_disp( &panel[0] ); + + if( panel ) free( panel ); +/* + * End of HPL_pdgesv0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK1.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK1.c new file mode 100644 index 000000000..ff1958cfc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK1.c @@ -0,0 +1,222 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK1 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK1 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK1 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. 
The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + HPL_T_panel * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. 
+ */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel - use long topology for those + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-1-k panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? 
time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Allocate current panel resources - Finish latest update - Factor and + * broadcast current panel + */ + HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Release latest panel resources - circular of the panel pointers + * Go to the next process row and column - update the message ids for + * broadcast + */ + (void) HPL_pdpanel_disp( &panel[0] ); + for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + + if( panel ) free( panel ); +/* + * End of HPL_pdgesvK1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK2.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK2.c new file mode 100644 index 000000000..dec506ab9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdgesvK2.c @@ -0,0 +1,231 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdgesvK2 +( + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + HPL_T_pmat * A +) +#else +void HPL_pdgesvK2 +( GRID, ALGO, A ) + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + HPL_T_pmat * A; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdgesvK2 factors a N+1-by-N matrix using LU factorization with row + * partial pivoting. The main algorithm is the "right looking" variant + * with look-ahead. The lower triangular factor is left unpivoted and + * the pivots are not returned. The right hand side is the N+1 column of + * the coefficient matrix. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters. + * + * A (local input/output) HPL_T_pmat * + * On entry, A points to the data structure containing the local + * array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + HPL_T_panel * p, * * panel = NULL; + HPL_T_UPD_FUN HPL_pdupdate; + int N, depth, icurcol=0, j, jb, jj=0, jstart, + k, mycol, n, nb, nn, npcol, nq, + tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; +#ifdef HPL_PROGRESS_REPORT + double start_time, time, gflops; +#endif +/* .. + * .. Executable Statements .. + */ + mycol = GRID->mycol; npcol = GRID->npcol; + depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; + N = A->n; nb = A->nb; + + if( N <= 0 ) return; + +#ifdef HPL_PROGRESS_REPORT + start_time = HPL_timer_walltime(); +#endif + +/* + * Allocate a panel list of length depth + 1 (depth >= 1) + */ + panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) ); + if( panel == NULL ) + { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); } +/* + * Create and initialize the first depth panels + */ + nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; + + for( k = 0; k < depth; k++ ) + { + jb = Mmin( nn, nb ); + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, + tag, &panel[k] ); + nn -= jb; jstart += jb; + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Create last depth+1 panel + */ + HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart, + jstart, tag, &panel[depth] ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); +/* + * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] + */ + for( k = 0, j = 0; k < depth; k++ ) + { + jb = jstart - j; jb = Mmin( jb, nb ); j += jb; +/* + * Factor and broadcast k-th panel + */ + HPL_pdfact( panel[k] ); + (void) HPL_binit( panel[k] ); + do + { (void) HPL_bcast( panel[k], &test ); } + while( test != HPL_SUCCESS ); + (void) HPL_bwait( panel[k] ); +/* + * Partial update of the depth-k-1 panels in front of me + */ + if( k < depth - 1 ) + { + nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); + HPL_pdupdate( NULL, NULL, 
panel[k], nn ); + } + } +/* + * Main loop over the remaining columns of A + */ + for( j = jstart; j < N; j += nb ) + { + n = N - j; jb = Mmin( n, nb ); +#ifdef HPL_PROGRESS_REPORT + /* if this is process 0,0 and not the first panel */ + if ( GRID->myrow == 0 && mycol == 0 && j > 0 ) + { + time = HPL_timer_walltime() - start_time; + gflops = 2.0*(N*(double)N*N - n*(double)n*n)/3.0/(time > 0.0 ? time : 1e-6)/1e9; + HPL_fprintf( stdout, "Column=%09d Fraction=%4.1f%% Gflops=%9.3e\n", j, j*100.0/N, gflops); + } +#endif +/* + * Initialize current panel - Finish latest update, Factor and broadcast + * current panel + */ + (void) HPL_pdpanel_free( panel[depth] ); + HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] ); + + if( mycol == icurcol ) + { + nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + HPL_pdfact( panel[depth] ); /* factor current panel */ + } + else { nn = 0; } + /* Finish the latest update and broadcast the current panel */ + (void) HPL_binit( panel[depth] ); + HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); + (void) HPL_bwait( panel[depth] ); +/* + * Circular of the panel pointers: + * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp; + * + * Go to next process row and column - update the message ids for broadcast + */ + p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; + panel[depth] = p; + + if( mycol == icurcol ) { jj += jb; nq -= jb; } + icurcol = MModAdd1( icurcol, npcol ); + tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); + } +/* + * Clean-up: Finish updates - release panels and panel list + */ + nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); + for( k = 0; k < depth; k++ ) + { + (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); + (void) HPL_pdpanel_disp( &panel[k] ); + } + (void) HPL_pdpanel_disp( &panel[depth] ); + + if( panel ) free( panel ); +/* + * End of 
HPL_pdgesvK2 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c new file mode 100644 index 000000000..b4433e1be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00N.c @@ -0,0 +1,432 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU jb +/* .. + * .. Executable Statements .. 
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W (ldW * jb) + */ + vptr = (void*)malloc( + ((size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). 
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necesary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise. 
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U. 
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this later case, and I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, 
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcast U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c new file mode 100644 index 000000000..7a9764c09 --- /dev/null +++ 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp00T.c @@ -0,0 +1,433 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp00T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp00T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp00T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * Bi-directional exchange is used to perform the swap :: broadcast of + * the row panel U at once, resulting in a lower number of messages than + * usual as well as a lower communication volume. With P process rows and + * assuming bi-directional links, the running time of this function can + * be approximated by: + * + * log_2(P) * (lat + NB*LocQ(N) / bdwth) + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. Mono + * directional links will double this communication cost. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information.
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be broadcast and swapped) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm comm; + HPL_T_grid * grid; + double * A, * U, * W; + void * vptr = NULL; + int * ipID, * lindxA, * lindxAU, * llen, + * llen_sv; + unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, + mydist, mydis_; + int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, + hdim, i, icurrow, *iflag, ipA, ipW, *ipl, + iprow, jb, k, lda, ldW, myrow, n, nprow, + partner, root, size_, usize; +#define LDU n /* passed right after U in every HPL_dlaswp0xT call below */ +/* .. + * .. Executable Statements ..
+ */ + n = Mmin( NN, PANEL->n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Retrieve parameters from the PANEL data structure + */ + grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; + comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; + hdim = grid->row_hdim; align = PANEL->algo->align; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; + ldW = n + 1; +/* + * Allocate space for temporary W: ldW * jb doubles plus align extra doubles (presumably alignment slack for HPL_PTR); W is derived from vptr via HPL_PTR, so vptr (not W) is what gets freed at the end + */ + vptr = (void*)malloc( ( (size_t)(align) + + ((size_t)(jb) * (size_t)(ldW))) * + sizeof(double) ); + if( vptr == NULL ) + { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); } + + W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); +/* + * Construct ipID and its local counter parts lindxA, lindxAU - llen is + * the number of rows/columns that I have in workspace and that I should + * send. Compute lindx_, ipA, llen if it has not already been done for + * this panel; + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; + llen = lindxAU + k; llen_sv = llen + nprow; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } + else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */ + { + HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); + *iflag = 0; + } +/* + * Copy the llen_sv into llen - Reset ipA to its correct value + */ + ipA = llen_sv[myrow]; + for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } +/* + * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- + * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).
In icurrow, + * we directly pack into U, otherwise we pack into workspace. The first + * entry of each column packed in workspace is in fact the row or column + * offset in U where it should go to. + */ + if( myrow == icurrow ) + { + HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } + else + { + HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * Algorithm for bi-directional data exchange: + * + * As long as I have not talked to a process that already had the data + * from icurrow, I will be sending the workspace, otherwise I will be + * sending U. Note that the columns in workspace contain the local index + * in U they should go to. + * + * If I am receiving from a process that has the data from icurrow, I + * will be receiving in U, copy the data of U that stays into A, and + * then the columns I have in workspace into U; otherwise I will be re- + * ceiving in the remaining workspace. If I am one of those processes + * that already has the data from icurrow, I will be immediately copying + * the data I have in my workspace into U. + * + * When I receive U, some of U should be copied in my piece of A before + * I can copy the rows I have in my workspace into U. This information + * is kept in the lists lindx_: the row lindxAU[i] should be copied in + * the row lindxA[i] of my piece of A, just as in the reversed initial + * packing operation. Those rows are thus the first ones in the work ar- + * ray. After this operation has been performed, I will not need + * those lindx arrays, and I will always be sending a buffer of size + * jb x n, or n x jb, that is, U. + * + * At every step of the algorithm, it is necessary to update the list + * llen, so that I can figure out how large the next messages I will be + * sending/receiving are. It is obvious when I am sending U. It is not + * otherwise.
+ * + * We choose icurrow to be the source of the bi-directional exchange. + * This allows the processes in the non-power 2 part to receive U at the + * first exchange, and then broadcast internally this U so that those + * processes can grab their piece of A. + */ + if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } + ipW = ipA; + Np2 = ( ( size_ = nprow - ip2 ) != 0 ); + mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); +/* + * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] + * receives local data from proc[i] for all i in [ip2..nprow); icurrow + * is the source, these last process indexes are relative to icurrow. + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + + if( mydist == 0 ) /* I am the current row: I send U and recv W */ + { + (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, + Cmsgid, partner, comm ); + if( llen[partner] > 0 ) + HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW ); + } + else if( mydist == ip2 ) + { /* I recv U for later Bcast, I send my W */ + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + } + else /* None of us is icurrow, we exchange our Ws */ + { + if( ( mydist & ip2 ) != 0 ) + { + (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); + } + else + { + (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, + partner, Cmsgid, comm ); + if( llen[partner] > 0 ) ipW += llen[partner]; + } + } + } +/* + * Update llen + */ + for( i = 1; i < size_; i++ ) + { + iprow = MModAdd( icurrow, i, nprow ); + partner = MModAdd( iprow, (int)(ip2), nprow ); + llen[ iprow ] += llen[ partner ]; + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); +/* + * power of 2 part of the processes collection: only processes [0..ip2) + * are working; some of them (mydist >> (k+1) == 0) either send or re- + * ceive U.
At every step k, k is in [0 .. hdim), of the algorithm, a + * process pair that exchanges U is such that (mydist >> (k+1) == 0). + * Among those processes, the ones that are sending U are such that + * mydist >> k == 0. + */ + if( mydist < ip2 ) + { + k = 0; + + while( k < hdim ) + { + partner = (int)(mydist ^ ipow); + partner = MModAdd( icurrow, partner, nprow ); +/* + * Exchange and combine the local results - If I receive U, then I must + * copy from U the rows that belong to my piece of A, and then update U + * by copying in it the rows I have accumulated in W. Otherwise, I re- + * ceive W. In this latter case, if I have U, I shall update my copy of + * U by copying in it the rows I have accumulated in W. If I did not + * have U before, I simply need to update my pointer in W for later use. + */ + if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) + { + if( ( mydist >> (unsigned int)(k) ) == 0 ) + { + (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, + ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW, + ldW ), Mptr( W, 1, ipW, ldW ), ldW ); + ipW += llen[partner]; + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, + Cmsgid, partner, comm ); + HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W, + W+1, ldW, lindxA, lindxAU ); + } + } + else + { + (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, + ipW, ldW ), llen[partner]*ldW, Cmsgid, + partner, comm ); + ipW += llen[partner]; + } +/* + * Update llen - Go to next process pairs + */ + iprow = icurrow; ipdist = 0; + do + { + if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) + { + partner = MModAdd( icurrow, partner, nprow ); + llen[iprow] += llen[partner]; + llen[partner] = llen[iprow]; + } + iprow = MModAdd( iprow, 1, nprow ); ipdist++; + + } while( ipdist < ip2 ); + + ipow <<= 1; k++; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST,
IFLAG ); + } + } + else + { +/* + * non power of 2 part of the process collection: proc[ip2] broadcast U + * to procs[ip2..nprow) (relatively to icurrow). + */ + if( size_ > 1 ) + { + k = size_ - 1; + while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } + root = MModAdd( icurrow, (int)(ip2), nprow ); + mydis_ = (unsigned int)MModSub( myrow, root, nprow ); + + do + { + mask ^= ip2_; + if( ( mydis_ & mask ) == 0 ) + { + partner = (int)(mydis_ ^ ip2_); + if( ( mydis_ & ip2_ ) != 0 ) + { + (void) HPL_recv( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + + } + else if( partner < size_ ) + { + (void) HPL_send( U, usize, MModAdd( root, partner, + nprow ), Cmsgid, comm ); + } + } + ip2_ >>= 1; +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2_ > 0 ); + } +/* + * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece + * of A. + */ + HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); + } +/* + * If nprow is not a power of 2, proc[i-ip2] sends global result to + * proc[i] for all i in [ip2..nprow); + */ + if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) + { + partner = MModAdd( icurrow, partner, nprow ); + if( ( mydist & ip2 ) != 0 ) + { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } + else + { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } + } + + if( vptr ) free( vptr ); +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp00T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c new file mode 100644 index 000000000..31f219840 --- /dev/null +++
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01N.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01N +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01N +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01N applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcasts a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4.
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU jb +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not: the setting is read from PANEL->algo->equil while the function-static equil still holds -1, and the stored value is reused on later calls + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel).
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01N was called before: only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k], + 0, LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen, + ipmap, ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + *
Permute U in every process row + */ + HPL_dlaswp00N( jb, n, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01N + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c new file mode 100644 index 000000000..0c4de2669 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdlaswp01T.c @@ -0,0 +1,217 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdlaswp01T +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdlaswp01T +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdlaswp01T applies the NB row interchanges to NN columns of the + * trailing submatrix and broadcast a column panel. + * + * A "Spread then roll" algorithm performs the swap :: broadcast of the + * row panel U at once, resulting in a minimal communication volume and + * a "very good" use of the connectivity if available. With P process + * rows and assuming bi-directional links, the running time of this + * function can be approximated by: + * + * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + * + * where NB is the number of rows of the row panel U, N is the global + * number of columns being updated, lat and bdwth are the latency and + * bandwidth of the network for double precision real words. 
K is + * a constant in (2,3] that depends on the achieved bandwidth during a + * simultaneous message exchange between two processes. An empirical + * optimistic value of K is typically 2.4. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be swapped and broadcast starting at + * the current position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * A, * U; + int * ipID, * iplen, * ipmap, * ipmapm1, + * iwork, * lindxA = NULL, * lindxAU, + * permU; + static int equil=-1; + int icurrow, * iflag, * ipA, * ipl, jb, k, + lda, myrow, n, nprow; +#define LDU n +/* .. + * .. Executable Statements .. + */ + n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; +/* + * Quick return if there is nothing to do + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) return; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * Decide whether equilibration should be performed or not + */ + if( equil == -1 ) equil = PANEL->algo->equil; +/* + * Retrieve parameters from the PANEL data structure + */ + nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; + A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; + lda = PANEL->lda; icurrow = PANEL->prow; +/* + * Compute ipID (if not already done for this panel). 
lindxA and lindxAU + * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 + * are of size nprow, permU is of length jb, and this function needs a + * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): + * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) + * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1); + */ + k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; + ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; + lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; + ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; + + if( *iflag == -1 ) /* no index arrays have been computed so far */ + { + HPL_pipid( PANEL, ipl, ipID ); + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */ + { + HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, + ipmap, ipmapm1, permU, iwork ); + *iflag = 1; + } + else if( ( *iflag == 1 ) && ( equil != 0 ) ) + { /* HPL_pdlaswp01T was call before only re-compute IPLEN, IPMAP */ + HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); + *iflag = 1; + } +/* + * Copy into U the rows to be spread (local to icurrow) + */ + if( myrow == icurrow ) + { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } +/* + * Spread U - optionally probe for column panel + */ + HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, + ipmap, ipmapm1 ); +/* + * Local exchange (everywhere but in process row icurrow) + */ + if( myrow != icurrow ) + { + k = ipmapm1[myrow]; + HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0, + iplen[k], LDU ), LDU, lindxA ); + } +/* + * Equilibration + */ + if( equil != 0 ) + HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap, + ipmapm1, iwork ); +/* + * Rolling phase + */ + HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); +/* + * 
Permute U in every process row + */ + HPL_dlaswp10N( n, jb, U, LDU, permU ); + +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); +#endif +/* + * End of HPL_pdlaswp01T + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdtrsv.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdtrsv.c new file mode 100644 index 000000000..d2135130a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdtrsv.c @@ -0,0 +1,296 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtrsv +( + HPL_T_grid * GRID, + HPL_T_pmat * AMAT +) +#else +void HPL_pdtrsv +( GRID, AMAT ) + HPL_T_grid * GRID; + HPL_T_pmat * AMAT; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtrsv solves an upper triangular system of linear equations. + * + * The rhs is the last column of the N by N+1 matrix A. The solve starts + * in the process column owning the Nth column of A, so the rhs b may + * need to be moved one process column to the left at the beginning. The + * routine therefore needs a column vector in every process column but + * the one owning b. The result is replicated in all process rows, and + * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + * + * The algorithm uses decreasing one-ring broadcast in process rows and + * columns implemented in terms of synchronous communication point to + * point primitives. The lookahead of depth 1 is used to minimize the + * critical path. 
This entire operation is essentially ``latency'' bound + * and an estimate of its running time is given by: + * + * (move rhs) lat + N / ( P bdwth ) + + * (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + * gam2 N^2 / ( P Q ), + * + * where gam2 is an estimate of the Level 2 BLAS rate of execution. + * There are N / NB diagonal blocks. One must exchange 2 messages of + * length NB to compute the next NB entries of the vector solution, as + * well as performing a total of N^2 floating point operations. + * + * Arguments + * ========= + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * AMAT (local input/output) HPL_T_pmat * + * On entry, AMAT points to the data structure containing the + * local array information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Comm Ccomm, Rcomm; + double * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL, + * XR=NULL, * Xd=NULL, * Xdprev=NULL, + * W=NULL; + int Alcol, Alrow, Anpprev, Anp, Anq, Bcol, + Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid, + Wfr=0, colprev, kb, kbprev, lda, mycol, + myrow, n, n1, n1p, n1pprev=0, nb, npcol, + nprow, rowprev, tmp1, tmp2; +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif + if( ( n = AMAT->n ) <= 0 ) return; + nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X; + + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV; + Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1; + GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 ); +/* + * Move the rhs in the process column owning the last column of A. 
+ */ + Mnumroc( Anp, n, nb, nb, myrow, 0, nprow ); + Mnumroc( Anq, n, nb, nb, mycol, 0, npcol ); + + tmp1 = ( n - 1 ) / nb; + Alrow = tmp1 - ( tmp1 / nprow ) * nprow; + Alcol = tmp1 - ( tmp1 / npcol ) * npcol; + kb = n - tmp1 * nb; + + Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda ); + Mindxg2p( n, nb, nb, Bcol, 0, npcol ); + + if( ( Anp > 0 ) && ( Alcol != Bcol ) ) + { + if( mycol == Bcol ) + { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); } + else if( mycol == Alcol ) + { (void) HPL_recv( XC, Anp, Bcol, Rmsgid, Rcomm ); } + } + Rmsgid = ( Rmsgid + 2 > + MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid + 2 ); + if( mycol != Alcol ) + { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; } +/* + * Set up lookahead + */ + n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb ); + if( Anp > 0 ) + { + W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) ); + if( W == NULL ) + { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); } + Wfr = 1; + } + + Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda ); + tmp1 = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); } + if( mycol == Alcol ) + { + Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq ); + if( myrow == Alrow ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, Xd, 1 ); + } + } + + rowprev = Alrow; Alrow = MModSub1( Alrow, nprow ); + colprev = Alcol; Alcol = MModSub1( Alcol, npcol ); + kbprev = kb; n -= kb; + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); +/* + * Start the operations + */ + while( n > 0 ) + { + if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; } + if( myrow == Alrow ) { Anp -= kb; } +/* + * Broadcast (decreasing-ring) of previous solution block in 
previous + * process column, compute partial update of current block and send it + * to current process column. + */ + if( mycol == colprev ) + { +/* + * Send previous solution block in process row above + */ + if( myrow == rowprev ) + { + if( GridIsNot1xQ ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else + { + (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ), + Cmsgid, Ccomm ); + } +/* + * Compute partial update of previous solution block and send it to cur- + * rent column + */ + if( n1pprev > 0 ) + { + tmp1 = Anpprev - n1pprev; + HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev, + -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone, + XC+tmp1, 1 ); + if( GridIsNotPx1 ) + (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm ); + } +/* + * Finish the (decreasing-ring) broadcast of the solution block in pre- + * vious process column + */ + if( ( myrow != rowprev ) && + ( myrow != MModAdd1( rowprev, nprow ) ) ) + (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), + Cmsgid, Ccomm ); + } + else if( mycol == Alcol ) + { +/* + * Current column receives and accumulates partial update of previous + * solution block + */ + if( n1pprev > 0 ) + { + (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm ); + HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 ); + } + } +/* + * Solve current diagonal block + */ + if( ( mycol == Alcol ) && ( myrow == Alrow ) ) + { + HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, + kb, Aptr+Anp, lda, XC+Anp, 1 ); + HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 ); + } +/* +* Finish previous update +*/ + if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) + HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone, + Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 ); +/* +* Save info of current step and update info for the next step +*/ + if( mycol == Alcol ) { Xdprev = Xd; Aprev = Aptr; } + if( myrow == Alrow ) { Anpprev -= kb; } + rowprev = Alrow; colprev = 
Alcol; + n1pprev = n1p; kbprev = kb; n -= kb; + Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol ); + tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); + MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); + + Rmsgid = ( Rmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV : Rmsgid+2 ); + Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ? + MSGID_BEGIN_PTRSV+1 : Cmsgid+2 ); + } +/* + * Replicate last solution block + */ + if( mycol == colprev ) + (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev, + Ccomm ); + + if( Wfr ) free( W ); +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_PTRSV ); +#endif +/* + * End of HPL_pdtrsv + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNN.c new file mode 100644 index 000000000..7e31ddcd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNN.c @@ -0,0 +1,442 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. + * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. 
+ */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. 
+ */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, 
Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq 
-= n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNT.c new file mode 100644 index 000000000..faa3ef207 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateNT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateNT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateNT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateNT broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU ); 
+ Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateNT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTN.c new file mode 100644 index 000000000..a16aa26a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTN.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTN +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTN broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU jb +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, 0, nn, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= 
n; PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTT.c new file mode 100644 index 000000000..81e6cc4b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pdupdateTT.c @@ -0,0 +1,443 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdupdateTT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int NN +) +#else +void HPL_pdupdateTT +( PBCST, IFLAG, PANEL, NN ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int NN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdupdateTT broadcast - forward the panel PBCST and simultaneously + * applies the row interchanges and updates part of the trailing (using + * the panel PANEL) submatrix. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local output) int * + * On exit, IFLAG indicates whether or not the broadcast has + * been completed when PBCST is not NULL on entry. In that case, + * IFLAG is left unchanged. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be updated) information. 
+ * + * NN (local input) const int + * On entry, NN specifies the local number of columns of the + * trailing submatrix to be updated starting at the current + * position. NN must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv; + int * ipiv; +#ifdef HPL_CALL_VSIPL + vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; +#endif + int curr, i, iroff, jb, lda, ldl2, mp, n, nb, + nq0, nn, test; + static int tswap = 0; + static HPL_T_SWAP fswap = HPL_NO_SWP; +#define LDU n +/* .. + * .. Executable Statements .. + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; + if( NN >= 0 ) n = Mmin( NN, n ); +/* + * There is nothing to update, enforce the panel broadcast. + */ + if( ( n <= 0 ) || ( jb <= 0 ) ) + { + if( PBCST != NULL ) + { + do { (void) HPL_bcast( PBCST, IFLAG ); } + while( *IFLAG != HPL_SUCCESS ); + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif + return; + } +/* + * Enable/disable the column panel probing mechanism + */ + (void) HPL_bcast( PBCST, &test ); +/* + * 1 x Q case + */ + if( PANEL->grid->nprow == 1 ) + { + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; + mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0; +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif + for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } +/* + * So far we have not updated 
anything - test availability of the panel + * to be forwarded - If detected forward it and finish the update in one + * step. + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); +/* + * Update nb columns at a time + */ +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_LASWP ); + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); + HPL_ptimer( HPL_TIMING_LASWP ); +#else + HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); +#endif + HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, + HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) 
vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + else /* nprow > 1 ... */ + { +/* + * Selection of the swapping algorithm - swap:broadcast U. + */ + if( fswap == HPL_NO_SWP ) + { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } + + if( ( fswap == HPL_SWAP01 ) || + ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) + { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } + else + { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } +/* + * Compute redundantly row block of U and update trailing submatrix + */ + nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); + Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; + Uptr = PANEL->U; ldl2 = PANEL->ldl2; + mp = PANEL->mp - ( curr != 0 ? 
jb : 0 ); +#ifdef HPL_CALL_VSIPL +/* + * Admit the blocks + */ + (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE ); + (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE ); +/* + * Create the matrix views + */ + Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); + Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); + Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); +/* + * Create the matrix subviews + */ + Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); +#endif +/* + * Broadcast has not occured yet, spliting the computational part + */ + while ( test == HPL_KEEP_TESTING ) + { + nn = n - nq0; nn = Mmin( nb, nn ); + + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + Uptr = Mptr( Uptr, nn, 0, LDU 
); + Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; + + (void) HPL_bcast( PBCST, &test ); + } +/* + * The panel has been forwarded at that point, finish the update + */ + if( ( nn = n - nq0 ) > 0 ) + { + HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, + HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); + + if( curr != 0 ) + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Mptr( Aptr, jb, 0, lda ), lda ); +#endif + HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); + } + else + { +#ifdef HPL_CALL_VSIPL +/* + * Create the matrix subviews + */ + Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb ); + Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); + + vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, + HPL_rone, Av1 ); +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Av1 ); + (void) vsip_mdestroy_d( Uv1 ); +#else + HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, + jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, + Aptr, lda ); +#endif + } + } +#ifdef HPL_CALL_VSIPL +/* + * Destroy the matrix subviews + */ + (void) vsip_mdestroy_d( Lv1 ); +/* + * Release the blocks + */ + (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); + (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); +/* + * Destroy the matrix views + */ + (void) vsip_mdestroy_d( Uv0 ); + (void) vsip_mdestroy_d( Lv0 ); + (void) vsip_mdestroy_d( Av0 ); +#endif + } + + PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; 
PANEL->jj += n; +/* + * return the outcome of the probe (should always be HPL_SUCCESS, the + * panel broadcast is enforced in that routine). + */ + if( PBCST != NULL ) *IFLAG = test; +#ifdef HPL_DETAILED_TIMING + HPL_ptimer( HPL_TIMING_UPDATE ); +#endif +/* + * End of HPL_pdupdateTT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_perm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_perm.c new file mode 100644 index 000000000..bf7cc4503 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_perm.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_perm +( + const int N, + int * LINDXA, + int * LINDXAU, + int * IWORK +) +#else +void HPL_perm +( N, LINDXA, LINDXAU, IWORK ) + const int N; + int * LINDXA; + int * LINDXAU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_perm combines two index arrays and generates the corresponding + * permutation. First, this function computes the inverse of LINDXA, and + * then combines it with LINDXAU. Second, in order to be able to perform + * the permutation in place, LINDXAU is overwritten by the sequence of + * permutations producing the same result. What we ultimately want to + * achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the + * call to this function, this in place permutation can be performed by + * for i in [0..N) swap U[i] with U[LINDXAU[i]]. + * + * Arguments + * ========= + * + * N (global input) const int + * On entry, N specifies the length of the arrays LINDXA and + * LINDXAU. N should be at least zero.
+ * + * LINDXA (global input/output) int * + * On entry, LINDXA is an array of dimension N containing the + * source indexes. On exit, LINDXA contains the combined index + * array. + * + * LINDXAU (global input/output) int * + * On entry, LINDXAU is an array of dimension N containing the + * target indexes. On exit, LINDXAU contains the sequence of + * permutations that should be applied in increasing order to + * permute the underlying array U in place. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension N. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, j, k, fndd; +/* .. + * .. Executable Statements .. + */ +/* + * Invert LINDXA - combine LINDXA and LINDXAU - Initialize IWORK + */ + for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; } + for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; } + + for( i = 0; i < N; i++ ) + { + /* search LINDXA such that LINDXA[j] == i (the scan has no upper bound on j: the value i must be present) */ + j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--; + /* search IWORK such that IWORK[k] == j (likewise unbounded: the value j must be present) */ + k = 0; do { fndd = ( IWORK[k] == j ); k++; } while( !fndd ); k--; + /* swap IWORK[i] and IWORK[k]; LINDXAU[i] = k */ + j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j; + LINDXAU[i] = k; + } +/* + * End of HPL_perm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pipid.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pipid.c new file mode 100644 index 000000000..ab5ef949f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_pipid.c @@ -0,0 +1,187 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pipid +( + HPL_T_panel * PANEL, + int * K, + int * IPID +) +#else +void HPL_pipid +( PANEL, K, IPID ) + HPL_T_panel * PANEL; + int * K; + int * IPID; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pipid computes an array IPID that contains the source and final + * destination of matrix rows resulting from the application of N + * interchanges as computed by the LU factorization with row partial + * pivoting. The array IPID is such that the row of global index IPID(i) + * should be mapped onto the row of global index IPID(i+1). Note that we + * cannot really know the length of IPID a priori. However, we know that + * this array is at least 2*N long, since there are N rows to swap and + * broadcast. The length of this array must be smaller than or equal to + * 4*N, since every row is swapped with at most a single distinct remote + * row. The algorithm constructing IPID goes as follows: Let IA be the + * global index of the first row to be swapped. + * + * For every row src IA + i with i in [0..N) to be swapped with row dst + * such that dst is given by DPIV[i]: + * + * Is row src the destination of a previous row of the current block, + * that is, is there k odd such that IPID(k) is equal to src ? + * Yes: update this destination with dst.
For example, if the + * pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, + * we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it + * was thought so far ... + * No : add the pair (src,dst) at the end of IPID; row src has not + * been moved yet. + * + * Is row dst different from src the destination of a previous row of + * the current block, i.e., is there k odd such that IPID(k) is equal to + * dst ? + * Yes: update IPID(k) with src. For example, if the pivot array + * is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in + * fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought + * so far ... + * No : add the pair (dst,src) at the end of IPID; row dst has not + * been moved yet. + * + * Note that when src is equal to dst, the pair (dst,src) should not be + * added to IPID in order to avoid duplicated entries in this array. + * During the construction of the array IPID, we make sure that the + * first N pairs are such that IPID(k) with k odd is equal to IA+k/2. + * For k in [0..K/2), the row of global index IPID(2*k) should be + * mapped onto the row of global index IPID(2*k+1). + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global output) int * + * On exit, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global output) int * + * On entry, IPID is an array of length 4*N. On exit, the first + * K entries of that array contain the src and final destination + * resulting from the application of the N interchanges as + * specified by DPIV. The pairs (src,dst) are contiguously + * stored and sorted so that IPID(2*i+1) is equal to IA+i with i + * in [0..N). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int dst, fndd, fnds, ia, i, j, jb, lst, off, + src; + double * dpiv; +/* .. + * .. Executable Statements .. + */ + dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia; + dst = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2; + if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; } + + for( i = 1; i < jb; i++ ) + { + fnds = 0; j = 1; /* j scans the odd-indexed slots of IPID */ + + if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) ) + { + do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } } + while( !( fnds ) && ( j < *K ) ); + if( !fnds ) { lst = *K; off = 2; IPID[lst] = src; } + else { lst = fnds-1; off = 0; } + IPID[lst+1] = dst; + } + else + { + fndd = 0; + do + { + if ( src == IPID[j] ) { fnds = j; } + else if( dst == IPID[j] ) { fndd = j; } + j += 2; + } + while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) ); + if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off = 2; } + else { IPID[fnds] = dst; off = 0; } + if( !fndd ) { lst = *K+off; IPID[lst ] = dst; off += 2; } + else { lst = fndd-1; } + IPID[lst+1] = src; + } +/* + * Enforce IPID(1,i) equal to src = ia + i: the pair stored at lst is swapped into slot 2*i so that IPID[2*i+1] holds ia + i + */ + if( lst != ( j = ( i << 1 ) ) ) + { + src = IPID[j ]; IPID[j ] = IPID[lst ]; IPID[lst ] = src; + dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst; + } + *K += off; + } +/* + * End of HPL_pipid + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx0.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx0.c new file mode 100644 index 000000000..be12639d0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx0.c @@ -0,0 +1,281 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx0 +( + HPL_T_panel * PANEL, + const int K, + int * IPID, + int * LINDXA, + int * LINDXAU, + int * LLEN +) +#else +void HPL_plindx0 +( PANEL, K, IPID, LINDXA, LINDXAU, LLEN ) + HPL_T_panel * PANEL; + const int K; + int * IPID; + int * LINDXA; + int * LINDXAU; + int * LLEN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. + * + * On entry, the array IPID of length K is such that the row of global + * index IPID(i) should be mapped onto row of global index IPID(i+1). + * Let IA be the global index of the first row to be swapped. For k in + * [0..K/2), the row of global index IPID(2*k) should be mapped onto the + * row of global index IPID(2*k+1). The question then, is to determine + * which rows should ultimately be part of U. + * + * First, some rows of the process ICURROW may be swapped locally. One + * of these rows belongs to U, the other one belongs to my local piece of + * A. The other rows of the current block are swapped with remote rows + * and are thus not part of U. These rows however should be sent along, + * and grabbed by the other processes as we progress in the exchange + * phase.
+ * + * So, assume that I am ICURROW and consider a row of index IPID(2*i) + * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less + * than N, this row is locally swapped and should be copied into U at + * the position IPID(2*i+1) - IA. No row will be exchanged for this one. + * If IPID(2*i+1)-IA is greater than or equal to N, then the row IPID(2*i) should be + * locally copied into my local piece of A at the position corresponding + * to the row of global index IPID(2*i+1). + * + * If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) + * is to be swapped away and strictly speaking does not belong to U, but + * to A remotely. Since this process will however send this array U, + * this row is copied into U, exactly where the row IPID(2*i+1) should + * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to + * IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position + * IPID(2*k1+1)-IA. + * + * It is thus important to put the rows that go into U, i.e., such that + * IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By + * doing so, U is formed, and the local copy is performed in just one + * sweep. + * + * Two lists LINDXA and LINDXAU are built. LINDXA contains the local + * index of the rows I have that should be copied. LINDXAU contains the + * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A + * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). In the process + * ICURROW, the initial packing algorithm proceeds as follows.
+ * + * for all entries in IPID, + * if IPID(2*i) is in ICURROW, + * if IPID(2*i+1) is in ICURROW, + * if( IPID(2*i+1) - IA < N ) + * save corresponding local position + * of this row (LINDXA); + * save local position (LINDXAU) in U + * where this row goes; + * [copy row IPID(2*i) in U at position + * IPID(2*i+1)-IA; ]; + * else + * save corresponding local position of + * this row (LINDXA); + * save local position (-LINDXAU) in A + * where this row goes; + * [copy row IPID(2*i) in my piece of A + * at IPID(2*i+1);] + * end if + * else + * find k1 such that IPID(2*k1) = IPID(2*i+1); + * copy row IPID(2*i) in U at position + * IPID(2*k1+1)-IA; + * save corresponding local position of this + * row (LINDXA); + * save local position (LINDXAU) in U where + * this row goes; + * end if + * end if + * end for + * + * Second, if I am not the current row process ICURROW, all source rows + * in IPID that I own are part of U. Indeed, they are swapped with one + * row of the current block of rows, and the main factorization + * algorithm proceeds one row after each other. The processes different + * from ICURROW, should exchange and accumulate those rows until they + * receive some data previously owned by the process ICURROW. + * + * In processes different from ICURROW, the initial packing algorithm + * proceeds as follows. Consider a row of global index IPID(2*i) that I + * own. When I will be receiving data previously owned by ICURROW, i.e., + * U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, + * and this particular row of U should be first copied into my piece of + * A, at A(il,:), where il is the local row index corresponding to + * IPID(2*i). Now, initially, this row will be packed into workspace, say + * as the kth row of that work array. The following algorithm sets + * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row + * should be copied. LINDXA(k) stores the local index in A where this + * row of U should be copied, i.e., il.
+ * + * for all entries in IPID, + * if IPID(2*i) is not in ICURROW, + * copy row IPID(2*i) in work array; + * save corresponding local position + * of this row (LINDXA); + * save position (LINDXAU) in U where + * this row should be copied; + * end if + * end for + * + * Since we are at it, we also globally figure out how many rows every + * process has. That is necessary, because it would rather be cumbersome + * to figure it on the fly during the bi-directional exchange phase. + * This information is kept in the array LLEN of size NPROW. Also note + * that the arrays LINDXA and LINDXAU are of max length equal to 2*N. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * LINDXA (local output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (local output) int * + * On entry, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * LLEN (global output) int * + * On entry, LLEN is an array of length NPROW. On exit, it + * contains how many rows every process has. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int dst, dstrow, fndd, i, ia, icurrow, il, + ip=0, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. LLEN is zeroed first, then LLEN[srcrow] is incremented once for each source entry of IPID. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + icurrow = PANEL->prow; jb = PANEL->jb; + nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; + + for( i = 0; i < nprow; i++ ) LLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; + Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++; + + if( myrow == srcrow ) + { + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; dst = IPID[i+1]; + + if( myrow == icurrow ) + { + Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( dstrow == icurrow ) + { + if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; } + else + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + } + else + { + j = 0; /* scan the even (source) slots of IPID for dst; when found, IPID[j-1] is its paired destination */ + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + LINDXAU[ip] = IPID[j-1] - ia; + } + } + else { LINDXAU[ip] = dst - ia; } + + ip++; + } + } +/* + * End of HPL_plindx0 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx1.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx1.c new file mode 100644 index 000000000..a24fd4c56 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx1.c @@ -0,0 +1,275 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx1 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPA, + int * LINDXA, + int * LINDXAU, + int * IPLEN, + int * IPMAP, + int * IPMAPM1, + int * PERMU, + int * IWORK +) +#else +void HPL_plindx1 +( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPA; + int * LINDXA; + int * LINDXAU; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; + int * PERMU; + int * IWORK; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing + * the local source and final destination position resulting from the + * application of row interchanges. In addition, this function computes + * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic + * mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K.
The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. + * + * IPA (global output) int * + * On exit, IPA specifies the number of rows that the current + * process row has that either belong to U or should be swapped + * with remote rows of A. IPA is set to 0 when myrow differs from icurrow. + * + * LINDXA (global output) int * + * On entry, LINDXA is an array of dimension 2*N. On exit, this + * array contains the local indexes of the rows of A I have that + * should be copied into U. + * + * LINDXAU (global output) int * + * On entry, LINDXAU is an array of dimension 2*N. On exit, this + * array contains the local destination information encoded as + * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be + * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) + * of A should be locally copied into A(-LINDXAU(k),:). + * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IPMAP[i] after the sort + * with the convention that IPLEN[nprow] is the total number of + * rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the + * local number of rows of A that should be moved to the process + * IPMAP[i]. IPLEN is such that the number of rows of the source + * process row can be computed as IPLEN[1] - IPLEN[0], and the + * remaining entries of this array are sorted so that the + * quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW.
On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROW) + * + * PERMU (global output) int * + * On entry, PERMU is an array of dimension JB. On exit, PERMU + * contains a sequence of permutations that should be applied + * in increasing order to permute in place the row panel U. + * + * IWORK (workspace) int * + * On entry, IWORK is a workarray of dimension 2*JB. The first JB entries are passed to HPL_perm as its workspace; the local array iwork starts at IWORK + JB. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int * iwork; + int dst, dstrow, fndd, i, ia, icurrow, il, + ip, ipU, iroff, j, jb, myrow, nb, nprow, + src, srcrow; +/* .. + * .. Executable Statements .. + */ +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + */ + HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ); +/* + * Compute the local arrays LINDXA and LINDXAU containing the local + * source and final destination position resulting from the application + * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA + * elsewhere and PERMU in every process. 
+ */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; + iroff = PANEL->ii; icurrow = PANEL->prow; + + iwork = IWORK + jb; + + if( myrow == icurrow ) + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + + Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; + + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + + PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow]; + j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; + IPLEN[il]++; ipU++; + } + else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXAU[ip] = iroff - il; + } + ip++; + } + } + *IPA = ip; + } + else + { + for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) + { + src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); +/* + * LINDXA[ip] is the local index of the row of A that belongs into U + */ + if( myrow == dstrow ) + { + Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); + LINDXA[ip] = il - iroff; ip++; + } +/* + * iwork[ipU] is the local (current) position index in U + * PERMU[ipU] is the local (final) destination index in U + */ + if( srcrow == icurrow ) + { + if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) + { + PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + else if( dstrow != icurrow ) + { + j = 0; + do { fndd = ( dst == IPID[j] ); j+=2; } + while( !fndd && ( j < K ) ); + PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow]; + iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; + } + } + } + 
*IPA = 0; + } +/* + * Simplify iwork and PERMU, return in PERMU the sequence of permutations + * that need to be applied to U after it has been broadcast. + */ + HPL_perm( jb, iwork, PERMU, IWORK ); +/* + * Reset IPLEN to its correct value: every entry is shifted up by one slot and IPLEN[0] is set to 0 + */ + for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1]; + IPLEN[0] = 0; +/* + * End of HPL_plindx1 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx10.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx10.c new file mode 100644 index 000000000..fa460fd35 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_plindx10.c @@ -0,0 +1,155 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_plindx10 +( + HPL_T_panel * PANEL, + const int K, + const int * IPID, + int * IPLEN, + int * IPMAP, + int * IPMAPM1 +) +#else +void HPL_plindx10 +( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PANEL; + const int K; + const int * IPID; + int * IPLEN; + int * IPMAP; + int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_plindx10 computes three arrays IPLEN, IPMAP and IPMAPM1 that + * contain the logarithmic mapping information for the spreading phase. + * + * Arguments + * ========= + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel information. + * + * K (global input) const int + * On entry, K specifies the number of entries in IPID. K is at + * least 2*N, and at most 4*N. + * + * IPID (global input) const int * + * On entry, IPID is an array of length K. The first K entries + * of that array contain the src and final destination resulting + * from the application of the interchanges. 
+ * + * IPLEN (global output) int * + * On entry, IPLEN is an array of dimension NPROW + 1. On exit, + * this array is such that IPLEN[i] is the number of rows of A + * in the processes before process IMAP[i] after the sort, with + * the convention that IPLEN[nprow] is the total number of rows. + * In other words, IPLEN[i+1] - IPLEN[i] is the local number of + * rows of A that should be moved for each process. IPLEN is + * such that the number of rows of the source process row can be + * computed as IPLEN[1] - IPLEN[0], and the remaining entries of + * this array are sorted so that the quantities IPLEN[i+1] - + * IPLEN[i] are logarithmically sorted. + * + * IPMAP (global output) int * + * On entry, IPMAP is an array of dimension NPROW. On exit, this + * array contains the logarithmic mapping of the processes. In + * other words, IPMAP[myrow] is the corresponding sorted process + * coordinate. + * + * IPMAPM1 (global output) int * + * On entry, IPMAPM1 is an array of dimension NPROW. On exit, + * this array contains the inverse of the logarithmic mapping + * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in + * [0.. NPROW) + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int dst, dstrow, i, ia, icurrow, jb, nb, + nprow, src, srcrow; +/* .. + * .. Executable Statements .. + */ + nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; + ia = PANEL->ia; icurrow = PANEL->prow; +/* + * Compute redundantly the local number of rows that each process has + * and that belong to U in IPLEN[1 .. 
nprow+1] + */ + for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0; + + for( i = 0; i < K; i += 2 ) + { + src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); + if( srcrow == icurrow ) + { + dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); + if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++; + } + } +/* + * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 + * (the inverse of IPMAP) + */ + HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 ); +/* + * End of HPL_plindx10 + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollN.c new file mode 100644 index 000000000..e68590a01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollN.c @@ -0,0 +1,225 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollN +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollN rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. 
+ * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the number of columns of U. N must be + * at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[NPROW]). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type[2]; + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. 
Executable Statements .. + */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE, + &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); + } + + if( lengthS > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE, + &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); } +/* + * End of HPL_rollN + */ +} diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollT.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollT.c new file mode 100644 index 000000000..0160c9412 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_rollT.c @@ -0,0 +1,259 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#define I_SEND 0 +#define I_RECV 1 + +#ifdef STDC_HEADERS +void HPL_rollT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const int N, + double * U, + const int LDU, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_rollT +( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const int N; + double * U; + const int LDU; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rollT rolls the local arrays containing the local pieces of U, so + * that on exit to this function U is replicated in every process row. + * In addition, this function probe for the presence of the column panel + * and forwards it when available. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. + * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. 
+ * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be rolled) information. + * + * N (local input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U in each process row. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process row. + * + * IPMAP (global input) const int * + * On entry, IMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type[2]; +#endif + MPI_Status status; + MPI_Request request; + MPI_Comm comm; + int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, + ierr=MPI_SUCCESS, il, k, l, lengthR, + lengthS, mydist, myrow, next, npm1, nprow, + partner, prev; +/* .. + * .. Executable Statements .. 
+ */ + if( N <= 0 ) return; + + npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; + comm = PANEL->grid->col_comm; +/* + * Rolling phase + */ + mydist = IPMAPM1[myrow]; + prev = IPMAP[MModSub1( mydist, nprow )]; + next = IPMAP[MModAdd1( mydist, nprow )]; + + for( k = 0; k < npm1; k++ ) + { + l = (int)( (unsigned int)(k) >> 1 ); + + if( ( ( mydist + k ) & 1 ) != 0 ) + { + il = MModAdd( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModSub( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; + } + else + { + il = MModSub( mydist, l, nprow ); + lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); + il = MModAdd( mydist, l+1, nprow ); + lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; + } + + if( lengthR > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE, + &type[I_RECV] ); + else + ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE, + &type[I_RECV] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_RECV] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV], + partner, Cmsgid, comm, &request ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. 
+ */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU, + MPI_DOUBLE, partner, Cmsgid, comm, &request ); +#endif + } + + if( lengthS > 0 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE, + &type[I_SEND] ); + else + ierr = MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE, + &type[I_SEND] ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type[I_SEND] ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND], + partner, Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_SEND] ); +#else +/* + * In our case, LDU is N - Do not use the MPI datatype. + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU, + MPI_DOUBLE, partner, Cmsgid, comm ); +#endif + } + + if( lengthR > 0 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Wait( &request, &status ); +#if 0 + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type[I_RECV] ); +#endif + } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); } +/* + * End of HPL_rollT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadN.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadN.c new file mode 100644 index 000000000..202611e7f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadN.c @@ -0,0 +1,303 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadN +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadN +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadN spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of rows of U, that + * should be spread on any given process row. This function also probes + * for the presence of the column panel PBCST. In case of success, this + * panel will be forwarded. If PBCST is NULL on input, this probing + * mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of columns of U. N + * must be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,IPLEN[nprow]). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. 
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + MPI_Datatype type; + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U to the left + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) 
HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U to the right - offset the IPLEN, and IPMAP arrays + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + else if( partner < nprow ) + { + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE, + &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); } +/* + * End of HPL_spreadN + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadT.c 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadT.c new file mode 100644 index 000000000..1adf93507 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/HPL_spreadT.c @@ -0,0 +1,372 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_spreadT +( + HPL_T_panel * PBCST, + int * IFLAG, + HPL_T_panel * PANEL, + const enum HPL_SIDE SIDE, + const int N, + double * U, + const int LDU, + const int SRCDIST, + const int * IPLEN, + const int * IPMAP, + const int * IPMAPM1 +) +#else +void HPL_spreadT +( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 ) + HPL_T_panel * PBCST; + int * IFLAG; + HPL_T_panel * PANEL; + const enum HPL_SIDE SIDE; + const int N; + double * U; + const int LDU; + const int SRCDIST; + const int * IPLEN; + const int * IPMAP; + const int * IPMAPM1; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_spreadT spreads the local array containing local pieces of U, so + * that on exit to this function, a piece of U is contained in every + * process row. The array IPLEN contains the number of columns of U, + * that should be spread on any given process row. This function also + * probes for the presence of the column panel PBCST. If available, + * this panel will be forwarded. If PBCST is NULL on input, this + * probing mechanism will be disabled. + * + * Arguments + * ========= + * + * PBCST (local input/output) HPL_T_panel * + * On entry, PBCST points to the data structure containing the + * panel (to be broadcast) information. 
+ * + * IFLAG (local input/output) int * + * On entry, IFLAG indicates whether or not the broadcast has + * already been completed. If not, probing will occur, and the + * outcome will be contained in IFLAG on exit. + * + * PANEL (local input/output) HPL_T_panel * + * On entry, PANEL points to the data structure containing the + * panel (to be spread) information. + * + * SIDE (global input) const enum HPL_SIDE + * On entry, SIDE specifies whether the local piece of U located + * in process IPMAP[SRCDIST] should be spread to the right or to + * the left. This feature is used by the equilibration process. + * + * N (global input) const int + * On entry, N specifies the local number of rows of U. N must + * be at least zero. + * + * U (local input/output) double * + * On entry, U is an array of dimension (LDU,*) containing the + * local pieces of U. + * + * LDU (local input) const int + * On entry, LDU specifies the local leading dimension of U. LDU + * should be at least MAX(1,N). + * + * SRCDIST (local input) const int + * On entry, SRCDIST specifies the source process that spreads + * its piece of U. + * + * IPLEN (global input) const int * + * On entry, IPLEN is an array of dimension NPROW+1. This array + * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U + * in each process before process IPMAP[i], with the convention + * that IPLEN[nprow] is the total number of rows. In other words + * IPLEN[i+1] - IPLEN[i] is the local number of rows of U that + * should be moved to process IPMAP[i]. + * + * IPMAP (global input) const int * + * On entry, IPMAP is an array of dimension NPROW. This array + * contains the logarithmic mapping of the processes. In other + * words, IPMAP[myrow] is the absolute coordinate of the sorted + * process. + * + * IPMAPM1 (global input) const int * + * On entry, IPMAPM1 is an array of dimension NPROW. This array + * contains the inverse of the logarithmic mapping contained in + * IPMAP: For i in [0.. 
NPROW) IPMAPM1[IPMAP[i]] = i. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#if 0 + MPI_Datatype type; +#endif + MPI_Status status; + MPI_Comm comm; + unsigned int ip2=1, mask=1, mydist, mydist2; + int Cmsgid=MSGID_BEGIN_PFACT, ibuf, + ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow, + npm1, nprow, partner; +/* .. + * .. Executable Statements .. + */ + myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; + comm = PANEL->grid->col_comm; +/* + * Spread U + */ + if( SIDE == HplLeft ) + { + nprow = ( npm1 = SRCDIST ) + 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) > + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2; + lgth = IPLEN[nprow]; + + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] ); + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm, + &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = 
MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[npm1-partner], Cmsgid, comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[npm1-partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il += ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + else + { + npm1 = ( nprow -= SRCDIST ) - 1; + if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) < + (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return; + + k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } + mydist2 = ( mydist -= SRCDIST ); il = ip2; +/* + * Spread to the right - offset the IPLEN and IPMAP arrays + */ + lgth = IPLEN[SRCDIST+nprow]; +/* + * Spread U + */ + do + { + mask ^= ip2; + + if( ( mydist & mask ) == 0 ) + { + k = il ; ibuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ); + k = il + ip2; lbuf = ( k >= nprow ? 
lgth : IPLEN[SRCDIST+k] ) - ibuf; + + if( lbuf > 0 ) + { + partner = mydist ^ ip2; + + if( mydist & ip2 ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm, &status ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm, &status ); +#endif + } + else if( partner < nprow ) + { +#if 0 + if( ierr == MPI_SUCCESS ) + { + if( LDU == N ) + ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE, + &type ); + else + ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE, + &type ); + } + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_commit( &type ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type, + IPMAP[SRCDIST+partner], Cmsgid, + comm ); + if( ierr == MPI_SUCCESS ) + ierr = MPI_Type_free( &type ); +#else +/* + * In our case, LDU is N - do not use the MPI Datatypes + */ + if( ierr == MPI_SUCCESS ) + ierr = MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N, + MPI_DOUBLE, IPMAP[SRCDIST+partner], + Cmsgid, comm ); +#endif + } + } + } + + if( mydist2 < ip2 ) { ip2 >>= 1; il -= ip2; } + else { mydist2 -= ip2; ip2 >>= 1; il += ip2; } +/* + * Probe for column panel - forward it when available + */ + if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); + + } while( ip2 > 0 ); + } + + if( ierr != MPI_SUCCESS ) + { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); } +/* + * End of HPL_spreadT + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Make.inc 
b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Makefile new file mode 100644 index 000000000..7898665f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/Makefile @@ -0,0 +1,136 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. 
+# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \ + $(INCdir)/hpl_pgesv.h +# +## Object files ######################################################## +# +HPL_pgeobj = \ + HPL_pipid.o HPL_plindx0.o HPL_pdlaswp00N.o \ + HPL_pdlaswp00T.o HPL_perm.o HPL_logsort.o \ + HPL_plindx10.o HPL_plindx1.o HPL_spreadN.o \ + HPL_spreadT.o HPL_rollN.o HPL_rollT.o \ + HPL_equil.o HPL_pdlaswp01N.o HPL_pdlaswp01T.o \ + HPL_pdupdateNN.o HPL_pdupdateNT.o HPL_pdupdateTN.o \ + HPL_pdupdateTT.o HPL_pdtrsv.o HPL_pdgesv0.o \ + HPL_pdgesvK1.o HPL_pdgesvK2.o HPL_pdgesv.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pgeobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# 
###################################################################### +# +HPL_pipid.o : ../HPL_pipid.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pipid.c +HPL_plindx0.o : ../HPL_plindx0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx0.c +HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00N.c +HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00T.c +HPL_perm.o : ../HPL_perm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_perm.c +HPL_logsort.o : ../HPL_logsort.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_logsort.c +HPL_plindx10.o : ../HPL_plindx10.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx10.c +HPL_plindx1.o : ../HPL_plindx1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx1.c +HPL_spreadN.o : ../HPL_spreadN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadN.c +HPL_spreadT.o : ../HPL_spreadT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadT.c +HPL_rollN.o : ../HPL_rollN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollN.c +HPL_rollT.o : ../HPL_rollT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rollT.c +HPL_equil.o : ../HPL_equil.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_equil.c +HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01N.c +HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01T.c +HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNN.c +HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNT.c +HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTN.c +HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTT.c +HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtrsv.c +HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv0.c +HPL_pdgesvK1.o : 
../HPL_pdgesvK1.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK1.c +HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK2.c +HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/src/pgesv/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.am b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.am new file mode 100644 index 000000000..452ea5f06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.am @@ -0,0 +1,13 @@ + +AM_CPPFLAGS = -I$(top_srcdir)/include + +xhpl_LDADD = ../src/libhpl.a + +bin_PROGRAMS = xhpl + +xhpl_SOURCES = \ +matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \ +matgen/HPL_lmul.c matgen/HPL_ladd.c \ +pmatgen/HPL_pdmatgen.c \ +ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \ +ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.in b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.in new file mode 100644 index 000000000..034564545 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/Makefile.in @@ -0,0 +1,698 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = 
$(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +bin_PROGRAMS = xhpl$(EXEEXT) +subdir = testing +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/include/hplconfig.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am__dirstamp = $(am__leading_dot)dirstamp +am_xhpl_OBJECTS = matgen/HPL_jumpit.$(OBJEXT) \ + matgen/HPL_rand.$(OBJEXT) matgen/HPL_setran.$(OBJEXT) \ + matgen/HPL_xjumpm.$(OBJEXT) matgen/HPL_lmul.$(OBJEXT) \ + matgen/HPL_ladd.$(OBJEXT) pmatgen/HPL_pdmatgen.$(OBJEXT) \ + ptest/HPL_pddriver.$(OBJEXT) ptest/HPL_pdinfo.$(OBJEXT) \ + ptest/HPL_pdtest.$(OBJEXT) ptimer/HPL_ptimer.$(OBJEXT) \ + ptimer/HPL_ptimer_cputime.$(OBJEXT) \ + ptimer/HPL_ptimer_walltime.$(OBJEXT) +xhpl_OBJECTS = $(am_xhpl_OBJECTS) +xhpl_DEPENDENCIES = ../src/libhpl.a +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include +depcomp = $(SHELL) $(top_srcdir)/depcomp 
+am__maybe_remake_depfiles = depfiles +am__depfiles_remade = matgen/$(DEPDIR)/HPL_jumpit.Po \ + matgen/$(DEPDIR)/HPL_ladd.Po matgen/$(DEPDIR)/HPL_lmul.Po \ + matgen/$(DEPDIR)/HPL_rand.Po matgen/$(DEPDIR)/HPL_setran.Po \ + matgen/$(DEPDIR)/HPL_xjumpm.Po \ + pmatgen/$(DEPDIR)/HPL_pdmatgen.Po \ + ptest/$(DEPDIR)/HPL_pddriver.Po ptest/$(DEPDIR)/HPL_pdinfo.Po \ + ptest/$(DEPDIR)/HPL_pdtest.Po ptimer/$(DEPDIR)/HPL_ptimer.Po \ + ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po \ + ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(xhpl_SOURCES) +DIST_SOURCES = $(xhpl_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BLAS_LIBS = @BLAS_LIBS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ 
+host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_CPPFLAGS = -I$(top_srcdir)/include +xhpl_LDADD = ../src/libhpl.a +xhpl_SOURCES = \ +matgen/HPL_jumpit.c matgen/HPL_rand.c matgen/HPL_setran.c matgen/HPL_xjumpm.c \ +matgen/HPL_lmul.c matgen/HPL_ladd.c \ +pmatgen/HPL_pdmatgen.c \ +ptest/HPL_pddriver.c ptest/HPL_pdinfo.c ptest/HPL_pdtest.c \ +ptimer/HPL_ptimer.c ptimer/HPL_ptimer_cputime.c ptimer/HPL_ptimer_walltime.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu testing/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu testing/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 
'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) +matgen/$(am__dirstamp): + @$(MKDIR_P) matgen + @: > matgen/$(am__dirstamp) +matgen/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) matgen/$(DEPDIR) + @: > matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_jumpit.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_rand.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_setran.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_xjumpm.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_lmul.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +matgen/HPL_ladd.$(OBJEXT): matgen/$(am__dirstamp) \ + matgen/$(DEPDIR)/$(am__dirstamp) +pmatgen/$(am__dirstamp): + @$(MKDIR_P) pmatgen + @: > pmatgen/$(am__dirstamp) +pmatgen/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) pmatgen/$(DEPDIR) + @: > pmatgen/$(DEPDIR)/$(am__dirstamp) +pmatgen/HPL_pdmatgen.$(OBJEXT): pmatgen/$(am__dirstamp) \ + pmatgen/$(DEPDIR)/$(am__dirstamp) +ptest/$(am__dirstamp): + @$(MKDIR_P) ptest + @: > ptest/$(am__dirstamp) +ptest/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ptest/$(DEPDIR) + @: > ptest/$(DEPDIR)/$(am__dirstamp) +ptest/HPL_pddriver.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptest/HPL_pdinfo.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptest/HPL_pdtest.$(OBJEXT): ptest/$(am__dirstamp) \ + ptest/$(DEPDIR)/$(am__dirstamp) +ptimer/$(am__dirstamp): + @$(MKDIR_P) ptimer + @: > ptimer/$(am__dirstamp) +ptimer/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ptimer/$(DEPDIR) + @: > ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) 
+ptimer/HPL_ptimer_cputime.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) +ptimer/HPL_ptimer_walltime.$(OBJEXT): ptimer/$(am__dirstamp) \ + ptimer/$(DEPDIR)/$(am__dirstamp) + +xhpl$(EXEEXT): $(xhpl_OBJECTS) $(xhpl_DEPENDENCIES) $(EXTRA_xhpl_DEPENDENCIES) + @rm -f xhpl$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(xhpl_OBJECTS) $(xhpl_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f matgen/*.$(OBJEXT) + -rm -f pmatgen/*.$(OBJEXT) + -rm -f ptest/*.$(OBJEXT) + -rm -f ptimer/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_jumpit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_ladd.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_lmul.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_rand.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_setran.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@matgen/$(DEPDIR)/HPL_xjumpm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@pmatgen/$(DEPDIR)/HPL_pdmatgen.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pddriver.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdinfo.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptest/$(DEPDIR)/HPL_pdtest.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: 
$(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: 
$(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f matgen/$(DEPDIR)/$(am__dirstamp) + -rm -f matgen/$(am__dirstamp) + -rm -f pmatgen/$(DEPDIR)/$(am__dirstamp) + -rm -f pmatgen/$(am__dirstamp) + -rm -f ptest/$(DEPDIR)/$(am__dirstamp) + -rm -f ptest/$(am__dirstamp) + -rm -f ptimer/$(DEPDIR)/$(am__dirstamp) + -rm -f ptimer/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f matgen/$(DEPDIR)/HPL_jumpit.Po + -rm -f matgen/$(DEPDIR)/HPL_ladd.Po + -rm -f matgen/$(DEPDIR)/HPL_lmul.Po + -rm -f matgen/$(DEPDIR)/HPL_rand.Po + -rm -f matgen/$(DEPDIR)/HPL_setran.Po + -rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po + -rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po + -rm -f ptest/$(DEPDIR)/HPL_pddriver.Po + -rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po + -rm -f ptest/$(DEPDIR)/HPL_pdtest.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f matgen/$(DEPDIR)/HPL_jumpit.Po + -rm -f matgen/$(DEPDIR)/HPL_ladd.Po + -rm -f matgen/$(DEPDIR)/HPL_lmul.Po + -rm -f matgen/$(DEPDIR)/HPL_rand.Po + -rm -f matgen/$(DEPDIR)/HPL_setran.Po + -rm -f matgen/$(DEPDIR)/HPL_xjumpm.Po + -rm -f pmatgen/$(DEPDIR)/HPL_pdmatgen.Po + -rm -f ptest/$(DEPDIR)/HPL_pddriver.Po + -rm -f ptest/$(DEPDIR)/HPL_pdinfo.Po + -rm -f ptest/$(DEPDIR)/HPL_pdtest.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_cputime.Po + -rm -f ptimer/$(DEPDIR)/HPL_ptimer_walltime.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + 
+.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-binPROGRAMS clean-generic cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic distclean-tags \ + distdir dvi dvi-am html html-am info info-am install \ + install-am install-binPROGRAMS install-data install-data-am \ + install-dvi install-dvi-am install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-binPROGRAMS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_dmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_dmatgen.c new file mode 100644 index 000000000..c14ef0fd1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_dmatgen.c @@ -0,0 +1,134 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_dmatgen +( + const int M, + const int N, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_dmatgen +( M, N, A, LDA, ISEED ) + const int M; + const int N; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_dmatgen generates (or regenerates) a random matrix A.
+ * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * M (input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (input) const int + * On entry, N specifies the number of columns of the matrix A. + * N must be at least zero. + * + * A (output) double * + * On entry, A points to an array of dimension (LDA,N). On exit, + * this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,M). + * + * ISEED (input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd[2], ia1[2], ic1[2], iran1[2], + jseed[2], mult[2]; + int i, incA = LDA - M, j; +/* .. + * .. Executable Statements ..
+ */ + if( ( M <= 0 ) || ( N <= 0 ) ) return; +/* + * Initialize the random sequence: seed irand, ias, ics via HPL_setran + */ + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; + + HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Generate an M by N matrix, column by column (incA skips LDA padding) + */ + for( j = 0; j < N; A += incA, j++ ) + for( i = 0; i < M; A++, i++ ) *A = HPL_rand(); +/* + * End of HPL_dmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_jumpit.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_jumpit.c new file mode 100644 index 000000000..4d4dc4db5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_jumpit.c @@ -0,0 +1,114 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_jumpit +( + int * MULT, + int * IADD, + int * IRANN, + int * IRANM +) +#else +void HPL_jumpit +( MULT, IADD, IRANN, IRANM ) + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_jumpit jumps in the random sequence from the number X(n) encoded + * in IRANN to the number X(m) encoded in IRANM using the constants A + * and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A + * and C obviously depend on m and n, see the function HPL_xjumpm in + * order to initialize them. X(m) also becomes the generator state. + * + * Arguments + * ========= + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant A.
+ * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant C. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2, that contains + * the 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(m). + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + HPL_lmul( IRANN, MULT, j ); /* j = IRANN * MULT; */ + HPL_ladd( j, IADD, IRANM ); /* IRANM = j + IADD; */ + HPL_setran( 0, IRANM ); /* irand = IRANM */ +/* + * End of HPL_jumpit + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_ladd.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_ladd.c new file mode 100644 index 000000000..0d4e4c08c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_ladd.c @@ -0,0 +1,126 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_ladd +( + int * J, + int * K, + int * I +) +#else +void HPL_ladd +( J, K, I ) + int * J; + int * K; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ladd adds two long positive integers K and J, dropping the carry + * out of the 64-bit sum, and puts the result into I.
The long integers I, J, K are encoded on 64 + * bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second + * entry. + * + * Arguments + * ========= + * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + unsigned int itmp0, itmp1; + unsigned int ktmp0 = K[0] & 65535, ktmp1 = (unsigned)K[0] >> 16; + unsigned int ktmp2 = K[1] & 65535, ktmp3 = (unsigned)K[1] >> 16; + unsigned int jtmp0 = J[0] & 65535, jtmp1 = (unsigned)J[0] >> 16; + unsigned int jtmp2 = J[1] & 65535, jtmp3 = (unsigned)J[1] >> 16; + +/* .. + * .. Executable Statements ..
+ */ +/* + * K[1] K[0] K I[0] = (K[0]+J[0]) % 2^32 + * XXXX XXXX carry = (K[0]+J[0]) / 2^32 + * + * + J[1] J[0] J I[1] = K[1] + J[1] + carry + * XXXX XXXX I[1] = I[1] % 2^32 + * ------------- + * I[1] I[0] + * 0XXX XXXX I + */ + itmp0 = ktmp0 + jtmp0; + itmp1 = itmp0 >> 16; I[0] = itmp0 - (itmp1 << 16 ); + itmp1 += ktmp1 + jtmp1; I[0] |= (itmp1 & 65535) << 16; + itmp0 = (itmp1 >> 16) + ktmp2 + jtmp2; + I[1] = itmp0 - ((itmp0 >> 16 ) << 16); + itmp1 = (itmp0 >> 16) + ktmp3 + jtmp3; + I[1] |= (itmp1 & 65535) << 16; +/* + * End of HPL_ladd + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_lmul.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_lmul.c new file mode 100644 index 000000000..254b192f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_lmul.c @@ -0,0 +1,131 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4.
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_lmul +( + int * K, + int * J, + int * I +) +#else +void HPL_lmul +( K, J, I ) + int * K; + int * J; + int * I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_lmul multiplies without carry two long positive integers K and J + * and puts the result into I. The long integers I, J, K are encoded on + * 64 bits using an array of 2 integers. The 32-lower bits are stored in + * the first entry of each array, the 32-higher bits in the second entry + * of each array. For efficiency purposes, the intrinsic modulo function + * is inlined. + * + * Arguments + * ========= + * + * K (local input) int * + * On entry, K is an integer array of dimension 2 containing the + * encoded long integer K.
+ * + * J (local input) int * + * On entry, J is an integer array of dimension 2 containing the + * encoded long integer J. + * + * I (local output) int * + * On entry, I is an integer array of dimension 2. On exit, this + * array contains the encoded long integer result. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int r, c; + unsigned int kk[4], jj[4], res[5]; +/* .. + * .. Executable Statements .. + */ +/* + * Multiplication is done 16 bits at a time. Multiplying two 16-bit + * integers yields a 32-bit result. The lower 16-bits of the result + * are kept in I, and the higher 16-bits are carried over to the + * next multiplication. + */ + for (c = 0; c < 2; ++c) { + kk[2*c] = K[c] & 65535; + kk[2*c+1] = ((unsigned)K[c] >> 16) & 65535; + jj[2*c] = J[c] & 65535; + jj[2*c+1] = ((unsigned)J[c] >> 16) & 65535; + } + + res[0] = 0; + for (c = 0; c < 4; ++c) { + res[c+1] = (res[c] >> 16) & 65535; + res[c] &= 65535; + for (r = 0; r < c+1; ++r) { + res[c] = kk[r] * jj[c-r] + (res[c] & 65535); + res[c+1] += (res[c] >> 16) & 65535; + } + } + + for (c = 0; c < 2; ++c) + I[c] = (int)(((res[2*c+1] & 65535) << 16) | (res[2*c] & 65535)); +/* + * End of HPL_lmul + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_rand.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_rand.c new file mode 100644 index 000000000..fe4e12f5e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_rand.c @@ -0,0 +1,94 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P.
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +double HPL_rand( void ) +#else +double HPL_rand() +#endif +{ +/* + * Purpose + * ======= + * + * HPL_rand generates the next number in the random sequence. This + * function ensures that this number lies in the interval (-0.5, 0.5]. + * + * The static array irand contains the information (2 integers) required + * to generate the next number in the sequence X(n). This number is + * computed as X(n) = 0.5 - (2^32 * irand[1] + irand[0]) / d, where the + * constant d is the largest 64 bit positive unsigned integer. The array + * irand is then updated for the generation of the next number X(n+1) + * in the random sequence as follows X(n+1) = a * X(n) + c. The + * constants a and c should have been preliminarily stored in the arrays + * ias and ics as 2 pairs of integers. The initialization of ias, ics + * and irand is performed by the function HPL_setran. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements ..
+ */ + HPL_setran( 3, j ); +/* + * return number between -0.5 and 0.5 + */ + return( HPL_HALF - + (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF + + (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF ); +/* + * End of HPL_rand + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_setran.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_setran.c new file mode 100644 index 000000000..1a3ca73aa --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_setran.c @@ -0,0 +1,115 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission.
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int ias[2], ics[2], irand[2]; + +#ifdef STDC_HEADERS +void HPL_setran +( + const int OPTION, + int * IRAN +) +#else +void HPL_setran +( OPTION, IRAN ) + const int OPTION; + int * IRAN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_setran initializes the random generator with the encoding of the + * first number X(0) in the sequence, and the constants a and c used to + * compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), + * a and c are stored in the static variables irand, ias and ics. When + * OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the + * values of the input array IRAN. When OPTION is 3, IRAN is set to the + * current value of irand, and irand is then advanced to X(n+1).
+ * + * Arguments + * ========= + * + * OPTION (local input) const int + * On entry, OPTION is an integer that specifies the operations + * to be performed on the random generator as specified above. + * + * IRAN (local input/output) int * + * On entry, IRAN is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of a random number. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2]; +/* .. + * .. Executable Statements .. + */ + if( OPTION == 3 ) + { /* return current value */ + IRAN[0] = irand[0]; IRAN[1] = irand[1]; + HPL_lmul( irand, ias, j ); /* j = irand * ias; */ + HPL_ladd( j, ics, irand ); /* irand = j + ics; */ + } + else if( OPTION == 0 ) { irand[0] = IRAN[0]; irand[1] = IRAN[1]; } + else if( OPTION == 1 ) { ias [0] = IRAN[0]; ias [1] = IRAN[1]; } + else if( OPTION == 2 ) { ics [0] = IRAN[0]; ics [1] = IRAN[1]; } +/* + * End of HPL_setran + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_xjumpm.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_xjumpm.c new file mode 100644 index 000000000..ae70bbc16 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/HPL_xjumpm.c @@ -0,0 +1,158 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_xjumpm +( + const int JUMPM, + int * MULT, + int * IADD, + int * IRANN, + int * IRANM, + int * IAM, + int * ICM +) +#else +void HPL_xjumpm +( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) + const int JUMPM; + int * MULT; + int * IADD; + int * IRANN; + int * IRANM; + int * IAM; + int * ICM; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_xjumpm computes the constants A and C to jump JUMPM numbers in + * the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in + * MULT and IADD specify how to jump from one entry in the sequence to + * the next. + * + * Arguments + * ========= + * + * JUMPM (local input) const int + * On entry, JUMPM specifies the number of entries in the + * sequence to jump over. When JUMPM is less or equal than zero, + * A and C are not computed, IRANM is set to IRANN corresponding + * to a jump of size zero. + * + * MULT (local input) int * + * On entry, MULT is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant a to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IADD (local input) int * + * On entry, IADD is an array of dimension 2, that contains the + * 16-lower and 15-higher bits of the constant c to jump from + * X(n) to X(n+1) = a*X(n) + c in the random sequence. + * + * IRANN (local input) int * + * On entry, IRANN is an array of dimension 2. that contains the + * 16-lower and 15-higher bits of the encoding of X(n). + * + * IRANM (local output) int * + * On entry, IRANM is an array of dimension 2. On exit, this + * array contains respectively the 16-lower and 15-higher bits + * of the encoding of X(n+JUMPM). + * + * IAM (local output) int * + * On entry, IAM is an array of dimension 2. 
On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant A to jump from X(n) to X(n+JUMPM) in the random + * sequence. IAM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant A. When JUMPM is less or + * equal than zero, this array is not referenced. + * + * ICM (local output) int * + * On entry, ICM is an array of dimension 2. On exit, when JUMPM + * is greater than zero, this array contains the encoded + * constant C to jump from X(n) to X(n+JUMPM) in the random + * sequence. ICM(0:1) contains respectively the 16-lower and + * 15-higher bits of this constant C. When JUMPM is less or + * equal than zero, this array is not referenced. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int j[2], k; +/* .. + * .. Executable Statements .. + */ + if( JUMPM > 0 ) + { + IAM[0] = MULT[0]; IAM[1] = MULT[1]; /* IAM = MULT; */ + ICM[0] = IADD[0]; ICM[1] = IADD[1]; /* ICM = IADD; */ + for( k = 1; k <= JUMPM-1; k++ ) + { + HPL_lmul( IAM, MULT, j ); /* j = IAM * MULT; */ + IAM[0] = j[0]; IAM[1] = j[1]; /* IAM = j; */ + HPL_lmul( ICM, MULT, j ); /* j = ICM * MULT; */ + HPL_ladd( IADD, j, ICM ); /* ICM = IADD + j; */ + } + HPL_lmul( IRANN, IAM, j ); /* j = IRANN * IAM; */ + HPL_ladd( j, ICM, IRANM ); /* IRANM = j + ICM; */ + } + else + { /* IRANM = IRANN */ + IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1]; + } +/* + * End of HPL_xjumpm + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Makefile new file mode 100644 index 000000000..f027fbc06 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/Makefile @@ -0,0 +1,95 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_matgen.h +# +## Object files ######################################################## +# +HPL_matobj = \ + HPL_dmatgen.o HPL_ladd.o HPL_lmul.o \ + HPL_xjumpm.o HPL_jumpit.o HPL_rand.o \ + HPL_setran.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_matobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c +HPL_ladd.o : ../HPL_ladd.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c +HPL_lmul.o : ../HPL_lmul.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c +HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c +HPL_jumpit.o : ../HPL_jumpit.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c +HPL_rand.o : ../HPL_rand.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c +HPL_setran.o : ../HPL_setran.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c +# +# ###################################################################### +# +clean : + $(RM) *.o 
*.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/matgen/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c new file mode 100644 index 000000000..2d129c863 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/HPL_pdmatgen.c @@ -0,0 +1,198 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. 
+ * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdmatgen +( + const HPL_T_grid * GRID, + const int M, + const int N, + const int NB, + double * A, + const int LDA, + const int ISEED +) +#else +void HPL_pdmatgen +( GRID, M, N, NB, A, LDA, ISEED ) + const HPL_T_grid * GRID; + const int M; + const int N; + const int NB; + double * A; + const int LDA; + const int ISEED; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdmatgen generates (or regenerates) a parallel random matrix A. + * + * The pseudo-random generator uses the linear congruential algorithm: + * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer + * Programming, Knuth 1973, Vol. 2. + * + * Arguments + * ========= + * + * GRID (local input) const HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * M (global input) const int + * On entry, M specifies the number of rows of the matrix A. + * M must be at least zero. + * + * N (global input) const int + * On entry, N specifies the number of columns of the matrix A. 
+ * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * A (local output) double * + * On entry, A points to an array of dimension (LDA,LocQ(N)). + * On exit, this array contains the coefficients of the randomly + * generated matrix. + * + * LDA (local input) const int + * On entry, LDA specifies the leading dimension of the array A. + * LDA must be at least max(1,LocP(M)). + * + * ISEED (global input) const int + * On entry, ISEED specifies the seed number to generate the + * matrix A. ISEED must be at least zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int iadd [2], ia1 [2], ia2 [2], ia3 [2], + ia4 [2], ia5 [2], ib1 [2], ib2 [2], + ib3 [2], ic1 [2], ic2 [2], ic3 [2], + ic4 [2], ic5 [2], iran1[2], iran2[2], + iran3[2], iran4[2], itmp1[2], itmp2[2], + itmp3[2], jseed[2], mult [2]; + int ib, iblk, ik, jb, jblk, jk, jump1, jump2, + jump3, jump4, jump5, jump6, jump7, lmb, + lnb, mblks, mp, mycol, myrow, nblks, + npcol, nprow, nq; +/* .. + * .. Executable Statements .. 
+ */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; + iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; + jseed[0] = ISEED; jseed[1] = 0; +/* + * Generate an M by N matrix starting in process (0,0) + */ + Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); + Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); + + if( ( mp <= 0 ) || ( nq <= 0 ) ) return; +/* + * Local number of blocks and size of the last one + */ + mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB; + nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB; +/* + * Compute multiplier/adder for various jumps in random sequence + */ + jump1 = 1; jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB; + jump5 = NB; jump6 = mycol; jump7 = myrow * NB; + + HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1, ic1 ); + HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2, ic2 ); + HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3, ic3 ); + HPL_xjumpm( jump4, ia3, ic3, iran1, itmp1, ia4, ic4 ); + HPL_xjumpm( jump5, ia3, ic3, iran1, itmp1, ia5, ic5 ); + HPL_xjumpm( jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2 ); + HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); + HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); +/* + * Save value of first number in sequence + */ + ib1[0] = iran1[0]; ib1[1] = iran1[1]; + ib2[0] = iran1[0]; ib2[1] = iran1[1]; + ib3[0] = iran1[0]; ib3[1] = iran1[1]; + + for( jblk = 0; jblk < nblks; jblk++ ) + { + jb = ( jblk == nblks - 1 ? lnb : NB ); + for( jk = 0; jk < jb; jk++ ) + { + for( iblk = 0; iblk < mblks; iblk++ ) + { + ib = ( iblk == mblks - 1 ? 
lmb : NB ); + for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand(); + HPL_jumpit( ia2, ic2, ib1, iran2 ); + ib1[0] = iran2[0]; ib1[1] = iran2[1]; + } + A += LDA - mp; + HPL_jumpit( ia3, ic3, ib2, iran3 ); + ib1[0] = iran3[0]; ib1[1] = iran3[1]; + ib2[0] = iran3[0]; ib2[1] = iran3[1]; + } + HPL_jumpit( ia4, ic4, ib3, iran4 ); + ib1[0] = iran4[0]; ib1[1] = iran4[1]; + ib2[0] = iran4[0]; ib2[1] = iran4[1]; + ib3[0] = iran4[0]; ib3[1] = iran4[1]; + } +/* + * End of HPL_pdmatgen + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Makefile new file mode 100644 index 000000000..bf33fcd7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/Makefile @@ -0,0 +1,81 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_matgen.h $(INCdir)/hpl_pmisc.h \ + $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_pmatgen.h +# +## Object files ######################################################## +# +HPL_pmaobj = \ + HPL_pdmatgen.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_pmaobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmatgen.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/pmatgen/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL.dat b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL.dat new file mode 100644 index 000000000..47aee883e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL.dat @@ -0,0 +1,31 @@ +HPLinpack benchmark input file +Innovative Computing Laboratory, University of Tennessee +HPL.out output file name (if any) +6 device out (6=stdout,7=stderr,file) +4 # of problems sizes (N) +29 30 34 35 Ns +4 # of NBs +1 2 3 4 NBs +0 PMAP process mapping (0=Row-,1=Column-major) +3 # of process grids (P x Q) +2 1 4 Ps +2 4 1 Qs +16.0 threshold +3 # of panel fact +0 1 2 PFACTs (0=left, 1=Crout, 2=Right) +2 # of 
recursive stopping criterium +2 4 NBMINs (>= 1) +1 # of panels in recursion +2 NDIVs +3 # of recursive panel fact. +0 1 2 RFACTs (0=left, 1=Crout, 2=Right) +1 # of broadcast +0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) +1 # of lookahead depth +0 DEPTHs (>=0) +2 SWAP (0=bin-exch,1=long,2=mix) +64 swapping threshold +0 L1 in (0=transposed,1=no-transposed) form +0 U in (0=transposed,1=no-transposed) form +1 Equilibration (0=no,1=yes) +8 memory alignment in double (> 0) diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pddriver.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pddriver.c new file mode 100644 index 000000000..5e4050f48 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pddriver.c @@ -0,0 +1,293 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +int main +( + int ARGC, + char * * ARGV +) +#else +int main( ARGC, ARGV ) +/* + * .. Scalar Arguments .. + */ + int ARGC; +/* + * .. Array Arguments .. + */ + char * * ARGV; +#endif +{ +/* + * Purpose + * ======= + * + * main is the main driver program for testing the HPL routines. + * This program is driven by a short data file named "HPL.dat". + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. 
+ */ + int nval [HPL_MAX_PARAM], + nbval [HPL_MAX_PARAM], + pval [HPL_MAX_PARAM], + qval [HPL_MAX_PARAM], + nbmval[HPL_MAX_PARAM], + ndvval[HPL_MAX_PARAM], + ndhval[HPL_MAX_PARAM]; + + HPL_T_FACT pfaval[HPL_MAX_PARAM], + rfaval[HPL_MAX_PARAM]; + + HPL_T_TOP topval[HPL_MAX_PARAM]; + + HPL_T_grid grid; + HPL_T_palg algo; + HPL_T_test test; + int L1notran, Unotran, align, equil, in, inb, + inbm, indh, indv, ipfa, ipq, irfa, itop, + mycol, myrow, ns, nbs, nbms, ndhs, ndvs, + npcol, npfs, npqs, nprow, nrfs, ntps, + rank, size, tswap; + HPL_T_ORDER pmapping; + HPL_T_FACT rpfa; + HPL_T_SWAP fswap; +/* .. + * .. Executable Statements .. + */ + MPI_Init( &ARGC, &ARGV ); +#ifdef HPL_CALL_VSIPL + vsip_init((void*)0); +#endif + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Read and check validity of test parameters from input file + * + * HPL Version 1.0, Linpack benchmark input file + * Your message here + * HPL.out output file name (if any) + * 6 device out (6=stdout,7=stderr,file) + * 4 # of problems sizes (N) + * 29 30 34 35 Ns + * 4 # of NBs + * 1 2 3 4 NBs + * 0 PMAP process mapping (0=Row-,1=Column-major) + * 3 # of process grids (P x Q) + * 2 1 4 Ps + * 2 4 1 Qs + * 16.0 threshold + * 3 # of panel fact + * 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) + * 2 # of recursive stopping criterium + * 2 4 NBMINs (>= 1) + * 1 # of panels in recursion + * 2 NDIVs + * 3 # of recursive panel fact. 
+ * 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) + * 1 # of broadcast + * 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) + * 1 # of lookahead depth + * 0 DEPTHs (>=0) + * 2 SWAP (0=bin-exch,1=long,2=mix) + * 4 swapping threshold + * 0 L1 in (0=transposed,1=no-transposed) form + * 0 U in (0=transposed,1=no-transposed) form + * 1 Equilibration (0=no,1=yes) + * 8 memory alignment in double (> 0) + */ + HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval, + &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval, + &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran, + &Unotran, &equil, &align ); +/* + * Loop over different process grids - Define process grid. Go to bottom + * of process grid loop if this case does not use my process. + */ + for( ipq = 0; ipq < npqs; ipq++ ) + { + (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq], + &grid ); + (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol ); + + if( ( myrow < 0 ) || ( myrow >= nprow ) || + ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs; + + for( in = 0; in < ns; in++ ) + { /* Loop over various problem sizes */ + for( inb = 0; inb < nbs; inb++ ) + { /* Loop over various blocking factors */ + for( indh = 0; indh < ndhs; indh++ ) + { /* Loop over various lookahead depths */ + for( itop = 0; itop < ntps; itop++ ) + { /* Loop over various broadcast topologies */ + for( irfa = 0; irfa < nrfs; irfa++ ) + { /* Loop over various recursive factorizations */ + for( ipfa = 0; ipfa < npfs; ipfa++ ) + { /* Loop over various panel factorizations */ + for( inbm = 0; inbm < nbms; inbm++ ) + { /* Loop over various recursive stopping criteria */ + for( indv = 0; indv < ndvs; indv++ ) + { /* Loop over various # of panels in recursion */ +/* + * Set up the algorithm parameters + */ + algo.btopo = topval[itop]; algo.depth = ndhval[indh]; + algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv]; + + algo.pfact = rpfa = pfaval[ipfa]; + + if( L1notran != 0 ) + { + if( rpfa == 
HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrN; + else algo.pffun = HPL_pdpanrlN; + + algo.rfact = rpfa = rfaval[irfa]; + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrN; + else algo.rffun = HPL_pdrpanrlN; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN; + else algo.upfun = HPL_pdupdateNT; + } + else + { + if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT; + else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrT; + else algo.pffun = HPL_pdpanrlT; + + algo.rfact = rpfa = rfaval[irfa]; + if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT; + else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrT; + else algo.rffun = HPL_pdrpanrlT; + + if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN; + else algo.upfun = HPL_pdupdateTT; + } + + algo.fswap = fswap; algo.fsthr = tswap; + algo.equil = equil; algo.align = align; + + HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb] ); + + } + } + } + } + } + } + } + } + (void) HPL_grid_exit( &grid ); +label_end_of_npqs: ; + } +/* + * Print ending messages, close output file, exit. 
+ */ + if( rank == 0 ) + { + test.ktest = test.kpass + test.kfail + test.kskip; +#ifndef HPL_DETAILED_TIMING + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#else + if( test.thrsh > HPL_rzero ) + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); +#endif + + HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest, + "tests with the following results:" ); + if( test.thrsh > HPL_rzero ) + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed and passed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kfail, + "tests completed and failed residual checks," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." ); + } + else + { + HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, + "tests completed without checking," ); + HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, + "tests skipped because of illegal input values." 
); + } + + HPL_fprintf( test.outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( test.outfp, "\nEnd of Tests.\n" ); + HPL_fprintf( test.outfp, "%s%s\n", + "========================================", + "========================================" ); + + if( ( test.outfp != stdout ) && ( test.outfp != stderr ) ) + (void) fclose( test.outfp ); + } +#ifdef HPL_CALL_VSIPL + vsip_finalize((void*)0); +#endif + MPI_Finalize(); + exit( 0 ); + + return( 0 ); +/* + * End of main + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdinfo.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdinfo.c new file mode 100644 index 000000000..5db4e73d7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdinfo.c @@ -0,0 +1,1183 @@ + /* + * -- High Performance Computing Linpack Benchmark (HPL) + * Modifications Copyright (C) 2023 Intel Corporation​ + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. 
The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + * + * SPDX-License-Identifier: BSD-4-Clause + */ + + + +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdinfo +( + HPL_T_test * TEST, + int * NS, + int * N, + int * NBS, + int * NB, + HPL_T_ORDER * PMAPPIN, + int * NPQS, + int * P, + int * Q, + int * NPFS, + HPL_T_FACT * PF, + int * NBMS, + int * NBM, + int * NDVS, + int * NDV, + int * NRFS, + HPL_T_FACT * RF, + int * NTPS, + HPL_T_TOP * TP, + int * NDHS, + int * DH, + HPL_T_SWAP * FSWAP, + int * TSWAP, + int * L1NOTRAN, + int * UNOTRAN, + int * EQUIL, + int * ALIGN +) +#else +void HPL_pdinfo +( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN ) + HPL_T_test * TEST; + int * NS; + int * N; + int * NBS; + int * NB; + HPL_T_ORDER * PMAPPIN; + int * NPQS; + int * P; + int * Q; + int * NPFS; + HPL_T_FACT * PF; + int * NBMS; + int * NBM; + int * NDVS; + int * NDV; + int * NRFS; + HPL_T_FACT * RF; + int * NTPS; + HPL_T_TOP * TP; + int * NDHS; + int * DH; + HPL_T_SWAP * FSWAP; + int * TSWAP; + int * L1NOTRAN; + int * UNOTRAN; + int * EQUIL; + int * ALIGN; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdinfo reads the startup information for the various tests and + * transmits it to all processes. + * + * Arguments + * ========= + * + * TEST (global output) HPL_T_test * + * On entry, TEST points to a testing data structure. On exit, + * the fields of this data structure are initialized as follows: + * TEST->outfp specifies the output file where the results will + * be printed. It is only defined and used by the process 0 of + * the grid. TEST->thrsh specifies the threshhold value for the + * test ratio. TEST->epsil is the relative machine precision of + * the distributed computer. Finally the test counters, kfail, + * kpass, kskip, ktest are initialized to zero. 
+ * + * NS (global output) int * + * On exit, NS specifies the number of different problem sizes + * to be tested. NS is less than or equal to HPL_MAX_PARAM. + * + * N (global output) int * + * On entry, N is an array of dimension HPL_MAX_PARAM. On exit, + * the first NS entries of this array contain the problem sizes + * to run the code with. + * + * NBS (global output) int * + * On exit, NBS specifies the number of different distribution + * blocking factors to be tested. NBS must be less than or equal + * to HPL_MAX_PARAM. + * + * NB (global output) int * + * On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, + * the first NBS entries of this array contain the values of the + * various distribution blocking factors, to run the code with. + * + * PMAPPIN (global output) HPL_T_ORDER * + * On exit, PMAPPIN specifies the process mapping onto the + * nodes of the MPI machine configuration. PMAPPIN defaults to + * row-major ordering. + * + * NPQS (global output) int * + * On exit, NPQS specifies the number of different values that + * can be used for P and Q, i.e., the number of process grids to + * run the code with. NPQS must be less than or equal to + * HPL_MAX_PARAM. + * + * P (global output) int * + * On entry, P is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of P, + * the number of process rows of the NPQS grids to run the code + * with. + * + * Q (global output) int * + * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPQS entries of this array contain the values of Q, + * the number of process columns of the NPQS grids to run the + * code with. + * + * NPFS (global output) int * + * On exit, NPFS specifies the number of different values that + * can be used for PF : the panel factorization algorithm to run + * the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+ * + * PF (global output) HPL_T_FACT * + * On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NPFS entries of this array contain the various + * panel factorization algorithms to run the code with. + * + * NBMS (global output) int * + * On exit, NBMS specifies the number of various recursive + * stopping criteria to be tested. NBMS must be less than or + * equal to HPL_MAX_PARAM. + * + * NBM (global output) int * + * On entry, NBM is an array of dimension HPL_MAX_PARAM. On + * exit, the first NBMS entries of this array contain the values + * of the various recursive stopping criteria to be tested. + * + * NDVS (global output) int * + * On exit, NDVS specifies the number of various numbers of + * panels in recursion to be tested. NDVS is less than or equal + * to HPL_MAX_PARAM. + * + * NDV (global output) int * + * On entry, NDV is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDVS entries of this array contain the values + * of the various numbers of panels in recursion to be tested. + * + * NRFS (global output) int * + * On exit, NRFS specifies the number of different values that + * can be used for RF : the recursive factorization algorithm to + * be tested. NRFS is less than or equal to HPL_MAX_PARAM. + * + * RF (global output) HPL_T_FACT * + * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, + * the first NRFS entries of this array contain the various + * recursive factorization algorithms to run the code with. + * + * NTPS (global output) int * + * On exit, NTPS specifies the number of different values that + * can be used for the broadcast topologies to be tested. NTPS + * is less than or equal to HPL_MAX_PARAM. + * + * TP (global output) HPL_T_TOP * + * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, + * the first NTPS entries of this array contain the various + * broadcast (along rows) topologies to run the code with. 
+ * + * NDHS (global output) int * + * On exit, NDHS specifies the number of different values that + * can be used for the lookahead depths to be tested. NDHS is + * less than or equal to HPL_MAX_PARAM. + * + * DH (global output) int * + * On entry, DH is an array of dimension HPL_MAX_PARAM. On + * exit, the first NDHS entries of this array contain the values + * of lookahead depths to run the code with. Such a value is at + * least 0 (no-lookahead) or greater than zero. + * + * FSWAP (global output) HPL_T_SWAP * + * On exit, FSWAP specifies the swapping algorithm to be used in + * all tests. + * + * TSWAP (global output) int * + * On exit, TSWAP specifies the swapping threshold as a number + * of columns when the mixed swapping algorithm was chosen. + * + * L1NOTRA (global output) int * + * On exit, L1NOTRAN specifies whether the upper triangle of the + * panels of columns should be stored in no-transposed form + * (L1NOTRAN=1) or in transposed form (L1NOTRAN=0). + * + * UNOTRAN (global output) int * + * On exit, UNOTRAN specifies whether the panels of rows should + * be stored in no-transposed form (UNOTRAN=1) or transposed + * form (UNOTRAN=0) during their broadcast. + * + * EQUIL (global output) int * + * On exit, EQUIL specifies whether equilibration during the + * swap-broadcast of the panel of rows should be performed + * (EQUIL=1) or not (EQUIL=0). + * + * ALIGN (global output) int * + * On exit, ALIGN specifies the alignment of the dynamically + * allocated buffers in double precision words. ALIGN is greater + * than zero. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + char file[HPL_LINE_MAX], line[HPL_LINE_MAX], + auth[HPL_LINE_MAX], num [HPL_LINE_MAX]; + FILE * infp; + int * iwork = NULL; + char * lineptr; + int error=0, fid, i, j, lwork, maxp, nprocs, + rank, size; +/* .. + * .. Executable Statements .. 
+ */ + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); + MPI_Comm_size( MPI_COMM_WORLD, &size ); +/* + * Initialize the TEST data structure with default values + */ + TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0; + TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0; +/* + * Process 0 reads the input data, broadcasts to other processes and + * writes needed information to TEST->outfp. + */ + if( rank == 0 ) + { +/* + * Open file and skip data file header + */ + if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "cannot open file HPL.dat" ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) fgets( auth, HPL_LINE_MAX - 2, infp ); +/* + * Read name and unit number for summary output file + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", file ); + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + fid = atoi( num ); + if ( fid == 6 ) TEST->outfp = stdout; + else if( fid == 7 ) TEST->outfp = stderr; + else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.", + file ); + error = 1; goto label_error; + } +/* + * Read and check the parameter values for the tests. 
+ * + * Problem size (>=0) (N) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NS = atoi( num ); + if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d", + "Number of values of N is less than 1 or greater than", + HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( N[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of N less than 0" ); + error = 1; goto label_error; + } + } +/* + * Block size (>=1) (NB) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBS = atoi( num ); + if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NB is less than 1 or", + "greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NB[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NB less than 1" ); + error = 1; goto label_error; + } + } +/* + * Process grids, mapping, (>=1) (P, Q) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); + *PMAPPIN = ( atoi( num ) == 1 ? 
HPL_COLUMN_MAJOR : HPL_ROW_MAJOR ); + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPQS = atoi( num ); + if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of grids is less", + "than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( P[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of P less than 1" ); + error = 1; goto label_error; + } + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPQS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( Q[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of Q less than 1" ); + error = 1; goto label_error; + } + } +/* + * Check for enough processes in machine configuration + */ + maxp = 0; + for( i = 0; i < *NPQS; i++ ) + { nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); } + if( maxp > size ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Need at least %d processes for these tests", maxp ); + error = 1; goto label_error; + } +/* + * Checking threshold value (TEST->thrsh) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num ); +/* + * Panel factorization algorithm (PF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NPFS = atoi( num ); + if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "number of values of PFACT", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NPFS; i++ ) + { + 
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) PF[ i ] = HPL_CROUT; + else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING; + else PF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Recursive stopping criterium (>=1) (NBM) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NBMS = atoi( num ); + if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NBMIN", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NBMS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NBM[ i ] = atoi( num ) ) < 1 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NBMIN less than 1" ); + error = 1; goto label_error; + } + } +/* + * Number of panels in recursion (>=2) (NDV) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDVS = atoi( num ); + if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of NDIV", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDVS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + if( ( NDV[ i ] = atoi( num ) ) < 2 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of NDIV less than 2" ); + error = 1; goto label_error; + } + } +/* + * Recursive panel factorization (RF) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NRFS = atoi( num ); + if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of RFACT", + "is 
less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NRFS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING; + else if( j == 1 ) RF[ i ] = HPL_CROUT; + else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING; + else RF[ i ] = HPL_RIGHT_LOOKING; + } +/* + * Broadcast topology (TP) (0=rg, 1=2rg, 2=rgM, 3=2rgM, 4=L) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NTPS = atoi( num ); + if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of BCAST", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NTPS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; + j = atoi( num ); + if( j == 0 ) TP[ i ] = HPL_1RING; + else if( j == 1 ) TP[ i ] = HPL_1RING_M; + else if( j == 2 ) TP[ i ] = HPL_2RING; + else if( j == 3 ) TP[ i ] = HPL_2RING_M; + else if( j == 4 ) TP[ i ] = HPL_BLONG; + else if( j == 5 ) TP[ i ] = HPL_BLONG_M; + else TP[ i ] = HPL_1RING_M; + } +/* + * Lookahead depth (>=0) (NDH) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *NDHS = atoi( num ); + if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", + "Number of values of DEPTH", + "is less than 1 or greater than", HPL_MAX_PARAM ); + error = 1; goto label_error; + } + (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; + for( i = 0; i < *NDHS; i++ ) + { + (void) sscanf( lineptr, "%s", num ); + lineptr += strlen( num ) + 1; + if( ( DH[ i ] = atoi( num ) ) < 0 ) + { + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Value of DEPTH less than 0" ); + error = 1; 
goto label_error; + } + } +/* + * Swapping algorithm (0,1 or 2) (FSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); j = atoi( num ); + if( j == 0 ) *FSWAP = HPL_SWAP00; + else if( j == 1 ) *FSWAP = HPL_SWAP01; + else if( j == 2 ) *FSWAP = HPL_SW_MIX; + else *FSWAP = HPL_SWAP01; +/* + * Swapping threshold (>=0) (TSWAP) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *TSWAP = atoi( num ); + if( *TSWAP <= 0 ) *TSWAP = 0; +/* + * L1 in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num ); + if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; +/* + * U in (no-)transposed form (0 or 1) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num ); + if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0; +/* + * Equilibration (0=no, 1=yes) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *EQUIL = atoi( num ); + if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1; +/* + * Memory alignment in bytes (> 0) (ALIGN) + */ + (void) fgets( line, HPL_LINE_MAX - 2, infp ); + (void) sscanf( line, "%s", num ); *ALIGN = atoi( num ); + if( *ALIGN <= 0 ) *ALIGN = 4; +/* + * Close input file + */ +label_error: + if (infp != NULL) + (void) fclose( infp ); + } + else { TEST->outfp = NULL; } +/* + * Check for error on reading input file + */ + (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max, + MPI_COMM_WORLD ); + if( error ) + { + if( rank == 0 ) + HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", + "Illegal input in file HPL.dat. Exiting ..." 
); + MPI_Finalize(); +#ifdef HPL_CALL_VSIPL + (void) vsip_finalize( NULL ); +#endif + exit( 1 ); + } +/* + * Compute and broadcast machine epsilon + */ + TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS ); +/* + * Pack information arrays and broadcast + */ + (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0, + MPI_COMM_WORLD ); +/* + * Broadcast array sizes + */ + iwork = (int *)malloc( (size_t)(15) * sizeof( int ) ); + if( rank == 0 ) + { + iwork[ 0] = *NS; iwork[ 1] = *NBS; + iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 ); + iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS; + iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS; + iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN; + iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN; + } + (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD ); + if( rank != 0 ) + { + *NS = iwork[ 0]; *NBS = iwork[ 1]; + *PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR ); + *NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5]; + *NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8]; + *NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11]; + *UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14]; + } + if( iwork ) free( iwork ); +/* + * Pack information arrays and broadcast + */ + lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + + (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1; + + if (lwork < 0) + exit(EXIT_FAILURE); + + + iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); + if( rank == 0 ) + { + j = 0; + for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; } + for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; } + for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; } + for( i = 0; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( PF[i] == HPL_CROUT ) iwork[j] = 1; + else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + 
} + for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; } + for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; + else if( RF[i] == HPL_CROUT ) iwork[j] = 1; + else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) iwork[j] = 0; + else if( TP[i] == HPL_1RING_M ) iwork[j] = 1; + else if( TP[i] == HPL_2RING ) iwork[j] = 2; + else if( TP[i] == HPL_2RING_M ) iwork[j] = 3; + else if( TP[i] == HPL_BLONG ) iwork[j] = 4; + else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5; + j++; + } + for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; } + + if( *FSWAP == HPL_SWAP00 ) iwork[j] = 0; + else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1; + else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2; + j++; + } + (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0, + MPI_COMM_WORLD ); + if ((rank != 0) && (iwork != NULL)) + { + j = 0; + for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; } + for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; } + for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; } + + for( i = 0; i < *NPFS; i++ ) + { + if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) PF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; } + for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; } + for( i = 0; i < *NRFS; i++ ) + { + if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING; + else if( iwork[j] == 1 ) RF[i] = HPL_CROUT; + else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING; + j++; + } + for( i = 0; i < *NTPS; i++ ) + { + if( iwork[j] == 0 ) TP[i] = HPL_1RING; + else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M; + else if( iwork[j] == 2 ) TP[i] = HPL_2RING; + else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M; + else if( iwork[j] == 4 ) TP[i] = HPL_BLONG; + else if( iwork[j] == 
5 ) TP[i] = HPL_BLONG_M; + j++; + } + for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; } + + if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00; + else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01; + else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX; + j++; + + if( iwork ) free( iwork ); + } +/* + * regurgitate input + */ + if( rank == 0 ) + { + + if (TEST->outfp != NULL){ + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "HPLinpack 2.3 -- High-Performance Linpack benchmark -- ", + " December 2, 2018" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Written by A. Petitet and R. Clint Whaley, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Piotr Luszczek, ", + "Innovative Computing Laboratory, UTK" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "Modified by Julien Langou, ", + "University of Colorado Denver"); + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + + HPL_fprintf( TEST->outfp, "\n%s\n", + "An explanation of the input/output parameters follows:" ); + HPL_fprintf( TEST->outfp, "%s\n", + "T/V : Wall time / encoded variant." ); + HPL_fprintf( TEST->outfp, "%s\n", + "N : The order of the coefficient matrix A." ); + HPL_fprintf( TEST->outfp, "%s\n", + "NB : The partitioning blocking factor." ); + HPL_fprintf( TEST->outfp, "%s\n", + "P : The number of process rows." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Q : The number of process columns." ); + HPL_fprintf( TEST->outfp, "%s\n", + "Time : Time in seconds to solve the linear system." ); + HPL_fprintf( TEST->outfp, "%s\n\n", + "Gflops : Rate of execution for solving the linear system." 
); + HPL_fprintf( TEST->outfp, "%s\n", + "The following parameter values will be used:" ); +/* + * Problem size + */ + HPL_fprintf( TEST->outfp, "\nN :" ); + for( i = 0; i < Mmin( 8, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + if( *NS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", N[i] ); + } + } +/* + * Distribution blocking factor + */ + HPL_fprintf( TEST->outfp, "\nNB :" ); + for( i = 0; i < Mmin( 8, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + if( *NBS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); + } + } +/* + * Process mapping + */ + HPL_fprintf( TEST->outfp, "\nPMAP :" ); + if( *PMAPPIN == HPL_ROW_MAJOR ) + HPL_fprintf( TEST->outfp, " Row-major process mapping" ); + else if( *PMAPPIN == HPL_COLUMN_MAJOR ) + HPL_fprintf( TEST->outfp, " Column-major process mapping" ); +/* + * Process grid + */ + HPL_fprintf( TEST->outfp, "\nP :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", P[i] ); + } + } + HPL_fprintf( TEST->outfp, "\nQ :" ); + for( i = 0; i < Mmin( 8, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPQS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + if( *NPQS > 16 ) + { + HPL_fprintf( 
TEST->outfp, "\n " ); + for( i = 16; i < *NPQS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); + } + } +/* + * Panel Factorization + */ + HPL_fprintf( TEST->outfp, "\nPFACT :" ); + for( i = 0; i < Mmin( 8, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NPFS ); i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NPFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NPFS; i++ ) + { + if( PF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( PF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( PF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Recursive stopping criterium + */ + HPL_fprintf( TEST->outfp, "\nNBMIN :" ); + for( i = 0; i < Mmin( 8, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NBMS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + if( *NBMS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NBMS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); + } + } +/* + * Number of panels in recursion + */ + HPL_fprintf( TEST->outfp, "\nNDIV :" ); + for( i = 0; i < Mmin( 8, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDVS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + if( *NDVS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i 
< *NDVS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); + } + } +/* + * Recursive Factorization + */ + HPL_fprintf( TEST->outfp, "\nRFACT :" ); + for( i = 0; i < Mmin( 8, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NRFS ); i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + if( *NRFS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NRFS; i++ ) + { + if( RF[i] == HPL_LEFT_LOOKING ) + HPL_fprintf( TEST->outfp, " Left " ); + else if( RF[i] == HPL_CROUT ) + HPL_fprintf( TEST->outfp, " Crout " ); + else if( RF[i] == HPL_RIGHT_LOOKING ) + HPL_fprintf( TEST->outfp, " Right " ); + } + } + } +/* + * Broadcast topology + */ + HPL_fprintf( TEST->outfp, "\nBCAST :" ); + for( i = 0; i < Mmin( 8, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NTPS ); i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else 
if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + if( *NTPS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NTPS; i++ ) + { + if( TP[i] == HPL_1RING ) + HPL_fprintf( TEST->outfp, " 1ring " ); + else if( TP[i] == HPL_1RING_M ) + HPL_fprintf( TEST->outfp, " 1ringM " ); + else if( TP[i] == HPL_2RING ) + HPL_fprintf( TEST->outfp, " 2ring " ); + else if( TP[i] == HPL_2RING_M ) + HPL_fprintf( TEST->outfp, " 2ringM " ); + else if( TP[i] == HPL_BLONG ) + HPL_fprintf( TEST->outfp, " Blong " ); + else if( TP[i] == HPL_BLONG_M ) + HPL_fprintf( TEST->outfp, " BlongM " ); + } + } + } +/* + * Lookahead depths + */ + HPL_fprintf( TEST->outfp, "\nDEPTH :" ); + for( i = 0; i < Mmin( 8, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 8 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 8; i < Mmin( 16, *NDHS ); i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + if( *NDHS > 16 ) + { + HPL_fprintf( TEST->outfp, "\n " ); + for( i = 16; i < *NDHS; i++ ) + HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); + } + } +/* + * Swapping algorithm + */ + HPL_fprintf( TEST->outfp, "\nSWAP :" ); + if( *FSWAP == HPL_SWAP00 ) + HPL_fprintf( TEST->outfp, " Binary-exchange" ); + else if( *FSWAP == HPL_SWAP01 ) + HPL_fprintf( TEST->outfp, " Spread-roll (long)" ); + else if( *FSWAP == HPL_SW_MIX ) + HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP ); +/* + * L1 storage form + */ + HPL_fprintf( TEST->outfp, "\nL1 :" ); + if( *L1NOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * U storage form + */ + HPL_fprintf( TEST->outfp, "\nU :" ); + if( *UNOTRAN != 0 ) + HPL_fprintf( TEST->outfp, " no-transposed form" ); + else + HPL_fprintf( TEST->outfp, " transposed form" ); +/* + * Equilibration + */ + 
HPL_fprintf( TEST->outfp, "\nEQUIL :" ); + if( *EQUIL != 0 ) + HPL_fprintf( TEST->outfp, " yes" ); + else + HPL_fprintf( TEST->outfp, " no" ); +/* + * Alignment + */ + HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words", + *ALIGN ); + + HPL_fprintf( TEST->outfp, "\n\n" ); +/* + * For testing only + */ + if( TEST->thrsh > HPL_rzero ) + { + HPL_fprintf( TEST->outfp, "%s%s\n\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The matrix A is randomly generated for each test." ); + HPL_fprintf( TEST->outfp, "%s\n", + "- The following scaled residual check will be computed:" ); + HPL_fprintf( TEST->outfp, "%s\n", + " ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" ); + HPL_fprintf( TEST->outfp, "%s %21.6e\n", + "- The relative machine precision (eps) is taken to be ", + TEST->epsil ); + HPL_fprintf( TEST->outfp, "%s %11.1f\n\n", + "- Computational tests pass if scaled residuals are less than ", + TEST->thrsh ); + } + } + } +/* + * End of HPL_pdinfo + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdtest.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdtest.c new file mode 100644 index 000000000..73a62a7ff --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/HPL_pdtest.c @@ -0,0 +1,438 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +#ifdef STDC_HEADERS +void HPL_pdtest +( + HPL_T_test * TEST, + HPL_T_grid * GRID, + HPL_T_palg * ALGO, + const int N, + const int NB +) +#else +void HPL_pdtest +( TEST, GRID, ALGO, N, NB ) + HPL_T_test * TEST; + HPL_T_grid * GRID; + HPL_T_palg * ALGO; + const int N; + const int NB; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_pdtest performs one test given a set of parameters such as the + * process grid, the problem size, the distribution blocking factor ... + * This function generates the data, calls and times the linear system + * solver, checks the accuracy of the obtained vector solution and + * writes this information to the file pointed to by TEST->outfp. + * + * Arguments + * ========= + * + * TEST (global input) HPL_T_test * + * On entry, TEST points to a testing data structure: outfp + * specifies the output file where the results will be printed. + * It is only defined and used by the process 0 of the grid. + * thrsh specifies the threshold value for the test ratio. + * Concretely, a test is declared "PASSED" if and only if the + * following inequality is satisfied: + * ||Ax-b||_oo / ( epsil * + * ( || x ||_oo * || A ||_oo + || b ||_oo ) * + * N ) < thrsh. + * epsil is the relative machine precision of the distributed + * computer. Finally the test counters, kfail, kpass, kskip and + * ktest are updated as follows: if the test passes, kpass is + * incremented by one; if the test fails, kfail is incremented + * by one; if the test is skipped, kskip is incremented by one. + * ktest is left unchanged. + * + * GRID (local input) HPL_T_grid * + * On entry, GRID points to the data structure containing the + * process grid information. + * + * ALGO (global input) HPL_T_palg * + * On entry, ALGO points to the data structure containing the + * algorithmic parameters to be used for this test.
+ * + * N (global input) const int + * On entry, N specifies the order of the coefficient matrix A. + * N must be at least zero. + * + * NB (global input) const int + * On entry, NB specifies the blocking factor used to partition + * and distribute the matrix A. NB must be larger than one. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ +#ifdef HPL_DETAILED_TIMING + double HPL_w[HPL_TIMING_N]; +#endif + HPL_T_pmat mat; + double wtime[1]; + int info[3]; + double Anorm1, AnormI, Gflops, Xnorm1, XnormI, + BnormI, resid0, resid1; + double * Bptr; + void * vptr = NULL; + static int first=1; + int ii, ip2, mycol, myrow, npcol, nprow, nq; + char ctop, cpfact, crfact; + time_t current_time_start, current_time_end; +/* .. + * .. Executable Statements .. + */ + (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); + + mat.n = N; mat.nb = NB; mat.info = 0; + mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow ); + nq = HPL_numroc( N, NB, NB, mycol, 0, npcol ); + mat.nq = nq + 1; +/* + * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is + * N by N+1. One column is added in every process column for the solve. + * The result however is stored in a 1 x N vector replicated in every + * process row. In every process, A is lda * (nq+1), x is 1 * nq and the + * workspace is mp. 
+ * + * Ensure that lda is a multiple of ALIGN and not a power of 2 + */ + mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align; + do + { + ii = ( mat.ld += ALGO->align ); ip2 = 1; + while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } + } + while( mat.ld == ip2 ); +/* + * Allocate dynamic memory + */ + vptr = (void*)malloc( ( (size_t)(ALGO->align) + + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * + sizeof(double) ); + info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol; + (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max, + GRID->all_comm ); + if( info[0] != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", + "[%d,%d] %s", info[1], info[2], + "Memory allocation failed for A, x and b. Skip." ); + (TEST->kskip)++; + /* some processes might have succeeded with allocation */ + if (vptr) free(vptr); + return; + } +/* + * generate matrix and right-hand-side, [ A | b ] which is N by N+1. + */ + mat.A = (double *)HPL_PTR( vptr, + ((size_t)(ALGO->align) * sizeof(double) ) ); + mat.X = Mptr( mat.A, 0, mat.nq, mat.ld ); + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); +#ifdef HPL_CALL_VSIPL + mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A), + (vsip_length)(mat.ld * mat.nq), + VSIP_MEM_NONE ); +#endif +/* + * Solve linear system + */ + HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm ); + time( &current_time_start ); + HPL_ptimer( 0 ); + HPL_pdgesv( GRID, ALGO, &mat ); + HPL_ptimer( 0 ); + time( &current_time_end ); +#ifdef HPL_CALL_VSIPL + (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); + vsip_blockdestroy_d( mat.block ); +#endif +/* + * Gather max of all CPU and WALL clock timings and print timing results + */ + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + 1, 0, wtime ); + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + if( first ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); +
HPL_fprintf( TEST->outfp, "%s%s\n", + "T/V N NB P Q", + " Time Gflops" ); + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + if( TEST->thrsh <= HPL_rzero ) first = 0; + } +/* + * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve. + * Print WALL time + */ + Gflops = ( ( (double)(N) / 1.0e+9 ) * + ( (double)(N) / wtime[0] ) ) * + ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) ); + + cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == + (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : + ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? + (char)('C') : (char)('R') ) ); + + if( ALGO->btopo == HPL_1RING ) ctop = '0'; + else if( ALGO->btopo == HPL_1RING_M ) ctop = '1'; + else if( ALGO->btopo == HPL_2RING ) ctop = '2'; + else if( ALGO->btopo == HPL_2RING_M ) ctop = '3'; + else if( ALGO->btopo == HPL_BLONG ) ctop = '4'; + else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5'; + + if( wtime[0] > HPL_rzero ) { + HPL_fprintf( TEST->outfp, + "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f %19.4e\n", + ( GRID->order == HPL_ROW_MAJOR ? 
'R' : 'C' ), + ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin, + N, NB, nprow, npcol, wtime[0], Gflops ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() start time %s\n", ctime( &current_time_start ) ); + HPL_fprintf( TEST->outfp, + "HPL_pdgesv() end time %s\n", ctime( &current_time_end ) ); + } + } +#ifdef HPL_DETAILED_TIMING + HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, + HPL_TIMING_N, HPL_TIMING_BEG, HPL_w ); + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V", + "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" ); +/* + * Recursive panel factorization + */ + if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time rfact . . . : %18.2f\n", + HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization + */ + if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time pfact . . : %18.2f\n", + HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] ); +/* + * Panel factorization (swap) + */ + if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time mxswp . . : %18.2f\n", + HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] ); +/* + * Update + */ + if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time update . . : %18.2f\n", + HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] ); +/* + * Update (swap) + */ + if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "+ Max aggregated wall time laswp . . : %18.2f\n", + HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] ); +/* + * Upper triangular system solve + */ + if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero ) + HPL_fprintf( TEST->outfp, + "Max aggregated wall time up tr sv .
: %18.2f\n", + HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] ); + + if( TEST->thrsh <= HPL_rzero ) + HPL_fprintf( TEST->outfp, "%s%s\n", + "========================================", + "========================================" ); + } +#endif +/* + * Quick return, if I am not interested in checking the computations + */ + if( TEST->thrsh <= HPL_rzero ) + { (TEST->kpass)++; if( vptr ) free( vptr ); return; } +/* + * Check info returned by solve + */ + if( mat.info != 0 ) + { + if( ( myrow == 0 ) && ( mycol == 0 ) ) + HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", + "Error code returned by solve is", mat.info, "skip" ); + (TEST->kskip)++; + if( vptr ) free( vptr ); return; + } +/* + * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x, + * and norm inf of b - A x. Display residual checks. + */ + HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); + Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld ); + AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld ); +/* + * Because x is distributed in process rows, switch the norms + */ + XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 ); + Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 ); +/* + * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to + * find the max (in the col). Then (3) broadcast along the rows so that every + * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5] + * for the entries of B, it is very likely that BnormI (<=,~) 0.5. 
+ */ + Bptr = Mptr( mat.A, 0, nq, mat.ld ); + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){ + if( mat.mp > 0 ) + { + BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI ); + } + else + { + BnormI = HPL_rzero; + } + (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max, + GRID->col_comm ); + } + (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE, + HPL_indxg2p( N, NB, NB, 0, npcol ), + GRID->row_comm ); +/* + * If I own b, compute ( b - A x ) and ( - A x ) otherwise + */ + if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 ); + } + else if( nq > 0 ) + { + HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, + mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 ); + } + else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } +/* + * Reduce the distributed residual in process column 0 + */ + if( mat.mp > 0 ) + (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0, + GRID->row_comm ); +/* + * Compute || b - A x ||_oo + */ + resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld ); +/* + * Computes and displays norms, residuals ... + */ + if( N <= 0 ) + { + resid1 = HPL_rzero; + } + else + { + resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) ); + } + + if( resid1 < TEST->thrsh ) (TEST->kpass)++; + else (TEST->kfail)++; + + if( ( myrow == 0 ) && ( mycol == 0 ) ) + { + HPL_fprintf( TEST->outfp, "%s%s\n", + "----------------------------------------", + "----------------------------------------" ); + HPL_fprintf( TEST->outfp, "%s%16.8e%s%s\n", + "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1, + " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) ); + + if(resid1 >= TEST->thrsh ) + { + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||Ax-b||_oo . . . . . . . . . . . . . . . . . = ", resid0 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_oo . . . . . . . . . . . 
. . . . . . . . = ", AnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||A||_1 . . . . . . . . . . . . . . . . . . . = ", Anorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||x||_1 . . . . . . . . . . . . . . . . . . . = ", Xnorm1 ); + HPL_fprintf( TEST->outfp, "%s%18.6f\n", + "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI ); + } + } + if( vptr ) free( vptr ); +/* + * End of HPL_pdtest + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Makefile new file mode 100644 index 000000000..cfc96e667 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/Makefile @@ -0,0 +1,94 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \ + $(INCdir)/hpl_gesv.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_pauxil.h \ + $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h $(INCdir)/hpl_pmatgen.h \ + $(INCdir)/hpl_ptimer.h $(INCdir)/hpl_ptest.h +# +## Executable names #################################################### +# +xhpl = $(BINdir)/xhpl +# +## Object files ######################################################## +# +HPL_pteobj = \ + HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o +# +## Targets ############################################################# +# +all : dexe +# +dexe : dexe.grd +# +$(BINdir)/HPL.dat : ../HPL.dat + ( $(CP) ../HPL.dat $(BINdir) ) +# +dexe.grd: $(HPL_pteobj) $(HPLlib) + $(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS) + $(MAKE) $(BINdir)/HPL.dat + $(TOUCH) dexe.grd +# +# ###################################################################### +# +HPL_pddriver.o : ../HPL_pddriver.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pddriver.c +HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdinfo.c +HPL_pdtest.o : ../HPL_pdtest.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtest.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/dexe.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptest/intel64/dexe.grd new file mode 100644 index 000000000..e69de29bb diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer.c new file mode 100644 index 
000000000..202416079 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer.c @@ -0,0 +1,358 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_ptimer_disabled; +static double HPL_ptimer_cpusec [HPL_NPTIMER], + HPL_ptimer_cpustart [HPL_NPTIMER], + HPL_ptimer_wallsec [HPL_NPTIMER], + HPL_ptimer_wallstart[HPL_NPTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_ptimer_boot( void ) +#else +void HPL_ptimer_boot() +#endif +{ +/* + * HPL_ptimer_boot (re)sets all timers to 0, and enables HPL_ptimer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 0; + + for( i = 0; i < HPL_NPTIMER; i++ ) + { + HPL_ptimer_cpusec [i] = HPL_ptimer_wallsec [i] = HPL_rzero; + HPL_ptimer_cpustart[i] = HPL_ptimer_wallstart[i] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer( const int I ) +#else +void HPL_ptimer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer provides a "stopwatch" functionality cpu/wall timer in + * seconds. 
Up to 64 separate timers can be functioning at once. The + * first call starts the timer, and the second stops it. This routine + * can be disabled by calling HPL_ptimer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable + * the timer functionality. One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_ptimer_inquire( HPL_WALL_PTIME | HPL_CPU_PTIME, I ) + * + * where I is the timer index in [0..64). To initialize the timer + * functionality, one must have called HPL_ptimer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_ptimer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_ptimer_wallstart[I] == HPL_PTIMER_STARTFLAG ) + { + HPL_ptimer_wallstart[I] = HPL_ptimer_walltime(); + HPL_ptimer_cpustart [I] = HPL_ptimer_cputime (); + } + else + { + HPL_ptimer_cpusec [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I]; + HPL_ptimer_wallsec [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I]; + HPL_ptimer_wallstart[I] = HPL_PTIMER_STARTFLAG; + } +/* + * End of HPL_ptimer + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_enable( void ) +#else +void HPL_ptimer_enable() +#endif +{ +/* + * HPL_ptimer_enable sets it so calls to HPL_ptimer are not ignored. + */ +/* .. + * .. Executable Statements ..
+ */ + HPL_ptimer_disabled = 0; + return; +/* + * End of HPL_ptimer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_disable( void ) +#else +void HPL_ptimer_disable() +#endif +{ +/* + * HPL_ptimer_disable sets it so calls to HPL_ptimer are ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_ptimer_disabled = 1; + return; +/* + * End of HPL_ptimer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_ptimer_inquire +( + const HPL_T_PTIME TMTYPE, + const int I +) +#else +double HPL_ptimer_inquire( TMTYPE, I ) + const int I; + const HPL_T_PTIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. 
+ */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_PTIMER_ERROR + */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_wallsec[I]; + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + time = HPL_PTIMER_ERROR; + else + time = HPL_ptimer_cpusec [I]; + } + return( time ); +/* + * End of HPL_ptimer_inquire + */ +} + +#ifdef STDC_HEADERS +void HPL_ptimer_combine +( + MPI_Comm COMM, + const HPL_T_PTIME_OP OPE, + const HPL_T_PTIME TMTYPE, + const int N, + const int IBEG, + double * TIMES +) +#else +void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES ) + const int IBEG, N; + const HPL_T_PTIME_OP OPE; + const HPL_T_PTIME TMTYPE; + MPI_Comm COMM; + double * TIMES; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_ptimer_combine combines the timing information stored on a scope + * of processes into the user TIMES array. + * + * Arguments + * ========= + * + * COMM (global/local input) MPI_Comm + * The MPI communicator identifying the process collection on + * which the timings are taken. + * + * OPE (global input) const HPL_T_PTIME_OP + * On entry, OP specifies what combine operation should be done + * as follows: + * = HPL_AMAX_PTIME get max. time on any process (default), + * = HPL_AMIN_PTIME get min. time on any process, + * = HPL_SUM_PTIME get sum of times across processes. + * + * TMTYPE (global input) const HPL_T_PTIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_PTIME : wall clock time is returned, + * = HPL_CPU_PTIME : CPU time is returned (default). + * + * N (global input) const int + * On entry, N specifies the number of timers to combine. + * + * IBEG (global input) const int + * On entry, IBEG specifies the first timer to be combined. + * + * TIMES (global output) double * + * On entry, TIMES is an array of dimension at least N. 
On exit, + * this array contains the requested timing information. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + int i, tmpdis; +/* .. + * .. Executable Statements .. + */ + tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1; +/* + * Timer has been disabled for combine operation - copy timing informa- + * tion into user times array. If wall- or cpu-time are not available + * on this machine, set times to HPL_PTIMER_ERROR, restore state, return. + */ + if( TMTYPE == HPL_WALL_PTIME ) + { + if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; HPL_ptimer_disabled = tmpdis; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; } + } + else + { + if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; HPL_ptimer_disabled = tmpdis; return; } + else + { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i]; } + } +/* + * Combine all nodes information, restore HPL_ptimer_disabled, and return + */ + for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] ); + + if( OPE == HPL_AMAX_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + else if( OPE == HPL_AMIN_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM ); + else if( OPE == HPL_SUM_PTIME ) + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM ); + else + (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); + + HPL_ptimer_disabled = tmpdis; +/* + * End of HPL_ptimer_combine + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c new file mode 100644 index 000000000..711ef185d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_cputime.c @@ -0,0 +1,146 @@ +/* + * --
High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + struct rusage ruse; + + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_cputime( void ) +#else +double HPL_ptimer_cputime() +#endif +{ + return( HPL_PTIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_ptimer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c new file mode 100644 index 000000000..96cbd300f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/HPL_ptimer_walltime.c @@ -0,0 +1,103 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_ptimer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_GETTIMEOFDAY ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} + +#else + +#ifdef STDC_HEADERS +double HPL_ptimer_walltime( void ) +#else +double HPL_ptimer_walltime() +#endif +{ + return( MPI_Wtime() ); +} + +#endif +/* + * End of HPL_ptimer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Makefile new file mode 100644 index 000000000..971500764 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/Makefile @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_ptimer.h +# +## Object files ######################################################## +# +HPL_ptiobj = \ + HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_ptiobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_ptimer.o : ../HPL_ptimer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer.c +HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_cputime.c +HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/ptimer/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer.c new file mode 100644 index 000000000..3be9665f7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer.c @@ -0,0 +1,253 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" +/* + * --------------------------------------------------------------------- + * Static variables + * --------------------------------------------------------------------- + */ +static int HPL_timer_disabled; +static double HPL_timer_cpusec [HPL_NTIMER], + HPL_timer_cpustart [HPL_NTIMER], + HPL_timer_wallsec [HPL_NTIMER], + HPL_timer_wallstart[HPL_NTIMER]; +/* + * --------------------------------------------------------------------- + * User callable functions + * --------------------------------------------------------------------- + */ +#ifdef STDC_HEADERS +void HPL_timer_boot( void ) +#else +void HPL_timer_boot() +#endif +{ +/* + * HPL_timer_boot (re)sets all timers to 0, and enables HPL_timer. + */ +/* + * .. Local Variables .. + */ + int i; +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + + for( i = 0; i < HPL_NTIMER; i++ ) + { + HPL_timer_cpusec [i] = HPL_timer_wallsec [i] = HPL_rzero; + HPL_timer_cpustart[i] = HPL_timer_wallstart[i] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer_boot + */ +} + +#ifdef STDC_HEADERS +void HPL_timer( const int I ) +#else +void HPL_timer( I ) + const int I; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer provides a "stopwatch" functionality cpu/wall timer in + * seconds. Up to 64 separate timers can be functioning at once. 
The + * first call starts the timer, and the second stops it. This routine + * can be disenabled by calling HPL_timer_disable(), so that calls to + * the timer are ignored. This feature can be used to make sure certain + * sections of code do not affect timings, even if they call routines + * which have HPL_timer calls in them. HPL_timer_enable() will re-enable + * the timer functionality. One can retrieve the current value of a + * timer by calling + * + * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + * + * where I is the timer index in [0..64). To initialize the timer + * functionality, one must have called HPL_timer_boot() prior to any of + * the functions mentioned above. + * + * Arguments + * ========= + * + * I (global input) const int + * On entry, I specifies the timer to stop/start. + * + * --------------------------------------------------------------------- + */ +/* .. + * .. Executable Statements .. + */ + if( HPL_timer_disabled ) return; +/* + * If timer has not been started, start it. Otherwise, stop it and add + * interval to count + */ + if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG ) + { + HPL_timer_wallstart[I] = HPL_timer_walltime(); + HPL_timer_cpustart [I] = HPL_timer_cputime (); + } + else + { + HPL_timer_cpusec [I] += HPL_timer_cputime () - HPL_timer_cpustart [I]; + HPL_timer_wallsec [I] += HPL_timer_walltime() - HPL_timer_wallstart[I]; + HPL_timer_wallstart[I] = HPL_TIMER_STARTFLAG; + } +/* + * End of HPL_timer + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_enable( void ) +#else +void HPL_timer_enable() +#endif +{ +/* + * HPL_timer_enable sets it so calls to HPL_timer are not ignored. + */ +/* .. + * .. Executable Statements .. + */ + HPL_timer_disabled = 0; + return; +/* + * End of HPL_timer_enable + */ +} + +#ifdef STDC_HEADERS +void HPL_timer_disable( void ) +#else +void HPL_timer_disable() +#endif +{ +/* + * HPL_timer_disable sets it so calls to HPL_timer are ignored. + */ +/* .. + * .. Executable Statements .. 
+ */ + HPL_timer_disabled = 1; + return; +/* + * End of HPL_timer_disable + */ +} + +#ifdef STDC_HEADERS +double HPL_timer_inquire +( + const HPL_T_TIME TMTYPE, + const int I +) +#else +double HPL_timer_inquire( TMTYPE, I ) + const int I; + const HPL_T_TIME TMTYPE; +#endif +{ +/* + * Purpose + * ======= + * + * HPL_timer_inquire returns wall- or cpu- time that has accumulated in + * timer I. + * + * Arguments + * ========= + * + * TMTYPE (global input) const HPL_T_TIME + * On entry, TMTYPE specifies what time will be returned as fol- + * lows + * = HPL_WALL_TIME : wall clock time is returned, + * = HPL_CPU_TIME : CPU time is returned (default). + * + * I (global input) const int + * On entry, I specifies the timer to return. + * + * --------------------------------------------------------------------- + */ +/* + * .. Local Variables .. + */ + double time; +/* .. + * .. Executable Statements .. + */ +/* + * If wall- or cpu-time are not available on this machine, return + * HPL_TIMER_ERROR + */ + if( TMTYPE == HPL_WALL_TIME ) + { + if( HPL_timer_walltime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_wallsec[I]; + } + else + { + if( HPL_timer_cputime() == HPL_TIMER_ERROR ) + time = HPL_TIMER_ERROR; + else + time = HPL_timer_cpusec [I]; + } + return( time ); +/* + * End of HPL_timer_inquire + */ +} diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_cputime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_cputime.c new file mode 100644 index 000000000..4a7f9dfef --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_cputime.c @@ -0,0 +1,145 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, + * the clock() function is used to return an approximation of processor + * time used by the program. The value returned is the CPU time used so + * far as a clock_t; to get the number of seconds used, the result is + * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C + * standard library. If HPL_USE_TIMES is defined, the times() function + * is used instead. This function returns the current process times. + * times() returns the number of clock ticks that have elapsed since the + * system has been up. Otherwise and by default, the standard library + * function getrusage() is used. 
+ * + * --------------------------------------------------------------------- + */ + +#if defined( HPL_USE_CLOCK ) + +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + static double cps = CLOCKS_PER_SEC; + double d; + clock_t t1; + static clock_t t0 = 0; + + if( t0 == 0 ) t0 = clock(); + t1 = clock() - t0; + d = (double)(t1) / cps; + return( d ); +} + +#elif defined( HPL_USE_TIMES ) + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + clock_t t1; + struct tms ts; + static double ClockTick = HPL_rzero; + + if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); + (void) times( &ts ); + return( (double)(ts.tms_utime) / ClockTick ); +} + +/* #elif defined( HPL_USE_GETRUSAGE ) */ +#else + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + struct rusage ruse; + (void) getrusage( RUSAGE_SELF, &ruse ); + return( (double)( ruse.ru_utime.tv_sec ) + + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); +} + +/* +#else + +#ifdef STDC_HEADERS +double HPL_timer_cputime( void ) +#else +double HPL_timer_cputime() +#endif +{ + return( HPL_TIMER_ERROR ); +} +*/ + +#endif +/* + * End of HPL_timer_cputime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_walltime.c b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_walltime.c new file mode 100644 index 000000000..f4f44f202 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/HPL_timer_walltime.c @@ -0,0 +1,88 @@ +/* + * -- High Performance Computing Linpack Benchmark (HPL) + * HPL - 2.3 - December 2, 2018 + * Antoine P. 
Petitet + * University of Tennessee, Knoxville + * Innovative Computing Laboratory + * (C) Copyright 2000-2008 All Rights Reserved + * + * -- Copyright notice and Licensing terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgement: + * This product includes software developed at the University of + * Tennessee, Knoxville, Innovative Computing Laboratory. + * + * 4. The name of the University, the name of the Laboratory, or the + * names of its contributors may not be used to endorse or promote + * products derived from this software without specific written + * permission. + * + * -- Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------- + */ +/* + * Include files + */ +#include "hpl.h" + +/* + * Purpose + * ======= + * + * HPL_timer_walltime returns the elapsed (wall-clock) time. + * + * + * --------------------------------------------------------------------- + */ + +#include +#include + +#ifdef STDC_HEADERS +double HPL_timer_walltime( void ) +#else +double HPL_timer_walltime() +#endif +{ + struct timeval tp; + static long start=0, startu; + + if( !start ) + { + (void) gettimeofday( &tp, NULL ); + start = tp.tv_sec; + startu = tp.tv_usec; + return( HPL_rzero ); + } + (void) gettimeofday( &tp, NULL ); + + return( (double)( tp.tv_sec - start ) + + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); +} +/* + * End of HPL_timer_walltime + */ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Make.inc b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Make.inc new file mode 120000 index 000000000..ae55370b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Make.inc @@ -0,0 +1 @@ +/home/kate/hip/hpl-2.3/Make.intel64 \ No newline at end of file diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Makefile b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Makefile new file mode 100644 index 000000000..b8009e88a --- 
/dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/Makefile @@ -0,0 +1,84 @@ +# +# -- High Performance Computing Linpack Benchmark (HPL) +# HPL - 2.3 - December 2, 2018 +# Antoine P. Petitet +# University of Tennessee, Knoxville +# Innovative Computing Laboratory +# (C) Copyright 2000-2008 All Rights Reserved +# +# -- Copyright notice and Licensing terms: +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. All advertising materials mentioning features or use of this +# software must display the following acknowledgement: +# This product includes software developed at the University of +# Tennessee, Knoxville, Innovative Computing Laboratory. +# +# 4. The name of the University, the name of the Laboratory, or the +# names of its contributors may not be used to endorse or promote +# products derived from this software without specific written +# permission. +# +# -- Disclaimer: +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY +# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ###################################################################### +# +include Make.inc +# +# ###################################################################### +# +INCdep = \ + $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_timer.h +# +## Object files ######################################################## +# +HPL_timobj = \ + HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o +# +## Targets ############################################################# +# +all : lib +# +lib : lib.grd +# +lib.grd : $(HPL_timobj) + $(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj) + $(RANLIB) $(HPLlib) + $(TOUCH) lib.grd +# +# ###################################################################### +# +HPL_timer.o : ../HPL_timer.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer.c +HPL_timer_cputime.o : ../HPL_timer_cputime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_cputime.c +HPL_timer_walltime.o : ../HPL_timer_walltime.c $(INCdep) + $(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_walltime.c +# +# ###################################################################### +# +clean : + $(RM) *.o *.grd +# +# ###################################################################### diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/lib.grd b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/testing/timer/intel64/lib.grd new file mode 100644 index 000000000..e69de29bb diff --git 
a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1rinM.jpg new file mode 100755 index 000000000..9af78f844 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1rinM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1ring.jpg new file mode 100755 index 000000000..73e4391cf Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/1ring.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2-273x48.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2-273x48.jpg new file mode 100755 index 000000000..23795f8b9 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2-273x48.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2rinM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2rinM.jpg new file mode 100755 index 000000000..c294e0d07 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2rinM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2ring.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2ring.jpg new file mode 100755 index 000000000..f37187f13 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/2ring.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_abort.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_abort.html new file mode 100755 index 000000000..49a4bd318 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_abort.html @@ -0,0 +1,67 @@ + + +HPL_abort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_abort halts execution. + +

Synopsis

+#include "hpl.h"

+void +HPL_abort( +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_abort +displays an error message on stderr and halts execution. + +

Arguments

+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has  occurred. When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_abort( __LINE__, __FILE__, "Halt.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_fprintf, +HPL_warn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_all_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_all_reduce.html new file mode 100755 index 000000000..591cdd596 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_all_reduce.html @@ -0,0 +1,67 @@ + + +HPL_all_reduce HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_all_reduce All reduce operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_all_reduce( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const HPL_T_OP +OP, +MPI_Comm +COMM +); + +

Description

+HPL_all_reduce +performs a global reduce operation across all +processes of a group leaving the results on all processes. + +

Arguments

+
+BUFFER  (local input/global output)   void *
+        On entry,  BUFFER  points to  the  buffer to be combined.  On
+        exit, this array contains the combined data and  is identical
+        on all processes in the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+
+
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_barrier.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_barrier.html new file mode 100755 index 000000000..86ae426ad --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_barrier.html @@ -0,0 +1,41 @@ + + +HPL_barrier HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_barrier Barrier operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_barrier( +MPI_Comm +COMM +); + +

Description

+HPL_barrier +blocks the caller until all process members have called it. +The call returns at any process only after all group members have +entered the call. + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bcast.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bcast.html new file mode 100755 index 000000000..079325ed7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bcast.html @@ -0,0 +1,46 @@ + + +HPL_bcast HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_bcast Perform the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_bcast( +HPL_T_panel * +PANEL, +int * +IFLAG +); + +

Description

+HPL_bcast +broadcasts the current panel. Successful completion is +indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to +HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was +not completed, in which case this function should be called again. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+
+IFLAG   (output)                      int *
+        On exit,  IFLAG  indicates  whether  or not the broadcast has
+        occurred.
+
+ +

See Also

+HPL_binit, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_binit.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_binit.html new file mode 100755 index 000000000..0f9a9e1ae --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_binit.html @@ -0,0 +1,37 @@ + + +HPL_binit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_binit Initialize the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_binit( +HPL_T_panel * +PANEL +); + +

Description

+HPL_binit +initializes a row broadcast. Successful completion is +indicated by the returned error code HPL_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_broadcast.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_broadcast.html new file mode 100755 index 000000000..6e24b2c2b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_broadcast.html @@ -0,0 +1,67 @@ + + +HPL_broadcast HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_broadcast Broadcast operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_broadcast( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const int +ROOT, +MPI_Comm +COMM +); + +

Description

+HPL_broadcast +broadcasts a message from the process with rank ROOT to +all processes in the group. + +

Arguments

+
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be broadcast. On
+        exit, this array contains the broadcast data and is identical
+        on all processes in the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer operands.
+
+
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the source process.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bwait.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bwait.html new file mode 100755 index 000000000..f1dd51e7b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_bwait.html @@ -0,0 +1,38 @@ + + +HPL_bwait HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_bwait Finalize the row broadcast. + +

Synopsis

+#include "hpl.h"

+int +HPL_bwait( +HPL_T_panel * +PANEL +); + +

Description

+HPL_bwait +waits for the row broadcast of the current panel to +terminate. Successful completion is indicated by the returned error +code HPL_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_binit, +HPL_bcast. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_copyL.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_copyL.html new file mode 100755 index 000000000..4b98963ac --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_copyL.html @@ -0,0 +1,42 @@ + + +HPL_copyL HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_copyL Copy the current panel into a contiguous workspace. + +

Synopsis

+#include "hpl.h"

+void +HPL_copyL( +HPL_T_panel * +PANEL +); + +

Description

+HPL_copyL +copies the panel of columns, the L1 replicated submatrix, +the pivot array and the info scalar into a contiguous workspace for +later broadcast. + +The copy of this panel into a contiguous buffer can be enforced by +specifying -DHPL_COPY_L in the architecture specific Makefile. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+ +

See Also

+HPL_binit, +HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_daxpy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_daxpy.html new file mode 100755 index 000000000..c34d0b2e8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_daxpy.html @@ -0,0 +1,89 @@ + + +HPL_daxpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_daxpy y := y + alpha * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_daxpy( +const int +N, +const double +ALPHA, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_daxpy +scales the vector x by alpha and adds it to y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the scaled entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_daxpy( 3, 2.0, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dcopy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dcopy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dcopy.html new file mode 100755 index 000000000..2a4a485b5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dcopy.html @@ -0,0 +1,81 @@ + + +HPL_dcopy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dcopy y := x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dcopy( +const int +N, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_dcopy +copies the vector x into the vector y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dcopy( 3, x, 1, y, 1 );
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemm.html new file mode 100755 index 000000000..667c0ff01 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemm.html @@ -0,0 +1,178 @@ + + +HPL_dgemm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dgemm C := alpha * op(A) * op(B) + beta * C. + +

Synopsis

+#include "hpl.h"

+void +HPL_dgemm( +const enum HPL_ORDER +ORDER, +const enum HPL_TRANS +TRANSA, +const enum HPL_TRANS +TRANSB, +const int +M, +const int +N, +const int +K, +const double +ALPHA, +const double * +A, +const int +LDA, +const double * +B, +const int +LDB, +const double +BETA, +double * +C, +const int +LDC +); + +

Description

+HPL_dgemm +performs one of the matrix-matrix operations + + C := alpha * op( A ) * op( B ) + beta * C + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +Alpha and beta are scalars, and A, B and C are matrices, with op(A) +an m by k matrix, op(B) a k by n matrix and C an m by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+TRANSA  (local input)                 const enum HPL_TRANS
+        On entry, TRANSA  specifies the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSA==HplNoTrans    : op( A ) = A,                     
+           TRANSA==HplTrans      : op( A ) = A^T,                   
+           TRANSA==HplConjTrans  : op( A ) = A^T.                   
+
+
+TRANSB  (local input)                 const enum HPL_TRANS
+        On entry, TRANSB  specifies the form of  op(B)  to be used in
+        the matrix-matrix operation as follows:                      
+           TRANSB==HplNoTrans    : op( B ) = B,                     
+           TRANSB==HplTrans      : op( B ) = B^T,                   
+           TRANSB==HplConjTrans  : op( B ) = B^T.                   
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the  number  of rows  of the  matrix
+        op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the number  of columns of the matrix
+        op(B)  and  the number of columns of the matrix  C. N must be
+        at least zero.
+
+
+K       (local input)                 const int
+        On entry,  K  specifies  the  number of columns of the matrix
+        op(A) and the number of rows of the matrix op(B).  K  must be
+        at least  zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero  then the elements of the matrices A and B
+        need not be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  is an array of dimension (LDA,ka),  where ka is
+        k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
+        entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
+        the array  A must contain the matrix A, otherwise the leading
+        k  by  m  part of the array  A  must  contain the  matrix  A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA  specifies the first dimension of A as declared
+        in the  calling (sub) program. When  TRANSA==HplNoTrans  then
+        LDA must be at least max(1,m), otherwise LDA must be at least
+        max(1,k).
+
+
+B       (local input)                 const double *
+        On entry, B is an array of dimension (LDB,kb),  where  kb  is
+        n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
+        entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
+        the array  B must contain the matrix B, otherwise the leading
+        n  by  k  part of the array  B  must  contain  the matrix  B.
+
+
+LDB     (local input)                 const int
+        On entry, LDB  specifies the first dimension of B as declared
+        in the  calling (sub) program. When  TRANSB==HplNoTrans  then
+        LDB must be at least max(1,k), otherwise LDB must be at least
+        max(1,n).
+
+
+BETA    (local input)                 const double
+        On entry,  BETA  specifies the scalar  beta.   When  BETA  is
+        supplied  as  zero  then  the  elements of the matrix C  need
+        not be set on input.
+
+
+C       (local input/output)          double *
+        On entry,  C  is an array of dimension (LDC,n). Before entry,
+        the  leading m by n part  of  the  array  C  must contain the
+        matrix C,  except when beta is zero, in which case C need not
+        be set on entry. On exit, the array  C  is overwritten by the
+        m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
+
+
+LDC     (local input)                 const int
+        On entry, LDC  specifies the first dimension of C as declared
+        in  the   calling  (sub)  program.   LDC  must  be  at  least
+        max(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2], c[2*2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0;
+   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
+              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );
+   printf("  [%f,%f]\n", c[0], c[2]);
+   printf("c=[%f,%f]\n", c[1], c[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dtrsm. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemv.html new file mode 100755 index 000000000..d5921a9b2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dgemv.html @@ -0,0 +1,146 @@ + + +HPL_dgemv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dgemv y := beta * y + alpha * op(A) * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dgemv( +const enum HPL_ORDER +ORDER, +const enum HPL_TRANS +TRANS, +const int +M, +const int +N, +const double +ALPHA, +const double * +A, +const int +LDA, +const double * +X, +const int +INCX, +const double +BETA, +double * +Y, +const int +INCY +); + +

Description

+HPL_dgemv +performs one of the matrix-vector operations + + y := alpha * op( A ) * x + beta * y, + + where op( X ) is one of + + op( X ) = X or op( X ) = X^T. + +where alpha and beta are scalars, x and y are vectors and A is an m +by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies the  operation to be performed as
+        follows:   
+           TRANS = HplNoTrans y := alpha*A  *x + beta*y,
+           TRANS = HplTrans   y := alpha*A^T*x + beta*y.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  A and X  need not be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+BETA    (local input)                 const double
+        On entry, BETA  specifies the scalar beta.    When  BETA   is
+        supplied as zero then  Y  need not be set on input.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        Before entry with BETA non-zero, the incremented array Y must
+        contain the vector  y.  On exit,  Y  is  overwritten  by  the
+        updated vector y.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
+              a, 2, x, 1, -1.0, y, 1 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dger, +HPL_dtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dger.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dger.html new file mode 100755 index 000000000..e4ea948ed --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dger.html @@ -0,0 +1,124 @@ + + +HPL_dger HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dger A := alpha * x * y^T + A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dger( +const enum HPL_ORDER +ORDER, +const int +M, +const int +N, +const double +ALPHA, +const double * +X, +const int +INCX, +double * +Y, +const int +INCY, +double * +A, +const int +LDA +); + +

Description

+HPL_dger +performs the rank 1 operation + + A := alpha * x * y^T + A, + +where alpha is a scalar, x is an m-element vector, y is an n-element +vector and A is an m by n matrix. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of  the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero then  X and Y  need not be set on input.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input)                 double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+
+A       (local input/output)          double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n.  Before  entry, the leading m by n part  of the
+        array  A  must contain the matrix coefficients. On exit, A is
+        overwritten by the updated matrix.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2], y[2];
+   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
+   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
+   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
+             a, 2 );
+   printf("y=[%f,%f]\n", y[0], y[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dgemv, +HPL_dtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlacpy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlacpy.html new file mode 100755 index 000000000..b64d34e0c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlacpy.html @@ -0,0 +1,84 @@ + + +HPL_dlacpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlacpy B := A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlacpy( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dlacpy +copies an array A into an array B. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M specifies the number of rows of the arrays A and
+        B. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies  the number of columns of the arrays A
+        and B. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N).
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with A.
+
+
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlacpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0);
+   return(0);
+}
+
+ +

See Also

+HPL_dlatcpy. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlamch.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlamch.html new file mode 100755 index 000000000..cb87a90ba --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlamch.html @@ -0,0 +1,86 @@ + + +HPL_dlamch HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlamch determines machine-specific arithmetic constants. + +

Synopsis

+#include "hpl.h"

+double +HPL_dlamch( +const HPL_T_MACH +CMACH +); + +

Description

+HPL_dlamch +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum (sfmin) such +that 1 / sfmin does not overflow, the base of the machine (base), the +precision (prec), the number of (base) digits in the mantissa (t), +whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the +minimum exponent before (gradual) underflow (emin), the underflow +threshold (rmin) base**(emin-1), the largest exponent before overflow +(emax), the overflow threshold (rmax) (base**emax)*(1-eps). + +

Arguments

+
+CMACH   (local input)                 const HPL_T_MACH
+        Specifies the value to be returned by HPL_dlamch             
+           = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
+           = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
+           = HPL_MACH_BASE,  HPL_dlamch := base                      
+           = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
+           = HPL_MACH_MLEN,  HPL_dlamch := t                         
+           = HPL_MACH_RND,   HPL_dlamch := rnd                       
+           = HPL_MACH_EMIN,  HPL_dlamch := emin                      
+           = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
+           = HPL_MACH_EMAX,  HPL_dlamch := emax                      
+           = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double eps;
+   eps = HPL_dlamch( HPL_MACH_EPS );
+   printf("eps=%18.8e\n", eps);
+   exit(0); return(0);
+}
+
+ +

References

+This function has been manually translated from the Fortran 77 LAPACK +auxiliary function dlamch.f (version 2.0 -- 1992), that was itself +based on the function ENVRON by Malcolm and incorporated suggestions +by Gentleman and Marovich. See + +Malcolm M. A., Algorithms to reveal properties of floating-point +arithmetic., Comms. of the ACM, 15, 949-951 (1972). + +Gentleman W. M. and Marovich S. B., More on algorithms that reveal +properties of floating point arithmetic units., Comms. of the ACM, +17, 276-277 (1974). + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlange.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlange.html new file mode 100755 index 000000000..ce276e257 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlange.html @@ -0,0 +1,86 @@ + + +HPL_dlange HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlange Compute ||A||. + +

Synopsis

+#include "hpl.h"

+double +HPL_dlange( +const HPL_T_NORM +NORM, +const int +M, +const int +N, +const double * +A, +const int +LDA +); + +

Description

+HPL_dlange +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a matrix A: + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. + +

Arguments

+
+NORM    (local input)                 const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry,  A  points to an  array of dimension  (LDA,N), that
+        contains the matrix A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], norm;
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
+   printf("norm=%f\n", norm);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dlaprnt, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaprnt.html new file mode 100755 index 000000000..f589ee2bb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaprnt.html @@ -0,0 +1,86 @@ + + +HPL_dlaprnt HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaprnt Print the matrix A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaprnt( +const int +M, +const int +N, +double * +A, +const int +IA, +const int +JA, +const int +LDA, +const char * +CMATNM +); + +

Description

+HPL_dlaprnt +prints to standard error an M-by-N matrix A. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies the number of rows of A. M must be at
+        least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies the number of columns of A. N must be
+        at least zero.
+
+
+A       (local input)                 double *
+        On entry, A  points to an array of dimension (LDA,N).
+
+
+IA      (local input)                 const int
+        On entry, IA specifies the starting row index to be printed.
+
+
+JA      (local input)                 const int
+        On entry,  JA  specifies  the  starting  column index  to be
+        printed.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+
+CMATNM  (local input)                 const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp00N.html new file mode 100755 index 000000000..8e36cf6c6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp00N.html @@ -0,0 +1,78 @@ + + +HPL_dlaswp00N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp00N performs a series of row interchanges. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp00N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int * +IPIV +); + +

Description

+HPL_dlaswp00N +performs a series of local row interchanges on a matrix +A. One row interchange is initiated for rows 0 through M-1 of A. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M specifies the number of rows of the array A to be
+        interchanged. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies  the number of columns of the array A.
+        N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A  points to an array of dimension (LDA,N) to which
+        the row interchanges will be  applied.  On exit, the permuted
+        matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+IPIV    (local input)                 const int *
+        On entry,  IPIV  is  an  array of size  M  that  contains the
+        pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
+        implies that local rows k and l are to be interchanged.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01N.html new file mode 100755 index 000000000..aa8861d10 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01N.html @@ -0,0 +1,109 @@ + + +HPL_dlaswp01N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp01N copies rows of A into itself and into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp01N( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp01N +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destinations of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the positions indicated by positive values of LINDXAU.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01T.html new file mode 100755 index 000000000..9697471c5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp01T.html @@ -0,0 +1,110 @@ + + +HPL_dlaswp01T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp01T copies rows of A into itself and into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp01T( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp01T +copies scattered rows of A into itself and into an +array U. The row offsets in A of the source rows are specified by +LINDXA. The destinations of those rows are specified by LINDXAU. A +positive value of LINDXAU indicates that the array destination is U, +and A otherwise. Rows of A are stored as columns in U. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        moved within A or copied into U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        moved within A or copied into U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be moved within A or
+        copied into U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M). The rows
+        of A specified by  LINDXA  are copied within this array  U at
+        the  positions indicated by positive values of LINDXAU.  The
+        rows of A are stored as columns in U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local  row indexes  of  A  that should be moved within  A  or
+        copied into U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local  row indexes of  U  where the rows of  A  should be
+        copied at. This array also contains the  local row offsets in
+        A where some of the rows of A should be moved to.  A positive
+        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
+        should be copied into U at the position LINDXAU[i]; otherwise
+        the row  LINDXA[i]  of  A  should be moved  at  the  position
+        -LINDXAU[i] within A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp02N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp02N.html new file mode 100755 index 000000000..d4e1a0cf8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp02N.html @@ -0,0 +1,107 @@ + + +HPL_dlaswp02N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp02N pack rows of A into columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp02N( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +W0, +double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp02N +packs scattered rows of an array A into workspace W. +The row offsets in A are specified by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        copied into W. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the length of rows of A that should be
+        copied into W. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,N). The rows
+        of this array specified by LINDXA should be copied into W.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+W0      (local input/output)          double *
+        On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local output)                double *
+        On entry, W  is an array of size (LDW,M). On exit, W contains
+        the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
+        in W(:,i).
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied into W.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M  that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the rows of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03N.html new file mode 100755 index 000000000..f5c4127b0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03N.html @@ -0,0 +1,95 @@ + + +HPL_dlaswp03N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp03N copy columns of W into rows of U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp03N( +const int +M, +const int +N, +double * +U, +const int +LDU, +const double * +W0, +const double * +W, +const int +LDW +); + +

Description

+HPL_dlaswp03N +copies columns of W into rows of an array U. The +destination in U of these columns contained in W is stored within W0. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,N).  Columns
+        of W are copied as rows within this array U at  the positions
+        specified in W0.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03T.html new file mode 100755 index 000000000..010175313 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp03T.html @@ -0,0 +1,95 @@ + + +HPL_dlaswp03T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp03T copy columns of W into U. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp03T( +const int +M, +const int +N, +double * +U, +const int +LDU, +const double * +W0, +const double * +W, +const int +LDW +); + +

Description

+HPL_dlaswp03T +copies columns of W into an array U. The destination +in U of these columns contained in W is stored within W0. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies  the  number  of columns of  W  stored
+        contiguously that should be copied into U. M must be at least
+        zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  length of columns of  W  stored
+        contiguously that should be copied into U. N must be at least
+        zero.
+
+
+U       (local input/output)          double *
+        On entry, U points to an array of dimension (LDU,M).  Columns
+        of W are copied within the array U at the positions specified
+        in W0.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M),  that contains data
+        to be copied into U. For i in [0..M),  entries W(:,i)  should
+        be copied into the row or column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04N.html new file mode 100755 index 000000000..bb6cab0a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04N.html @@ -0,0 +1,131 @@ + + +HPL_dlaswp04N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp04N copy rows of U in A and replace them with columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp04N( +const int +M0, +const int +M1, +const int +N, +double * +U, +const int +LDU, +double * +A, +const int +LDA, +const double * +W0, +const double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp04N +copies M0 rows of U into A and replaces those rows of U +with columns of W. In addition M1 - M0 columns of W are copied into +rows of U. + +

Arguments

+
+M0      (local input)                 const int
+        On entry, M0 specifies the number of rows of U that should be
+        copied into  A  and replaced by columns of  W.  M0 must be at
+        least zero.
+
+
+M1      (local input)                 const int
+        On entry, M1 specifies the number of columns of W that should
+        be copied into rows of U. M1 must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  points to  an array of dimension (LDU,N).  This
+        array contains the rows that are to be copied into A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M1).
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the row W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes A into which rows of U are copied.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the local  row indexes of  U that should be copied into A and
+        replaced by the columns of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04T.html new file mode 100755 index 000000000..0209a3689 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp04T.html @@ -0,0 +1,132 @@ + + +HPL_dlaswp04T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp04T copy columns of U in rows of A and replace them with columns of W. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp04T( +const int +M0, +const int +M1, +const int +N, +double * +U, +const int +LDU, +double * +A, +const int +LDA, +const double * +W0, +const double * +W, +const int +LDW, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp04T +copies M0 columns of U into rows of A and replaces those +columns of U with columns of W. In addition M1 - M0 columns of W are +copied into U. + +

Arguments

+
+M0      (local input)                 const int
+        On entry, M0 specifies the number of columns of U that should
+        be copied into A and replaced by columns of W.  M0 must be at
+        least zero.
+
+
+M1      (local input)                 const int
+        On entry, M1 specifies  the number of  columns of W that will
+        be copied into U. M1 must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies the length of the columns of  U  that
+        will be copied into rows of A. N must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M0).
+
+
+W0      (local input)                 const double *
+        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
+        the destination offset  in U where the columns of W should be
+        copied.
+
+
+W       (local input)                 const double *
+        On entry, W  is an array of size (LDW,M0+M1),  that  contains
+        data to be copied into U.  For i in [M0..M0+M1),  the entries
+        W(:,i) are copied into the column W0(i*LDW) of U.
+
+
+LDW     (local input)                 const int
+        On entry, LDW specifies the leading dimension of the array W.
+        LDW must be at least MAX(1,N+1).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA  is an array of dimension  M0 containing the
+        local row indexes A into which columns of U are copied.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension M0 that  contains
+        the  local column indexes of  U  that should be copied into A
+        and replaced by the columns of W.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05N.html new file mode 100755 index 000000000..f428b7354 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05N.html @@ -0,0 +1,98 @@ + + +HPL_dlaswp05N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp05N copy rows of U into A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp05N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp05N +copies rows of U of global offset LINDXAU into rows of +A at positions indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of U that should be
+        copied into A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of U that should
+        be copied into A. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input)                 const double *
+        On entry,  U  points to an array of dimension  (LDU,N).  This
+        array contains the rows that are to be copied into A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local row indexes of U that should be copied in A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05T.html new file mode 100755 index 000000000..fffb9f320 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp05T.html @@ -0,0 +1,98 @@ + + +HPL_dlaswp05T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp05T copy columns of U into rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp05T( +const int +M, +const int +N, +double * +A, +const int +LDA, +const double * +U, +const int +LDU, +const int * +LINDXA, +const int * +LINDXAU +); + +

Description

+HPL_dlaswp05T +copies columns of U of global offset LINDXAU into rows +of A at positions indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies the number of columns of U that should
+        be copied into A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the columns of U that will
+        be copied into rows of A. N must be at least zero.
+
+
+A       (local output)                double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U indicated by LINDXAU.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input)                 const double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns that are to be copied into rows of
+        A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be copied from U.
+
+
+LINDXAU (local input)                 const int *
+        On entry, LINDXAU  is an array of dimension  M that  contains
+        the local column indexes of U that should be copied in A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06N.html new file mode 100755 index 000000000..f28ab48c6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06N.html @@ -0,0 +1,92 @@ + + +HPL_dlaswp06N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp06N swap rows of U with rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp06N( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA +); + +

Description

+HPL_dlaswp06N +swaps rows of U with rows of A at positions +indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with rows of U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with rows of U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        rows of U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,N).  This
+        array contains the rows of U that are to be swapped with rows
+        of A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,M).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06T.html new file mode 100755 index 000000000..86032a9f4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp06T.html @@ -0,0 +1,92 @@ + + +HPL_dlaswp06T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp06T swap rows or columns of U with rows of A. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp06T( +const int +M, +const int +N, +double * +A, +const int +LDA, +double * +U, +const int +LDU, +const int * +LINDXA +); + +

Description

+HPL_dlaswp06T +swaps columns of U with rows of A at positions +indicated by LINDXA. + +

Arguments

+
+M       (local input)                 const int
+        On entry, M  specifies the number of rows of A that should be
+        swapped with columns of U. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the length of the rows of A that should
+        be swapped with columns of U. N must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        the  rows of this array specified by  LINDXA  are replaced by
+        columns of U.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+U       (local input/output)          double *
+        On entry,  U  points  to an array of dimension (LDU,*).  This
+        array contains the columns of  U  that are to be swapped with
+        rows of A.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the leading dimension of the array U.
+        LDU must be at least MAX(1,N).
+
+
+LINDXA  (local input)                 const int *
+        On entry, LINDXA is an array of dimension M that contains the
+        local row indexes of A that should be swapped with U.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp10N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp10N.html new file mode 100755 index 000000000..84403ca79 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlaswp10N.html @@ -0,0 +1,77 @@ + + +HPL_dlaswp10N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlaswp10N performs a series of column interchanges. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlaswp10N( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int * +IPIV +); + +

Description

+HPL_dlaswp10N +performs a sequence of local column interchanges on a +matrix A. One column interchange is initiated for columns 0 through +N-1 of A. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the array A. M
+        must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N specifies the number of columns of the array A. N
+        must be at least zero.
+
+
+A       (local input/output)          double *
+        On entry, A  points to an  array of  dimension (LDA,N).  This
+        array contains the columns onto which the interchanges should
+        be applied. On exit, A contains the permuted matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,M).
+
+
+IPIV    (local input)                 const int *
+        On entry,  IPIV  is an array of dimension N that contains the
+        pivoting information: for k in [0..N), column k of A is to be
+        interchanged with column IPIV[k] of A.
+
+ +

See Also

+HPL_dlaswp00N, +HPL_dlaswp10N, +HPL_dlaswp01N, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp03T, +HPL_dlaswp04N, +HPL_dlaswp04T, +HPL_dlaswp05N, +HPL_dlaswp05T, +HPL_dlaswp06N, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlatcpy.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlatcpy.html new file mode 100755 index 000000000..fa1cca5d9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlatcpy.html @@ -0,0 +1,83 @@ + + +HPL_dlatcpy HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlatcpy B := A^T + +

Synopsis

+#include "hpl.h"

+void +HPL_dlatcpy( +const int +M, +const int +N, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dlatcpy +copies the transpose of an array A into an array B. + +

Arguments

+
+M       (local input)                 const int
+        On entry,  M specifies the number of  rows of the array B and
+        the number of columns of A. M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the number of  rows of the array A and
+        the number of columns of B. N must be at least zero.
+
+
+A       (local input)                 const double *
+        On entry, A points to an array of dimension (LDA,M).
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least MAX(1,N).
+
+
+B       (local output)                double *
+        On entry, B points to an array of dimension (LDB,N). On exit,
+        B is overwritten with the transpose of A.
+
+
+LDB     (local input)                 const int
+        On entry, LDB specifies the leading dimension of the array B.
+        LDB must be at least MAX(1,M).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
+   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dlacpy. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocmax.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocmax.html new file mode 100755 index 000000000..c3361f32d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocmax.html @@ -0,0 +1,87 @@ + + +HPL_dlocmax HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocmax finds the maximum entry in matrix column. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocmax( +HPL_T_panel * +PANEL, +const int +N, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocmax +finds the maximum entry in the current column and packs +the useful information in WORK[0:3]. On exit, WORK[0] contains the +local maximum absolute value scalar, WORK[1] is the corresponding +local row index, WORK[2] is the corresponding global row index, and +WORK[3] is the coordinate of the process owning this max. When N is +less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set +to the total number of process rows. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of rows of the column
+        of A on which we operate.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is  a workarray of size at least 4.  On exit,
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.
+
+ +

See Also

+HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpN.html new file mode 100755 index 000000000..b5c4b74a9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpN.html @@ -0,0 +1,79 @@ + + +HPL_dlocswpN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocswpN locally swaps rows within panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocswpN( +HPL_T_panel * +PANEL, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocswpN +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpT.html new file mode 100755 index 000000000..d31361543 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dlocswpT.html @@ -0,0 +1,79 @@ + + +HPL_dlocswpT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dlocswpT locally swaps rows within panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_dlocswpT( +HPL_T_panel * +PANEL, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_dlocswpT +performs the local swapping operations within a panel. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        WORK[0] contains  the  local  maximum  absolute value scalar,
+        WORK[1] contains  the corresponding local row index,  WORK[2]
+        contains the corresponding global row index, and  WORK[3]  is
+        the coordinate of process owning this max.  The N0 length max
+        row is stored in WORK[4:4+N0-1];  Note  that this is also the
+        JJth row  (or column) of L1. The remaining part of this array
+        is used as workspace.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dmatgen.html new file mode 100755 index 000000000..7886da146 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dmatgen.html @@ -0,0 +1,73 @@ + + +HPL_dmatgen HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dmatgen random matrix generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_dmatgen( +const int +M, +const int +N, +double * +A, +const int +LDA, +const int +ISEED +); + +

Description

+HPL_dmatgen +generates (or regenerates) a random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. + +

Arguments

+
+M       (input)                       const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (input)                       const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+A       (output)                      double *
+        On entry, A points to an array of dimension (LDA,N). On exit,
+        this  array  contains   the   coefficients  of  the  randomly
+        generated matrix.
+
+
+LDA     (input)                       const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,M).
+
+
+ISEED   (input)                       const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dscal.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dscal.html new file mode 100755 index 000000000..c13427f44 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dscal.html @@ -0,0 +1,74 @@ + + +HPL_dscal HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dscal x = alpha * x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dscal( +const int +N, +const double +ALPHA, +double * +X, +const int +INCX +); + +

Description

+HPL_dscal +scales the vector x by alpha. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied as zero, then the entries of the incremented array X
+        need not be set on input.
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are  scaled
+        by the scalar alpha.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   HPL_dscal( 3, 2.0, x, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dswap.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dswap.html new file mode 100755 index 000000000..cae6980a6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dswap.html @@ -0,0 +1,84 @@ + + +HPL_dswap HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dswap y <-> x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dswap( +const int +N, +double * +X, +const int +INCX, +double * +Y, +const int +INCY +); + +

Description

+HPL_dswap +swaps the vectors x and y. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vectors  x  and  y. N
+        must be at least zero.
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        On exit, the entries of the incremented array  X  are updated
+        with the entries of the incremented array Y.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+
+Y       (local input/output)          double *
+        On entry,  Y  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
+        On exit, the entries of the incremented array  Y  are updated
+        with the entries of the incremented array X.
+
+
+INCY    (local input)                 const int
+        On entry, INCY specifies the increment for the elements of Y.
+        INCY must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3], y[3];
+   x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
+   y[0] = 4.0; y[1] = 5.0; y[2] = 6.0;
+   HPL_dswap( 3, x, 1, y, 1 );
+   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
+   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dscal. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsm.html new file mode 100755 index 000000000..3d60e597f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsm.html @@ -0,0 +1,168 @@ + + +HPL_dtrsm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dtrsm B := A^{-1} * B or B := B * A^{-1}. + +

Synopsis

+#include "hpl.h"

+void +HPL_dtrsm( +const enum HPL_ORDER +ORDER, +const enum HPL_SIDE +SIDE, +const enum HPL_UPLO +UPLO, +const enum HPL_TRANS +TRANS, +const enum HPL_DIAG +DIAG, +const int +M, +const int +N, +const double +ALPHA, +const double * +A, +const int +LDA, +double * +B, +const int +LDB +); + +

Description

+HPL_dtrsm +solves one of the matrix equations + + op( A ) * X = alpha * B, or X * op( A ) = alpha * B, + +where alpha is a scalar, X and B are m by n matrices, A is a unit, or +non-unit, upper or lower triangular matrix and op(A) is one of + + op( A ) = A or op( A ) = A^T. + +The matrix X is overwritten on B. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+SIDE    (local input)                 const enum HPL_SIDE
+        On entry, SIDE  specifies  whether  op(A) appears on the left
+        or right of X as follows:
+           SIDE==HplLeft    op( A ) * X = alpha * B,
+           SIDE==HplRight   X * op( A ) = alpha * B.
+
+
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry, TRANS  specifies the form of  op(A)  to be used in
+        the matrix-matrix operation as follows:
+           TRANS==HplNoTrans    : op( A ) = A,
+           TRANS==HplTrans      : op( A ) = A^T,
+           TRANS==HplConjTrans  : op( A ) = A^T.
+
+
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+
+
+M       (local input)                 const int
+        On entry,  M  specifies  the number of rows of the  matrix B.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of columns of the matrix B.
+        N must be at least zero.
+
+
+ALPHA   (local input)                 const double
+        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
+        supplied  as  zero then the elements of the matrix B need not
+        be set on input.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
+        otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
+        k by k upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower on  entry,
+        the  leading k by k lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
+        are not referenced  either,  but are assumed to be unity.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
+
+
+B       (local input/output)          double *
+        On entry,  B  points  to an array of size equal to or greater
+        than LDB * n.  Before entry, the leading  m by n  part of the
+        array B must contain the matrix  B, except when alpha is zero,
+        in which case B need not be set on entry.  On exit, the array
+        B is overwritten by the m by n solution matrix.
+
+
+LDB     (local input)                 const int
+        On entry,  LDB  specifies  the  leading  dimension  of  B  as
+        declared  in  the  calling  (sub) program.  LDB  must  be  at
+        least MAX(1,m).
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], b[2*2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0;
+   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
+              HplNoTrans, HplNonUnit, 2, 2, 2.0,
+              a, 2, b, 2 );
+   printf("  [%f,%f]\n", b[0], b[2]);
+   printf("b=[%f,%f]\n", b[1], b[3]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dgemm. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsv.html new file mode 100755 index 000000000..3e4703529 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_dtrsv.html @@ -0,0 +1,136 @@ + + +HPL_dtrsv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_dtrsv x := A^{-1} x. + +

Synopsis

+#include "hpl.h"

+void +HPL_dtrsv( +const enum HPL_ORDER +ORDER, +const enum HPL_UPLO +UPLO, +const enum HPL_TRANS +TRANS, +const enum HPL_DIAG +DIAG, +const int +N, +const double * +A, +const int +LDA, +double * +X, +const int +INCX +); + +

Description

+HPL_dtrsv +solves one of the systems of equations + + A * x = b, or A^T * x = b, + +where b and x are n-element vectors and A is an n by n non-unit, or +unit, upper or lower triangular matrix. + +No test for singularity or near-singularity is included in this +routine. Such tests must be performed before calling this routine. + +

Arguments

+
+ORDER   (local input)                 const enum HPL_ORDER
+        On entry, ORDER  specifies the storage format of the operands
+        as follows:                                                  
+           ORDER = HplRowMajor,                                      
+           ORDER = HplColumnMajor.                                   
+
+
+UPLO    (local input)                 const enum HPL_UPLO
+        On  entry,   UPLO   specifies  whether  the  upper  or  lower
+        triangular  part  of the array  A  is to be referenced.  When
+        UPLO==HplUpper, only  the upper triangular part of A is to be
+        referenced, otherwise only the lower triangular part of A is 
+        to be referenced. 
+
+
+TRANS   (local input)                 const enum HPL_TRANS
+        On entry,  TRANS  specifies  the equations  to  be  solved as
+        follows:
+           TRANS==HplNoTrans     A   * x = b,
+           TRANS==HplTrans       A^T * x = b.
+
+
+DIAG    (local input)                 const enum HPL_DIAG
+        On entry,  DIAG  specifies  whether  A  is unit triangular or
+        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
+        and otherwise, A is not assumed to be unit triangular.
+
+
+N       (local input)                 const int
+        On entry, N specifies the order of the matrix A. N must be at
+        least zero.
+
+
+A       (local input)                 const double *
+        On entry,  A  points  to an array of size equal to or greater
+        than LDA * n. Before entry with  UPLO==HplUpper,  the leading
+        n by n upper triangular  part of the array A must contain the
+        upper triangular  matrix and the  strictly  lower  triangular
+        part of A is not referenced.  When  UPLO==HplLower  on entry,
+        the  leading n by n lower triangular part of the array A must
+        contain the lower triangular matrix  and  the  strictly upper
+        triangular part of A is not referenced.
+         
+        Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
+        are not referenced  either,  but are assumed to be unity.
+
+
+LDA     (local input)                 const int
+        On entry,  LDA  specifies  the  leading  dimension  of  A  as
+        declared  in  the  calling  (sub) program.  LDA  must  be  at
+        least MAX(1,n).
+
+
+X       (local input/output)          double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+        Before entry,  the  incremented array  X  must contain  the n
+        element right-hand side vector b. On exit,  X  is overwritten
+        with the solution vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double a[2*2], x[2];
+   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
+   x[0] = 2.0; x[1] = 1.0;
+   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
+              HplNonUnit, 2, a, 2, x, 1 );
+   printf("x=[%f,%f]\n", x[0], x[1]);
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_dger, +HPL_dgemv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_equil.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_equil.html new file mode 100755 index 000000000..d64ecab99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_equil.html @@ -0,0 +1,115 @@ + + +HPL_equil HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_equil Equilibrate U and forward the column panel L. + +

Synopsis

+#include "hpl.h"

+void +HPL_equil( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_TRANS +TRANS, +const int +N, +double * +U, +const int +LDU, +int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1, +int * +IWORK +); + +

Description

+HPL_equil +equilibrates the local pieces of U, so that on exit to +this function, pieces of U contained in every process row are of the +same size. This phase makes the rolling phase optimal. In addition, +this function probes for the column panel L and forwards it when +possible. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be equilibrated) information.
+
+
+TRANS   (global input)                const enum HPL_TRANS
+        On entry, TRANS specifies whether  U  is stored in transposed
+        or non-transposed form.
+
+
+N       (local input)                 const int
+        On entry, N  specifies the number of rows or columns of  U. N
+        must be at least 0.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
+        non-transposed form, and MAX(1,N) otherwise.
+
+
+IPLEN   (global input)                int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry, IPMAPM1  is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension NPROW+1.
+
+ +

See Also

+HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_fprintf.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_fprintf.html new file mode 100755 index 000000000..d62b2c871 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_fprintf.html @@ -0,0 +1,58 @@ + + +HPL_fprintf HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_fprintf fprintf + fflush wrapper. + +

Synopsis

+#include "hpl.h"

+void +HPL_fprintf( +FILE * +STREAM, +const char * +FORM, +... +); + +

Description

+HPL_fprintf +is a wrapper around fprintf flushing the output stream. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_fprintf( stdout, "Hello World.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_abort, +HPL_warn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_exit.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_exit.html new file mode 100755 index 000000000..b42f315c9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_exit.html @@ -0,0 +1,39 @@ + + +HPL_grid_exit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_exit Exit process grid. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_exit( +HPL_T_grid * +GRID +); + +

Description

+HPL_grid_exit +marks the process grid object for deallocation. The +returned error code MPI_SUCCESS indicates successful completion. +Other error codes are (MPI) implementation dependent. + +

Arguments

+
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid to be released.
+
+ +

See Also

+HPL_pnum, +HPL_grid_init, +HPL_grid_info. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_info.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_info.html new file mode 100755 index 000000000..47f63672d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_info.html @@ -0,0 +1,70 @@ + + +HPL_grid_info HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_info Retrieve grid information. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_info( +const HPL_T_grid * +GRID, +int * +NPROW, +int * +NPCOL, +int * +MYROW, +int * +MYCOL +); + +

Description

+HPL_grid_info +returns the grid shape and the coordinates in the grid +of the calling process. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+NPROW   (global output)               int *
+        On exit,   NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+
+
+NPCOL   (global output)               int *
+        On exit,   NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+
+
+MYROW   (global output)               int *
+        On exit,  MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+
+
+MYCOL   (global output)               int *
+        On exit,  MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+
+ +

See Also

+HPL_pnum, +HPL_grid_init, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_init.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_init.html new file mode 100755 index 000000000..0bec56e6e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_grid_init.html @@ -0,0 +1,73 @@ + + +HPL_grid_init HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_grid_init Create a process grid. + +

Synopsis

+#include "hpl.h"

+int +HPL_grid_init( +MPI_Comm +COMM, +const HPL_T_ORDER +ORDER, +const int +NPROW, +const int +NPCOL, +HPL_T_grid * +GRID +); + +

Description

+HPL_grid_init +creates a NPROW x NPCOL process grid using column- or +row-major ordering from an initial collection of processes identified +by an MPI communicator. Successful completion is indicated by the +returned error code MPI_SUCCESS. Other error codes depend on the MPI +implementation. The coordinates of processes that are not part of the +grid are set to values outside of [0..NPROW) x [0..NPCOL). + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        On entry,  COMM  is  the  MPI  communicator  identifying  the
+        initial  collection  of  processes out of which  the  grid is
+        formed.
+
+
+ORDER   (global input)                const HPL_T_ORDER
+        On entry, ORDER specifies how the processes should be ordered
+        in the grid as follows:
+           ORDER = HPL_ROW_MAJOR    row-major    ordering;
+           ORDER = HPL_COLUMN_MAJOR column-major ordering;
+
+
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid to be created. NPROW must be at least one.
+
+
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid to be created. NPCOL must be at least one.
+
+
+GRID    (local input/output)          HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information to be initialized.
+
+ +

See Also

+HPL_pnum, +HPL_grid_info, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_idamax.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_idamax.html new file mode 100755 index 000000000..f16b296f6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_idamax.html @@ -0,0 +1,68 @@ + + +HPL_idamax HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_idamax 1st k s.t. |x_k| = max_i(|x_i|). + +

Synopsis

+#include "hpl.h"

+int +HPL_idamax( +const int +N, +const double * +X, +const int +INCX +); + +

Description

+HPL_idamax +returns the index in an n-vector x of the first element +having maximum absolute value. + +

Arguments

+
+N       (local input)                 const int
+        On entry, N specifies the length of the vector x. N  must  be
+        at least zero.
+
+
+X       (local input)                 const double *
+        On entry,  X  is an incremented array of dimension  at  least
+        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
+
+
+INCX    (local input)                 const int
+        On entry, INCX specifies the increment for the elements of X.
+        INCX must not be zero.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   double x[3];
+   int    imax;
+   x[0] = 1.0; x[1] = 3.0; x[2] = 2.0;
+   imax = HPL_idamax( 3, x, 1 );
+   printf("imax=%d\n", imax);
+   exit(0);
+   return(0);
+}
+
+ +

See Also

+HPL_daxpy, +HPL_dcopy, +HPL_dscal, +HPL_dswap. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2l.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2l.html new file mode 100755 index 000000000..a3eb758da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2l.html @@ -0,0 +1,71 @@ + + +HPL_indxg2l HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2l Map a global index into a local one. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxg2l( +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2l +computes the local index of a matrix entry pointed to by +the global index IG. This local returned index is the same in all +processes. + +

Arguments

+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2lp.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2lp.html new file mode 100755 index 000000000..d9fa00436 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2lp.html @@ -0,0 +1,86 @@ + + +HPL_indxg2lp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2lp Map a global index into a local one and a process coordinate. + +

Synopsis

+#include "hpl.h"

+void +HPL_indxg2lp( +int * +IL, +int * +PROC, +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2lp +computes the local index of a matrix entry pointed to by +the global index IG as well as the process coordinate which possesses +this entry. The local returned index is the same in all processes. + +

Arguments

+
+IL      (output)                      int *
+        On exit, IL specifies the local index corresponding to IG. IL
+        is at least zero.
+
+
+PROC    (output)                      int *
+        On exit,  PROC  is the  coordinate of the process  owning the
+        entry specified by the global index IG. PROC is at least zero
+        and less than NPROCS.
+
+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry, if SRCPROC = -1, the data  is not  distributed  but
+        replicated,  in  which  case  this  routine returns IG in all
+        processes. Otherwise, the value of SRCPROC is ignored.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2p.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2p.html new file mode 100755 index 000000000..0068dede3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxg2p.html @@ -0,0 +1,70 @@ + + +HPL_indxg2p HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxg2p Map a global index into a process coordinate. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxg2p( +const int +IG, +const int +INB, +const int +NB, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxg2p +computes the process coordinate which possesses the entry +of a matrix specified by a global index IG. + +

Arguments

+
+IG      (input)                       const int
+        On entry, IG specifies the global index of the matrix  entry.
+        IG must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxl2g.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxl2g.html new file mode 100755 index 000000000..216e98057 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_indxl2g.html @@ -0,0 +1,78 @@ + + +HPL_indxl2g HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_indxl2g Map an index-process pair into a global index. + +

Synopsis

+#include "hpl.h"

+int +HPL_indxl2g( +const int +IL, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_indxl2g +computes the global index of a matrix entry pointed to +by the local index IL of the process indicated by PROC. + +

Arguments

+
+IL      (input)                       const int
+        On entry, IL specifies the local  index of the matrix  entry.
+        IL must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC  specifies the coordinate of the process whose
+        local array row or column is to be determined. PROC  must  be
+        at least zero and strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_infog2l.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_infog2l.html new file mode 100755 index 000000000..34feff72c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_infog2l.html @@ -0,0 +1,155 @@ + + +HPL_infog2l HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_infog2l global to local index translation. + +

Synopsis

+#include "hpl.h"

+void +HPL_infog2l( +int +I, +int +J, +const int +IMB, +const int +MB, +const int +INB, +const int +NB, +const int +RSRC, +const int +CSRC, +const int +MYROW, +const int +MYCOL, +const int +NPROW, +const int +NPCOL, +int * +II, +int * +JJ, +int * +PROW, +int * +PCOL +); + +

Description

+HPL_infog2l +computes the starting local index II, JJ corresponding to +the submatrix starting globally at the entry pointed to by I, J. This +routine returns the coordinates in the grid of the process owning the +matrix entry of global indexes I, J, namely PROW and PCOL. + +

Arguments

+
+I       (global input)                int
+        On entry,  I  specifies  the  global  row index of the matrix
+        entry. I must be at least zero.
+
+
+J       (global input)                int
+        On entry,  J  specifies the global column index of the matrix
+        entry. J must be at least zero.
+
+
+IMB     (global input)                const int
+        On entry,  IMB  specifies  the size of the first row block of
+        the global matrix. IMB must be at least one.
+
+
+MB      (global input)                const int
+        On entry,  MB specifies the blocking factor used to partition
+        and  distribute the rows of the matrix A.  MB  must be larger
+        than one.
+
+
+INB     (global input)                const int
+        On entry, INB specifies the size of the first column block of
+        the global matrix. INB must be at least one.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the columns of the matrix A. NB must be larger
+        than one.
+
+
+RSRC    (global input)                const int
+        On entry,  RSRC  specifies  the row coordinate of the process
+        that possesses the row  I.  RSRC  must  be at least zero  and
+        strictly less than NPROW.
+
+
+CSRC    (global input)                const int
+        On entry, CSRC specifies the column coordinate of the process
+        that possesses the column J. CSRC  must be at least zero  and
+        strictly less than NPCOL.
+
+
+MYROW   (local input)                 const int
+        On entry, MYROW  specifies my  row process  coordinate in the
+        grid. MYROW is greater than or equal  to zero  and  less than
+        NPROW.
+
+
+MYCOL   (local input)                 const int
+        On entry, MYCOL specifies my column process coordinate in the
+        grid. MYCOL is greater than or equal  to zero  and  less than
+        NPCOL.
+
+
+NPROW   (global input)                const int
+        On entry,  NPROW  specifies the number of process rows in the
+        grid. NPROW is at least one.
+
+
+NPCOL   (global input)                const int
+        On entry,  NPCOL  specifies  the number of process columns in
+        the grid. NPCOL is at least one.
+
+
+II      (local output)                int *
+        On exit, II  specifies the  local  starting  row index of the
+        submatrix. On exit, II is at least 0.
+
+
+JJ      (local output)                int *
+        On exit, JJ  specifies the local starting column index of the
+        submatrix. On exit, JJ is at least 0.
+
+
+PROW    (global output)               int *
+        On exit, PROW is the row coordinate of the process owning the
+        entry specified by the global index I.  PROW is at least zero
+        and less than NPROW.
+
+
+PCOL    (global output)               int *
+        On exit, PCOL  is the column coordinate of the process owning
+        the entry specified by the global index J.  PCOL  is at least
+        zero and less than NPCOL.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_jumpit.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_jumpit.html new file mode 100755 index 000000000..be87a1f53 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_jumpit.html @@ -0,0 +1,65 @@ + + +HPL_jumpit HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_jumpit jump into the random sequence. + +

Synopsis

+#include "hpl.h"

+void +HPL_jumpit( +int * +MULT, +int * +IADD, +int * +IRANN, +int * +IRANM +); + +

Description

+HPL_jumpit +jumps in the random sequence from the number X(n) encoded +in IRANN to the number X(m) encoded in IRANM using the constants A +and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A +and C obviously depend on m and n, see the function HPL_xjumpm in +order to initialize them. + +

Arguments

+
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant A.
+
+
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the constant C.
+
+
+IRANN   (local input)                 int *
+        On entry,  IRANN  is an array of dimension 2,  that contains 
+        the 16-lower and 15-higher bits of the encoding of X(n).
+
+
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.  On exit, this
+        array contains respectively the 16-lower and  15-higher bits
+        of the encoding of X(m).
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ladd.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ladd.html new file mode 100755 index 000000000..0c42d80d8 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ladd.html @@ -0,0 +1,57 @@ + + +HPL_ladd HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ladd Adds two long positive integers. + +

Synopsis

+#include "hpl.h"

+void +HPL_ladd( +int * +J, +int * +K, +int * +I +); + +

Description

+HPL_ladd +adds without carry two long positive integers K and J and +puts the result into I. The long integers I, J, K are encoded on 64 +bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second +entry. + +

Arguments

+
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+
+
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+
+
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+
+ +

See Also

+HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_lmul.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_lmul.html new file mode 100755 index 000000000..8ef70cba5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_lmul.html @@ -0,0 +1,58 @@ + + +HPL_lmul HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_lmul multiplies 2 long positive integers. + +

Synopsis

+#include "hpl.h"

+void +HPL_lmul( +int * +K, +int * +J, +int * +I +); + +

Description

+HPL_lmul +multiplies without carry two long positive integers K and J +and puts the result into I. The long integers I, J, K are encoded on +64 bits using an array of 2 integers. The 32-lower bits are stored in +the first entry of each array, the 32-higher bits in the second entry +of each array. For efficiency purposes, the intrinsic modulo function +is inlined. + +

Arguments

+
+K       (local input)                 int *
+        On entry, K is an integer array of dimension 2 containing the
+        encoded long integer K.
+
+
+J       (local input)                 int *
+        On entry, J is an integer array of dimension 2 containing the
+        encoded long integer J.
+
+
+I       (local output)                int *
+        On entry, I is an integer array of dimension 2. On exit, this
+        array contains the encoded long integer result.
+
+ +

See Also

+HPL_ladd, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_logsort.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_logsort.html new file mode 100755 index 000000000..da271fc19 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_logsort.html @@ -0,0 +1,83 @@ + + +HPL_logsort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_logsort Sort the processes in logarithmic order. + +

Synopsis

+#include "hpl.h"

+void +HPL_logsort( +const int +NPROCS, +const int +ICURROC, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1 +); + +

Description

+HPL_logsort +computes an array IPMAP and its inverse IPMAPM1 that +contain the logarithmic sorted processes id with respect to the local +number of rows of U that they own. This is necessary to ensure that +the logarithmic spreading of U is optimal in terms of number of steps +and communication volume as well. In other words, the largest pieces +of U will be sent a minimal number of times. + +

Arguments

+
+NPROCS  (global input)                const int
+        On entry, NPROCS  specifies the number of process rows in the
+        process grid. NPROCS is at least one.
+
+
+ICURROC (global input)                const int
+        On entry, ICURROC is the source process row.
+
+
+IPLEN   (global input/output)         int *
+        On entry, IPLEN is an array of dimension NPROCS+1,  such that
+        IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
+        that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
+        rows of U  in the processes before process IPMAP[i] after the
+        sort,  with  the convention that  IPLEN[NPROCS] is  the total
+        number  of rows  of the panel.  In other words,  IPLEN[i+1] -
+        IPLEN[i] is  the  number of rows of A that should be moved to
+        the process IPMAP[i].  IPLEN  is such that the number of rows
+        of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
+        this array contains the logarithmic mapping of the processes. In
+        other words, IPMAP[myroc] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROCS)
+
+ +

See Also

+HPL_plindx1, +HPL_plindx10, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_max.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_max.html new file mode 100755 index 000000000..7cf0b0670 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_max.html @@ -0,0 +1,60 @@ + + +HPL_max HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_max Combine (max) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_max( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_max +combines (max) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_min.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_min.html new file mode 100755 index 000000000..9c109c338 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_min.html @@ -0,0 +1,60 @@ + + +HPL_min HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_min Combine (min) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_min( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_min +combines (min) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffer  operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numroc.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numroc.html new file mode 100755 index 000000000..fa617cac3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numroc.html @@ -0,0 +1,79 @@ + + +HPL_numroc HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_numroc Compute the local number of rows/columns. + +

Synopsis

+#include "hpl.h"

+int +HPL_numroc( +const int +N, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_numroc +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index 0. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numrocI. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numrocI.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numrocI.html new file mode 100755 index 000000000..c1037a193 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_numrocI.html @@ -0,0 +1,86 @@ + + +HPL_numrocI HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_numrocI Compute the local number of rows/columns. + +

Synopsis

+#include "hpl.h"

+int +HPL_numrocI( +const int +N, +const int +I, +const int +INB, +const int +NB, +const int +PROC, +const int +SRCPROC, +const int +NPROCS +); + +

Description

+HPL_numrocI +returns the local number of matrix rows/columns process +PROC will get if we give out N rows/columns starting from global +index I. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies the number of rows/columns being dealt
+        out. N must be at least zero.
+
+
+I       (input)                       const int
+        On entry, I  specifies the global index of the matrix entry.
+        I must be at least zero.
+
+
+INB     (input)                       const int
+        On entry,  INB  specifies  the size of the first block of the
+        global matrix. INB must be at least one.
+
+
+NB      (input)                       const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+PROC    (input)                       const int
+        On entry, PROC specifies  the coordinate of the process whose
+        local portion is determined.  PROC must be at least zero  and
+        strictly less than NPROCS.
+
+
+SRCPROC (input)                       const int
+        On entry,  SRCPROC  specifies  the coordinate of the  process
+        that possesses the first row or column of the matrix. SRCPROC
+        must be at least zero and strictly less than NPROCS.
+
+
+NPROCS  (input)                       const int
+        On entry,  NPROCS  specifies the total number of process rows
+        or columns over which the matrix is distributed.  NPROCS must
+        be at least one.
+
+ +

See Also

+HPL_indxg2l, +HPL_indxg2lp, +HPL_indxg2p, +HPL_indxl2g, +HPL_numroc. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pabort.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pabort.html new file mode 100755 index 000000000..89aacbd9f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pabort.html @@ -0,0 +1,57 @@ + + +HPL_pabort HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pabort halts execution. + +

Synopsis

+#include "hpl.h"

+void +HPL_pabort( +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_pabort +displays an error message on stderr and halts execution. + +

Arguments

+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

See Also

+HPL_fprintf, +HPL_pwarn. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_packL.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_packL.html new file mode 100755 index 000000000..1e8f8106c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_packL.html @@ -0,0 +1,59 @@ + + +HPL_packL HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_packL Form the MPI structure for the row ring broadcasts. + +

Synopsis

+#include "hpl.h"

+int +HPL_packL( +HPL_T_panel * +PANEL, +const int +INDEX, +const int +LEN, +const int +IBUF +); + +

Description

+HPL_packL +forms the MPI data type for the panel to be broadcast. +Successful completion is indicated by the returned error code +MPI_SUCCESS. + +

Arguments

+
+PANEL   (input/output)                HPL_T_panel *
+        On entry,  PANEL  points to the  current panel data structure
+        being broadcast.
+
+
+INDEX   (input)                       const int
+        On entry,  INDEX  points  to  the  first entry of the  packed
+        buffer being broadcast.
+
+
+LEN     (input)                       const int
+        On entry, LEN is the length of the packed buffer.
+
+
+IBUF    (input)                       const int
+        On entry, IBUF  specifies the panel buffer/count/type entries
+        that should be initialized.
+
+ +

See Also

+HPL_binit, +HPL_bcast, +HPL_bwait. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pddriver.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pddriver.html new file mode 100755 index 000000000..adcc02e00 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pddriver.html @@ -0,0 +1,27 @@ + + +main HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+main HPL main timing program. + +

Synopsis

+#include "hpl.h"

+int +main(); + +

Description

+main +is the main driver program for testing the HPL routines. +This program is driven by a short data file named "HPL.dat". + +

See Also

+HPL_pdinfo, +HPL_pdtest. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdfact.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdfact.html new file mode 100755 index 000000000..f51cee5d2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdfact.html @@ -0,0 +1,78 @@ + + +HPL_pdfact HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdfact recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdfact( +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdfact +recursively factorizes a 1-dimensional panel of columns. +The RPFACT function pointer specifies the recursive algorithm to be +used, either Crout, Left- or Right looking. NBMIN allows to vary the +recursive stopping criterion in terms of the number of columns in the +panel, and NDIV allows to specify the number of subpanels each panel +should be divided into. Usually a value of 2 will be chosen. Finally +PFACT is a function pointer specifying the non-recursive algorithm to +be used on at most NBMIN columns. One can also choose here between +Crout, Left- or Right looking. Empirical tests seem to indicate that +values of 4 or 8 for NBMIN give the best results. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv.html new file mode 100755 index 000000000..ebb9c18e4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv.html @@ -0,0 +1,56 @@ + + +HPL_pdgesv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesv Solve A x = b. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesv( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesv +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with or without look-ahead. The lower triangular factor is left +unpivoted and the pivots are not returned. The right hand side is the +N+1 column of the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdtrsv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv0.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv0.html new file mode 100755 index 000000000..c137975d4 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesv0.html @@ -0,0 +1,63 @@ + + +HPL_pdgesv0 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesv0 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesv0( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesv0 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +without look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK1.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK1.html new file mode 100755 index 000000000..1a19edc05 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK1.html @@ -0,0 +1,62 @@ + + +HPL_pdgesvK1 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesvK1 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesvK1( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesvK1 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK2.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK2.html new file mode 100755 index 000000000..f2a9a25f0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdgesvK2.html @@ -0,0 +1,63 @@ + + +HPL_pdgesvK2 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdgesvK2 Factor an N x N+1 matrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdgesvK2( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +HPL_T_pmat * +A +); + +

Description

+HPL_pdgesvK2 +factors an N-by-N+1 matrix using LU factorization with row +partial pivoting. The main algorithm is the "right looking" variant +with look-ahead. The lower triangular factor is left unpivoted and +the pivots are not returned. The right hand side is the N+1 column of +the coefficient matrix. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdfact, +HPL_binit, +HPL_bcast, +HPL_bwait, +HPL_pdupdateNN, +HPL_pdupdateNT, +HPL_pdupdateTN, +HPL_pdupdateTT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdinfo.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdinfo.html new file mode 100755 index 000000000..94a7f78c0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdinfo.html @@ -0,0 +1,252 @@ + + +HPL_pdinfo HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdinfo Read input parameter file. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdinfo( +HPL_T_test * +TEST, +int * +NS, +int * +N, +int * +NBS, +int * +NB, +HPL_T_ORDER * +PMAPPIN, +int * +NPQS, +int * +P, +int * +Q, +int * +NPFS, +HPL_T_FACT * +PF, +int * +NBMS, +int * +NBM, +int * +NDVS, +int * +NDV, +int * +NRFS, +HPL_T_FACT * +RF, +int * +NTPS, +HPL_T_TOP * +TP, +int * +NDHS, +int * +DH, +HPL_T_SWAP * +FSWAP, +int * +TSWAP, +int * +L1NOTRAN, +int * +UNOTRAN, +int * +EQUIL, +int * +ALIGN +); + +

Description

+HPL_pdinfo +reads the startup information for the various tests and +transmits it to all processes. + +

Arguments

+
+TEST    (global output)               HPL_T_test *
+        On entry, TEST  points to a testing data structure.  On exit,
+        the fields of this data structure are initialized as follows:
+        TEST->outfp  specifies the output file where the results will
+        be printed.  It is only defined and used by  the process 0 of
+        the grid.  TEST->thrsh specifies the  threshold value for the
+        test ratio.  TEST->epsil is the relative machine precision of
+        the distributed computer.  Finally  the test counters, kfail,
+        kpass, kskip, ktest are initialized to zero.
+
+
+NS      (global output)               int *
+        On exit,  NS  specifies the number of different problem sizes
+        to be tested. NS is less than or equal to HPL_MAX_PARAM.
+
+
+N       (global output)               int *
+        On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
+        the first NS entries of this array contain the  problem sizes
+        to run the code with.
+
+
+NBS     (global output)               int *
+        On exit,  NBS  specifies the number of different distribution
+        blocking factors to be tested. NBS must be less than or equal
+        to HPL_MAX_PARAM.
+
+
+NB      (global output)               int *
+        On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NBS entries of this array contain the values of the
+        various distribution blocking factors, to run the code with.
+
+
+PMAPPIN (global output)               HPL_T_ORDER *
+        On exit,  PMAPPIN  specifies the process mapping onto the no-
+        des of the  MPI machine configuration.  PMAPPIN  defaults  to
+        row-major ordering.
+
+
+NPQS    (global output)               int *
+        On exit, NPQS  specifies the  number of different values that
+        can be used for P and Q, i.e., the number of process grids to
+        run  the  code with.  NPQS must be  less  than  or  equal  to
+        HPL_MAX_PARAM.
+
+
+P       (global output)               int *
+        On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of P,
+        the number of process rows of the  NPQS grids to run the code
+        with.
+
+
+Q       (global output)               int *
+        On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
+        the first NPQS entries of this array contain the values of Q,
+        the number of process columns of the  NPQS  grids to  run the
+        code with.
+
+
+NPFS    (global output)               int *
+        On exit, NPFS  specifies the  number of different values that
+        can be used for PF : the panel factorization algorithm to run
+        the code with. NPFS is less than or equal to HPL_MAX_PARAM.
+
+
+PF      (global output)               HPL_T_FACT *
+        On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NPFS  entries  of this array  contain  the various
+        panel factorization algorithms to run the code with.
+
+
+NBMS    (global output)               int *
+        On exit,  NBMS  specifies  the  number  of  various recursive
+        stopping criteria  to be tested.  NBMS  must be  less than or
+        equal to HPL_MAX_PARAM.
+
+
+NBM     (global output)               int *
+        On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NBMS entries of this array contain the values
+        of the various recursive stopping criteria to be tested.
+
+
+NDVS    (global output)               int *
+        On exit,  NDVS  specifies  the number  of various numbers  of
+        panels in recursion to be tested.  NDVS is less than or equal
+        to HPL_MAX_PARAM.
+
+
+NDV     (global output)               int *
+        On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDVS entries of this array contain the values
+        of the various numbers of panels in recursion to be tested.
+
+
+NRFS    (global output)               int *
+        On exit, NRFS  specifies the  number of different values that
+        can be used for RF : the recursive factorization algorithm to
+        be tested. NRFS is less than or equal to HPL_MAX_PARAM.
+
+
+RF      (global output)               HPL_T_FACT *
+        On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
+        the first  NRFS  entries  of  this array contain  the various
+        recursive factorization algorithms to run the code with.
+
+
+NTPS    (global output)               int *
+        On exit, NTPS  specifies the  number of different values that
+        can be used for the  broadcast topologies  to be tested. NTPS
+        is less than or equal to HPL_MAX_PARAM.
+
+
+TP      (global output)               HPL_T_TOP *
+        On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
+        the  first NTPS  entries of this  array  contain  the various
+        broadcast (along rows) topologies to run the code with.
+
+
+NDHS    (global output)               int *
+        On exit, NDHS  specifies the  number of different values that
+        can be used for the  lookahead depths to be  tested.  NDHS is
+        less than or equal to HPL_MAX_PARAM.
+
+
+DH      (global output)               int *
+        On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
+        exit, the first NDHS entries of this array contain the values
+        of lookahead depths to run the code with.  Such a value is at
+        least 0 (no-lookahead) or greater than zero.
+
+
+FSWAP   (global output)               HPL_T_SWAP *
+        On exit, FSWAP specifies the swapping algorithm to be used in
+        all tests.
+
+
+TSWAP   (global output)               int *
+        On exit,  TSWAP  specifies the swapping threshold as a number
+        of columns when the mixed swapping algorithm was chosen.
+
+
+L1NOTRAN (global output)              int *
+        On exit, L1NOTRAN specifies whether the upper triangle of the
+        panels of columns  should  be stored  in  no-transposed  form
+        (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
+
+
+UNOTRAN (global output)               int *
+        On exit, UNOTRAN  specifies whether the panels of rows should
+        be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
+        form (UNOTRAN=0) during their broadcast.
+
+
+EQUIL   (global output)               int *
+        On exit,  EQUIL  specifies  whether  equilibration during the
+        swap-broadcast  of  the  panel of rows  should  be  performed
+        (EQUIL=1) or not (EQUIL=0).
+
+
+ALIGN   (global output)               int *
+        On exit,  ALIGN  specifies the alignment  of  the dynamically
+        allocated buffers in double precision words. ALIGN is greater
+        than zero.
+
+ +

See Also

+HPL_pddriver, +HPL_pdtest. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlamch.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlamch.html new file mode 100755 index 000000000..c1b51370a --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlamch.html @@ -0,0 +1,67 @@ + + +HPL_pdlamch HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlamch determines machine-specific arithmetic constants. + +

Synopsis

+#include "hpl.h"

+double +HPL_pdlamch( +MPI_Comm +COMM, +const HPL_T_MACH +CMACH +); + +

Description

+HPL_pdlamch +determines machine-specific arithmetic constants such as +the relative machine precision (eps), the safe minimum(sfmin) such that +1/sfmin does not overflow, the base of the machine (base), the precision +(prec), the number of (base) digits in the mantissa (t), whether +rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum +exponent before (gradual) underflow (emin), the underflow threshold +(rmin)- base**(emin-1), the largest exponent before overflow (emax), the +overflow threshold (rmax) - (base**emax)*(1-eps). + +

Arguments

+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+
+CMACH   (global input)                const HPL_T_MACH
+        Specifies the value to be returned by HPL_pdlamch            
+           = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
+           = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
+           = HPL_MACH_BASE,  HPL_pdlamch := base                     
+           = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
+           = HPL_MACH_MLEN,  HPL_pdlamch := t                        
+           = HPL_MACH_RND,   HPL_pdlamch := rnd                      
+           = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
+           = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
+           = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
+           = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
+         
+        where                                                        
+         
+           eps   = relative machine precision,                       
+           sfmin = safe minimum,                                     
+           base  = base of the machine,                              
+           prec  = eps*base,                                         
+           t     = number of digits in the mantissa,                 
+           rnd   = 1.0 if rounding occurs in addition,               
+           emin  = minimum exponent before underflow,                
+           rmin  = underflow threshold,                              
+           emax  = largest exponent before overflow,                 
+           rmax  = overflow threshold.
+
+ + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlange.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlange.html new file mode 100755 index 000000000..0d1affc3d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlange.html @@ -0,0 +1,88 @@ + + +HPL_pdlange HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlange Compute ||A||. + +

Synopsis

+#include "hpl.h"

+double +HPL_pdlange( +const HPL_T_grid * +GRID, +const HPL_T_NORM +NORM, +const int +M, +const int +N, +const int +NB, +const double * +A, +const int +LDA +); + +

Description

+HPL_pdlange +returns the value of the one norm, or the infinity norm, +or the element of largest absolute value of a distributed matrix A: + + + max(abs(A(i,j))) when NORM = HPL_NORM_A, + norm1(A), when NORM = HPL_NORM_1, + normI(A), when NORM = HPL_NORM_I, + +where norm1 denotes the one norm of a matrix (maximum column sum) and +normI denotes the infinity norm of a matrix (maximum row sum). Note +that max(abs(A(i,j))) is not a matrix norm. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+NORM    (global input)                const HPL_T_NORM
+        On entry,  NORM  specifies  the  value to be returned by this
+        function as described above.
+
+
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+A       (local input)                 const double *
+        On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
+        that contains the local pieces of the distributed matrix A.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+ +

See Also

+HPL_pdlaprnt, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaprnt.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaprnt.html new file mode 100755 index 000000000..0ce810db0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaprnt.html @@ -0,0 +1,94 @@ + + +HPL_pdlaprnt HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaprnt Print a distributed matrix A. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaprnt( +const HPL_T_grid * +GRID, +const int +M, +const int +N, +const int +NB, +double * +A, +const int +LDA, +const int +IAROW, +const int +IACOL, +const char * +CMATNM +); + +

Description

+HPL_pdlaprnt +prints to standard error a distributed matrix A. The +local pieces of A are sent to the process of coordinates (0,0) in +the grid and then printed. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+M       (global input)                const int
+        On entry,  M  specifies the number of rows of the coefficient
+        matrix A. M must be at least zero.
+
+
+N       (global input)                const int
+        On  entry,   N   specifies  the  number  of  columns  of  the
+        coefficient matrix A. N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix. NB must be larger than one.
+
+
+A       (local input)                 double *
+        On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
+        This array contains the coefficient matrix to be printed.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+
+IAROW   (global input)                const int
+        On entry,  IAROW  specifies the row process coordinate owning
+        the  first row of A.  IAROW  must be  larger than or equal to
+        zero and less than NPROW.
+
+
+IACOL   (global input)                const int
+        On entry,  IACOL  specifies  the  column  process  coordinate
+        owning the  first column  of A. IACOL  must be larger than or
+        equal to zero and less than NPCOL.
+
+
+CMATNM  (global input)                const char *
+        On entry, CMATNM is the name of the matrix to be printed.
+
+ +

See Also

+HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00N.html new file mode 100755 index 000000000..07279fdb0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00N.html @@ -0,0 +1,82 @@ + + +HPL_pdlaswp00N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp00N Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp00N( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp00N +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNN, +HPL_pdupdateTN, +HPL_pipid, +HPL_plindx0, +HPL_dlaswp01N, +HPL_dlaswp02N, +HPL_dlaswp03N, +HPL_dlaswp04N, +HPL_dlaswp05N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00T.html new file mode 100755 index 000000000..08b8ea770 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp00T.html @@ -0,0 +1,82 @@ + + +HPL_pdlaswp00T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp00T Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp00T( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp00T +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +Bi-directional exchange is used to perform the swap :: broadcast of +the row panel U at once, resulting in a lower number of messages than +usual as well as a lower communication volume. With P process rows and +assuming bi-directional links, the running time of this function can +be approximated by: + + log_2(P) * (lat + NB*LocQ(N) / bdwth) + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. Mono +directional links will double this communication cost. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be broadcast and swapped) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNT, +HPL_pdupdateTT, +HPL_pipid, +HPL_plindx0, +HPL_dlaswp01T, +HPL_dlaswp02N, +HPL_dlaswp03T, +HPL_dlaswp04T, +HPL_dlaswp05T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01N.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01N.html new file mode 100755 index 000000000..2d4772fda --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01N.html @@ -0,0 +1,86 @@ + + +HPL_pdlaswp01N HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp01N Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp01N( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp01N +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNN, +HPL_pdupdateTN, +HPL_pipid, +HPL_plindx1, +HPL_plindx10, +HPL_spreadN, +HPL_equil, +HPL_rollN, +HPL_dlaswp00N, +HPL_dlaswp01N, +HPL_dlaswp06N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01T.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01T.html new file mode 100755 index 000000000..f6a5d8c4b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdlaswp01T.html @@ -0,0 +1,86 @@ + + +HPL_pdlaswp01T HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdlaswp01T Broadcast a column panel L and swap the row panel U. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdlaswp01T( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdlaswp01T +applies the NB row interchanges to NN columns of the +trailing submatrix and broadcasts a column panel. + +A "Spread then roll" algorithm performs the swap :: broadcast of the +row panel U at once, resulting in a minimal communication volume and +a "very good" use of the connectivity if available. With P process +rows and assuming bi-directional links, the running time of this +function can be approximated by: + + (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth + +where NB is the number of rows of the row panel U, N is the global +number of columns being updated, lat and bdwth are the latency and +bandwidth of the network for double precision real words. K is +a constant in (2,3] that depends on the achieved bandwidth during a +simultaneous message exchange between two processes. An empirical +optimistic value of K is typically 2.4. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to  be swapped and broadcast starting at
+        the current position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesvK2, +HPL_pdupdateNT, +HPL_pdupdateTT, +HPL_pipid, +HPL_plindx1, +HPL_plindx10, +HPL_spreadT, +HPL_equil, +HPL_rollT, +HPL_dlaswp10N, +HPL_dlaswp01T, +HPL_dlaswp06T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmatgen.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmatgen.html new file mode 100755 index 000000000..28fb95509 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmatgen.html @@ -0,0 +1,87 @@ + + +HPL_pdmatgen HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdmatgen Parallel random matrix generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdmatgen( +const HPL_T_grid * +GRID, +const int +M, +const int +N, +const int +NB, +double * +A, +const int +LDA, +const int +ISEED +); + +

Description

+HPL_pdmatgen +generates (or regenerates) a parallel random matrix A. + +The pseudo-random generator uses the linear congruential algorithm: +X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer +Programming, Knuth 1973, Vol. 2. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+M       (global input)                const int
+        On entry,  M  specifies  the number  of rows of the matrix A.
+        M must be at least zero.
+
+
+N       (global input)                const int
+        On entry,  N specifies the number of columns of the matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+
+A       (local output)                double *
+        On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
+        On exit, this array contains the coefficients of the randomly
+        generated matrix.
+
+
+LDA     (local input)                 const int
+        On entry, LDA specifies the leading dimension of the array A.
+        LDA must be at least max(1,LocP(M)).
+
+
+ISEED   (global input)                const int
+        On entry, ISEED  specifies  the  seed  number to generate the
+        matrix A. ISEED must be at least zero.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmxswp.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmxswp.html new file mode 100755 index 000000000..c11d2b2da --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdmxswp.html @@ -0,0 +1,96 @@ + + +HPL_pdmxswp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdmxswp swaps and broadcasts the pivot row. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdmxswp( +HPL_T_panel * +PANEL, +const int +M, +const int +II, +const int +JJ, +double * +WORK +); + +

Description

+HPL_pdmxswp +swaps and broadcasts the absolute value max row using +bi-directional exchange. The buffer is partially set by HPL_dlocmax. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by + + log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) + +where lat and bdwth are the latency and bandwidth of the network for +double precision real elements. Communication only occurs in one +process column. Mono-directional links will cause the communication +cost to double. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of the matrix
+        column on which this function operates.
+
+
+II      (local input)                 const int
+        On entry, II  specifies the row offset where the column to be
+        operated on starts with respect to the panel.
+
+
+JJ      (local input)                 const int
+        On entry, JJ  specifies the column offset where the column to
+        be operated on starts with respect to the panel.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
+        It  is assumed that  HPL_dlocmax  was called  prior  to  this
+        routine to  initialize  the first four entries of this array.
+        On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
+        Note that this is also the  JJth  row  (or column) of L1. The
+        remaining part is used as a temporary array.
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrN.html new file mode 100755 index 000000000..663d2e266 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrN.html @@ -0,0 +1,100 @@ + + +HPL_pdpancrN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpancrN Crout panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpancrN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpancrN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in no-transpose form (i.e. just like the input +matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrT.html new file mode 100755 index 000000000..0e1490430 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpancrT.html @@ -0,0 +1,99 @@ + + +HPL_pdpancrT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpancrT Crout panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpancrT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpancrT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Crout variant of the usual +one-dimensional algorithm. The lower triangular N0-by-N0 upper block +of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_disp.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_disp.html new file mode 100755 index 000000000..cb78fa4be --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_disp.html @@ -0,0 +1,38 @@ + + +HPL_pdpanel_disp HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_disp Deallocate a panel data structure. + +

Synopsis

+#include "hpl.h"

+int +HPL_pdpanel_disp( +HPL_T_panel * * +PANEL +); + +

Description

+HPL_pdpanel_disp +deallocates the panel structure and resources and +stores the error code returned by the panel factorization. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to be deallocated.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_free. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_free.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_free.html new file mode 100755 index 000000000..d33e5e400 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_free.html @@ -0,0 +1,38 @@ + + +HPL_pdpanel_free HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_free Deallocate the panel resources. + +

Synopsis

+#include "hpl.h"

+int +HPL_pdpanel_free( +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdpanel_free +deallocates the panel resources and stores the error +code returned by the panel factorization. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points  to  the  panel data  structure from
+        which the resources should be deallocated.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_disp. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_init.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_init.html new file mode 100755 index 000000000..2d105354f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_init.html @@ -0,0 +1,99 @@ + + +HPL_pdpanel_init HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_init Initialize the panel resources. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanel_init( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +M, +const int +N, +const int +JB, +HPL_T_pmat * +A, +const int +IA, +const int +JA, +const int +TAG, +HPL_T_panel * +PANEL +); + +

Description

+HPL_pdpanel_init +initializes a panel data structure. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+
+
+JB      (global input)                const int
+        On entry, JB specifies the number of columns of the panel.
+        JB must be at least zero.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+
+
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+
+
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_disp, +HPL_pdpanel_free. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_new.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_new.html new file mode 100755 index 000000000..1b3029ecb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanel_new.html @@ -0,0 +1,99 @@ + + +HPL_pdpanel_new HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanel_new Create a panel data structure. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanel_new( +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +M, +const int +N, +const int +JB, +HPL_T_pmat * +A, +const int +IA, +const int +JA, +const int +TAG, +HPL_T_panel * * +PANEL +); + +

Description

+HPL_pdpanel_new +creates and initializes a panel data structure. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters.
+
+
+M       (local input)                 const int
+        On entry, M specifies the global number of rows of the panel.
+        M must be at least zero.
+
+
+N       (local input)                 const int
+        On entry,  N  specifies  the  global number of columns of the
+        panel and trailing submatrix. N must be at least zero.
+
+
+JB      (global input)                const int
+        On entry, JB specifies the number of columns of the panel.
+        JB must be at least zero.
+
+
+A       (local input/output)          HPL_T_pmat *
+        On entry, A points to the data structure containing the local
+        array information.
+
+
+IA      (global input)                const int
+        On entry,  IA  is  the global row index identifying the panel
+        and trailing submatrix. IA must be at least zero.
+
+
+JA      (global input)                const int
+        On entry, JA is the global column index identifying the panel
+        and trailing submatrix. JA must be at least zero.
+
+
+TAG     (global input)                const int
+        On entry, TAG is the row broadcast message id.
+
+
+PANEL   (local input/output)          HPL_T_panel * *
+        On entry,  PANEL  points  to  the  address  of the panel data
+        structure to create and initialize.
+
+ +

See Also

+HPL_pdpanel_new, +HPL_pdpanel_init, +HPL_pdpanel_disp. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllN.html new file mode 100755 index 000000000..386815fd2 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllN.html @@ -0,0 +1,100 @@ + + +HPL_pdpanllN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanllN Left-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanllN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanllN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllT.html new file mode 100755 index 000000000..04307e823 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanllT.html @@ -0,0 +1,99 @@ + + +HPL_pdpanllT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanllT Left-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanllT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanllT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Left-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanrlN, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlN.html new file mode 100755 index 000000000..8d705c63c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlN.html @@ -0,0 +1,100 @@ + + +HPL_pdpanrlN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanrlN Right-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanrlN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanrlN +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in no-transpose form (i.e. just like the +input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlT. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlT.html new file mode 100755 index 000000000..af458e7a1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdpanrlT.html @@ -0,0 +1,99 @@ + + +HPL_pdpanrlT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdpanrlT Right-looking panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdpanrlT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdpanrlT +factorizes a panel of columns that is a sub-array of a +larger one-dimensional panel A using the Right-looking variant of the +usual one-dimensional algorithm. The lower triangular N0-by-N0 upper +block of the panel is stored in transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +Note that one iteration of the main loop is unrolled. The local +computation of the absolute value max of the next column is performed +just after its update by the current column. This allows to bring the +current column only once through cache at each step. The current +implementation does not perform any blocking for this sequence of +BLAS operations, however the design allows for plugging in an optimal +(machine-specific) specialized BLAS-like kernel. This idea has been +suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrN.html new file mode 100755 index 000000000..9169c48cc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpancrN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpancrN Crout recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpancrN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpancrN +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrT.html new file mode 100755 index 000000000..cc9047c3c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpancrT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpancrT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpancrT Crout recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpancrT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpancrT +recursively factorizes a panel of columns using the +recursive Crout variant of the usual one-dimensional algorithm. +The lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllN.html new file mode 100755 index 000000000..bf16e6009 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanllN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanllN Left-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanllN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanllN +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllT.html new file mode 100755 index 000000000..9904fb326 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanllT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanllT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanllT Left-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanllT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanllT +recursively factorizes a panel of columns using the +recursive Left-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanrlN, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlN.html new file mode 100755 index 000000000..9758c0722 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlN.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanrlN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanrlN Right-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanrlN( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanrlN +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +no-transpose form (i.e. just like the input matrix itself). + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlT, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlT.html new file mode 100755 index 000000000..ed48a815d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdrpanrlT.html @@ -0,0 +1,97 @@ + + +HPL_pdrpanrlT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdrpanrlT Right-looking recursive panel factorization. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdrpanrlT( +HPL_T_panel * +PANEL, +const int +M, +const int +N, +const int +ICOFF, +double * +WORK +); + +

Description

+HPL_pdrpanrlT +recursively factorizes a panel of columns using the +recursive Right-looking variant of the one-dimensional algorithm. The +lower triangular N0-by-N0 upper block of the panel is stored in +transpose form. + +Bi-directional exchange is used to perform the swap::broadcast +operations at once for one column in the panel. This results in a +lower number of slightly larger messages than usual. On P processes +and assuming bi-directional links, the running time of this function +can be approximated by (when N is equal to N0): + + N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + + N0^2 * ( M - N0/3 ) * gam2-3 + +where M is the local number of rows of the panel, lat and bdwth are +the latency and bandwidth of the network for double precision real +words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS +rate of execution. The recursive algorithm allows indeed to almost +achieve Level 3 BLAS performance in the panel factorization. On a +large number of modern machines, this operation is however latency +bound, meaning that its cost can be estimated by only the latency +portion N0 * log_2(P) * lat. Mono-directional links will double this +communication cost. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+M       (local input)                 const int
+        On entry,  M specifies the local number of rows of sub(A).
+
+
+N       (local input)                 const int
+        On entry,  N specifies the local number of columns of sub(A).
+
+
+ICOFF   (global input)                const int
+        On entry, ICOFF specifies the row and column offset of sub(A)
+        in A.
+
+
+WORK    (local workspace)             double *
+        On entry, WORK  is a workarray of size at least 2*(4+2*N0).
+
+ +

See Also

+HPL_dlocmax, +HPL_dlocswpN, +HPL_dlocswpT, +HPL_pdmxswp, +HPL_pdpancrN, +HPL_pdpancrT, +HPL_pdpanllN, +HPL_pdpanllT, +HPL_pdpanrlN, +HPL_pdpanrlT, +HPL_pdrpancrN, +HPL_pdrpancrT, +HPL_pdrpanllN, +HPL_pdrpanllT, +HPL_pdrpanrlN, +HPL_pdfact. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtest.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtest.html new file mode 100755 index 000000000..1c11c34d7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtest.html @@ -0,0 +1,81 @@ + + +HPL_pdtest HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdtest Perform one test. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdtest( +HPL_T_test * +TEST, +HPL_T_grid * +GRID, +HPL_T_palg * +ALGO, +const int +N, +const int +NB +); + +

Description

+HPL_pdtest +performs one test given a set of parameters such as the +process grid, the problem size, the distribution blocking factor ... +This function generates the data, calls and times the linear system +solver, checks the accuracy of the obtained vector solution and +writes this information to the file pointed to by TEST->outfp. + +

Arguments

+
+TEST    (global input)                HPL_T_test *
+        On entry,  TEST  points  to a testing data structure:  outfp
+        specifies the output file where the results will be printed.
+        It is only defined and used by the process  0  of the  grid.
+        thrsh  specifies  the  threshold  value  for the test ratio.
+        Concretely, a test is declared "PASSED"  if and only if  the
+        following inequality is satisfied:
+        ||Ax-b||_oo / ( epsil *
+                        ( || x ||_oo * || A ||_oo + || b ||_oo ) *
+                         N )  < thrsh.
+        epsil  is the  relative machine precision of the distributed
+        computer. Finally the test counters, kfail, kpass, kskip and
+        ktest are updated as follows:  if the test passes,  kpass is
+        incremented by one;  if the test fails, kfail is incremented
+        by one; if the test is skipped, kskip is incremented by one.
+        ktest is left unchanged.
+
+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+ALGO    (global input)                HPL_T_palg *
+        On entry,  ALGO  points to  the data structure containing the
+        algorithmic parameters to be used for this test.
+
+
+N       (global input)                const int
+        On entry,  N specifies the order of the coefficient matrix A.
+        N must be at least zero.
+
+
+NB      (global input)                const int
+        On entry,  NB specifies the blocking factor used to partition
+        and distribute the matrix A. NB must be larger than one.
+
+ +

See Also

+HPL_pddriver, +HPL_pdinfo. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtrsv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtrsv.html new file mode 100755 index 000000000..0bb182dc9 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdtrsv.html @@ -0,0 +1,64 @@ + + +HPL_pdtrsv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdtrsv Solve triu( A ) x = b. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdtrsv( +HPL_T_grid * +GRID, +HPL_T_pmat * +AMAT +); + +

Description

+HPL_pdtrsv +solves an upper triangular system of linear equations. + +The rhs is the last column of the N by N+1 matrix A. The solve starts +in the process column owning the Nth column of A, so the rhs b may +need to be moved one process column to the left at the beginning. The +routine therefore needs a column vector in every process column but +the one owning b. The result is replicated in all process rows, and +returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. + +The algorithm uses decreasing one-ring broadcast in process rows and +columns implemented in terms of synchronous communication point to +point primitives. The lookahead of depth 1 is used to minimize the +critical path. This entire operation is essentially ``latency'' bound +and an estimate of its running time is given by: + + (move rhs) lat + N / ( P bdwth ) + + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + + gam2 N^2 / ( P Q ), + +where gam2 is an estimate of the Level 2 BLAS rate of execution. +There are N / NB diagonal blocks. One must exchange 2 messages of +length NB to compute the next NB entries of the vector solution, as +well as performing a total of N^2 floating point operations. + +

Arguments

+
+GRID    (local input)                 HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+AMAT    (local input/output)          HPL_T_pmat *
+        On entry,  AMAT  points  to the data structure containing the
+        local array information.
+
+ +

See Also

+HPL_pdgesv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNN.html new file mode 100755 index 000000000..b77cddbce --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNN.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateNN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateNN Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateNN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateNN +broadcasts - forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST is
+        NULL on entry, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00N, +HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNT.html new file mode 100755 index 000000000..4ecb1f687 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateNT.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateNT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateNT Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateNT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateNT +broadcasts - forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST is
+        NULL on entry, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00T, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTN.html new file mode 100755 index 000000000..ae735bf84 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTN.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateTN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateTN Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateTN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateTN +broadcasts - forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST is
+        NULL on entry, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00N, +HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTT.html new file mode 100755 index 000000000..7c69f8828 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pdupdateTT.html @@ -0,0 +1,65 @@ + + +HPL_pdupdateTT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pdupdateTT Broadcast a panel and update the trailing submatrix. + +

Synopsis

+#include "hpl.h"

+void +HPL_pdupdateTT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +NN +); + +

Description

+HPL_pdupdateTT +broadcasts - forwards the panel PBCST and simultaneously +applies the row interchanges and updates part of the trailing (using +the panel PANEL) submatrix. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local output)                int *
+        On exit,  IFLAG  indicates  whether or not  the broadcast has
+        been completed when PBCST is not NULL on entry.  If PBCST is
+        NULL on entry, IFLAG is left unchanged.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be updated) information.
+
+
+NN      (local input)                 const int
+        On entry, NN specifies  the  local  number  of columns of the
+        trailing  submatrix  to be updated  starting  at the  current
+        position. NN must be at least zero.
+
+ +

See Also

+HPL_pdgesv, +HPL_pdgesv0, +HPL_pdgesvK1, +HPL_pdgesvK2, +HPL_pdlaswp00T, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_perm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_perm.html new file mode 100755 index 000000000..9312eb4eb --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_perm.html @@ -0,0 +1,67 @@ + + +HPL_perm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_perm Combine 2 index arrays - Generate the permutation. + +

Synopsis

+#include "hpl.h"

+void +HPL_perm( +const int +N, +int * +LINDXA, +int * +LINDXAU, +int * +IWORK +); + +

Description

+HPL_perm +combines two index arrays and generates the corresponding +permutation. First, this function computes the inverse of LINDXA, and +then combines it with LINDXAU. Second, in order to be able to perform +the permutation in place, LINDXAU is overwritten by the sequence of +permutations producing the same result. What we ultimately want to +achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the +call to this function, this in place permutation can be performed by +for i in [0..N) swap U[i] with U[LINDXAU[i]]. + +

Arguments

+
+N       (global input)                const int
+        On entry,  N  specifies the length of the arrays  LINDXA  and
+        LINDXAU. N should be at least zero.
+
+
+LINDXA  (global input/output)         int *
+        On entry,  LINDXA  is an array of dimension N  containing the
+        source indexes. On exit,  LINDXA  contains the combined index
+        array.
+
+
+LINDXAU (global input/output)         int *
+        On entry,  LINDXAU is an array of dimension N  containing the
+        target indexes.  On exit,  LINDXAU  contains  the sequence of
+        permutation,  that  should be applied  in increasing order to
+        permute the underlying array U in place.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension N.
+
+ +

See Also

+HPL_plindx1, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pipid.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pipid.html new file mode 100755 index 000000000..e6deb3d93 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pipid.html @@ -0,0 +1,95 @@ + + +HPL_pipid HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pipid Simplify the pivot vector. + +

Synopsis

+#include "hpl.h"

+void +HPL_pipid( +HPL_T_panel * +PANEL, +int * +K, +int * +IPID +); + +

Description

+HPL_pipid +computes an array IPID that contains the source and final +destination of matrix rows resulting from the application of N +interchanges as computed by the LU factorization with row partial +pivoting. The array IPID is such that the row of global index IPID(i) +should be mapped onto the row of global index IPID(i+1). Note that we +cannot really know the length of IPID a priori. However, we know that +this array is at least 2*N long, since there are N rows to swap and +broadcast. The length of this array must be smaller than or equal to +4*N, since every row is swapped with at most a single distinct remote +row. The algorithm constructing IPID goes as follows: Let IA be the +global index of the first row to be swapped. + +For every row src IA + i with i in [0..N) to be swapped with row dst +such that dst is given by DPIV[i]: + +Is row src the destination of a previous row of the current block, +that is, is there k odd such that IPID(k) is equal to src ? + Yes: update this destination with dst. For example, if the +pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, +we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it +was thought so far ... + No : add the pair (src,dst) at the end of IPID; row src has not +been moved yet. + +Is row dst different from src the destination of a previous row of +the current block, i.e., is there k odd such that IPID(k) is equal to +dst ? + Yes: update IPID(k) with src. For example, if the pivot array +is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in +fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought +so far ... + No : add the pair (dst,src) at the end of IPID; row dst has not +been moved yet. + +Note that when src is equal to dst, the pair (dst,src) should not be +added to IPID in order to avoid duplicated entries in this array. 
+During the construction of the array IPID, we make sure that the +first N entries are such that IPID(k) with k odd is equal to IA+k/2. +For k in [0..K/2), the row of global index IPID(2*k) should be +mapped onto the row of global index IPID(2*k+1). + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global output)               int *
+        On exit, K specifies the number of entries in  IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global output)               int *
+        On entry, IPID is an array of length 4*N.  On exit, the first
+        K entries of that array contain the src and final destination
+        resulting  from  the  application of the  N  interchanges  as
+        specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
+        stored and sorted so that IPID(2*i+1) is equal to IA+i with i
+        in [0..N)
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx0.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx0.html new file mode 100755 index 000000000..f3dbbcdea --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx0.html @@ -0,0 +1,187 @@ + + +HPL_plindx0 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx0 Compute local swapping index arrays. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx0( +HPL_T_panel * +PANEL, +const int +K, +int * +IPID, +int * +LINDXA, +int * +LINDXAU, +int * +LLEN +); + +

Description

+HPL_plindx0 +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. + +On entry, the array IPID of length K is such that the row of global +index IPID(i) should be mapped onto row of global index IPID(i+1). +Let IA be the global index of the first row to be swapped. For k in +[0..K/2), the row of global index IPID(2*k) should be mapped onto the +row of global index IPID(2*k+1). The question then, is to determine +which rows should ultimately be part of U. + +First, some rows of the process ICURROW may be swapped locally. One +of these rows belongs to U, the other one belongs to my local piece of +A. The other rows of the current block are swapped with remote rows +and are thus not part of U. These rows however should be sent along, +and grabbed by the other processes as we progress in the exchange +phase. + +So, assume that I am ICURROW and consider a row of index IPID(2*i) +that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less +than N, this row is locally swapped and should be copied into U at +the position IPID(2*i+1) - IA. No row will be exchanged for this one. +If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be +locally copied into my local piece of A at the position corresponding +to the row of global index IPID(2*i+1). + +If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) +is to be swapped away and strictly speaking does not belong to U, but +to A remotely. Since this process will however send this array U, +this row is copied into U, exactly where the row IPID(2*i+1) should +go. For this, we search IPID for k1, such that IPID(2*k1) is equal to +IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position +IPID(2*k1+1)-IA. + +It is thus important to put the rows that go into U, i.e., such that +IPID(2*i+1) - IA is less than N at the beginning of the array IPID. 
By +doing so, U is formed, and the local copy is performed in just one +sweep. + +Two lists LINDXA and LINDXAU are built. LINDXA contains the local +index of the rows I have that should be copied. LINDXAU contains the +local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A +is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) +of A should be locally copied into A(-LINDXAU(k),:). In the process +ICURROW, the initial packing algorithm proceeds as follows. + + for all entries in IPID, + if IPID(2*i) is in ICURROW, + if IPID(2*i+1) is in ICURROW, + if( IPID(2*i+1) - IA < N ) + save corresponding local position + of this row (LINDXA); + save local position (LINDXAU) in U + where this row goes; + [copy row IPID(2*i) in U at position + IPID(2*i+1)-IA; ]; + else + save corresponding local position of + this row (LINDXA); + save local position (-LINDXAU) in A + where this row goes; + [copy row IPID(2*i) in my piece of A + at IPID(2*i+1);] + end if + else + find k1 such that IPID(2*k1) = IPID(2*i+1); + copy row IPID(2*i) in U at position + IPID(2*k1+1)-IA; + save corresponding local position of this + row (LINDXA); + save local position (LINDXAU) in U where + this row goes; + end if + end if + end for + +Second, if I am not the current row process ICURROW, all source rows +in IPID that I own are part of U. Indeed, they are swapped with one +row of the current block of rows, and the main factorization +algorithm proceeds one row after each other. The processes different +from ICURROW, should exchange and accumulate those rows until they +receive some data previously owned by the process ICURROW. + +In processes different from ICURROW, the initial packing algorithm +proceeds as follows. Consider a row of global index IPID(2*i) that I +own. When I will be receiving data previously owned by ICURROW, i.e., +U, row IPID(2*i) should replace the row in U at pos. 
IPID(2*i+1)-IA, +and this particular row of U should be first copied into my piece of +A, at A(il,:), where il is the local row index corresponding to +IPID(2*i). Now,initially, this row will be packed into workspace, say +as the kth row of that work array. The following algorithm sets +LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row +should be copied. LINDXA(k) stores the local index in A where this +row of U should be copied, i.e il. + + for all entries in IPID, + if IPID(2*i) is not in ICURROW, + copy row IPID(2*i) in work array; + save corresponding local position + of this row (LINDXA); + save position (LINDXAU) in U where + this row should be copied; + end if + end for + +Since we are at it, we also globally figure out how many rows every +process has. That is necessary, because it would rather be cumbersome +to figure it on the fly during the bi-directional exchange phase. +This information is kept in the array LLEN of size NPROW. Also note +that the arrays LINDXA and LINDXAU are of max length equal to 2*N. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+LINDXA  (local output)                int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+
+
+LINDXAU (local output)                int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+
+
+LLEN    (global output)               int *
+        On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
+        contains how many rows every process has.
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx1.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx1.html new file mode 100755 index 000000000..0a49ede0b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx1.html @@ -0,0 +1,130 @@ + + +HPL_plindx1 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx1 Compute local swapping index arrays. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx1( +HPL_T_panel * +PANEL, +const int +K, +const int * +IPID, +int * +IPA, +int * +LINDXA, +int * +LINDXAU, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1, +int * +PERMU, +int * +IWORK +); + +

Description

+HPL_plindx1 +computes two local arrays LINDXA and LINDXAU containing +the local source and final destination position resulting from the +application of row interchanges. In addition, this function computes +three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic +mapping information for the spreading phase. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+IPA     (global output)               int *
+        On exit,  IPA  specifies  the number of rows that the current
+        process row has that either belong to U  or should be swapped
+        with remote rows of A.
+
+
+LINDXA  (global output)               int *
+        On entry, LINDXA  is an array of dimension 2*N. On exit, this
+        array contains the local indexes of the rows of A I have that
+        should be copied into U.
+
+
+LINDXAU (global output)               int *
+        On entry, LINDXAU is an array of dimension 2*N. On exit, this
+        array contains  the local destination  information encoded as
+        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
+        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
+        of A should be locally copied into A(-LINDXAU(k),:).
+
+
+IPLEN   (global output)               int *
+        On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in  the  processes  before  process  IPMAP[i]  after the sort
+        with the convention that IPLEN[nprow]  is the total number of
+        rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
+        local number of rows of A that should be moved to the process
+        IPMAP[i]. IPLEN is such that the number of rows of the source
+        process  row can be computed as  IPLEN[1] - IPLEN[0], and the
+        remaining  entries  of  this  array  are  sorted  so that the
+        quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+
+
+PERMU   (global output)               int *
+        On entry,  PERMU  is an array of dimension JB. On exit, PERMU
+        contains  a sequence of permutations,  that should be applied
+        in increasing order to permute in place the row panel U.
+
+
+IWORK   (workspace)                   int *
+        On entry, IWORK is a workarray of dimension 2*JB.
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx10.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx10.html new file mode 100755 index 000000000..fbfd6be2f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_plindx10.html @@ -0,0 +1,87 @@ + + +HPL_plindx10 HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_plindx10 Compute the logarithmic maps for the spreading. + +

Synopsis

+#include "hpl.h"

+void +HPL_plindx10( +HPL_T_panel * +PANEL, +const int +K, +const int * +IPID, +int * +IPLEN, +int * +IPMAP, +int * +IPMAPM1 +); + +

Description

+HPL_plindx10 +computes three arrays IPLEN, IPMAP and IPMAPM1 that +contain the logarithmic mapping information for the spreading phase. + +

Arguments

+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel information.
+
+
+K       (global input)                const int
+        On entry, K specifies the number of entries in IPID.  K is at
+        least 2*N, and at most 4*N.
+
+
+IPID    (global input)                const int *
+        On entry,  IPID  is an array of length K. The first K entries
+        of that array contain the src and final destination resulting
+        from the application of the interchanges.
+
+
+IPLEN   (global output)               int *
+        On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
+        this array is such that  IPLEN[i]  is the number of rows of A
+        in the processes before process IPMAP[i] after the sort, with
+        the convention that IPLEN[nprow] is the total number of rows.
+        In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
+        rows of  A  that should be moved for each process.  IPLEN  is
+        such that the number of rows of the source process row can be
+        computed as IPLEN[1] - IPLEN[0], and the remaining entries of
+        this  array are sorted  so  that  the quantities IPLEN[i+1] -
+        IPLEN[i] are logarithmically sorted.
+
+
+IPMAP   (global output)               int *
+        On entry, IPMAP is an array of dimension NPROW. On exit, this
+        array contains  the logarithmic mapping of the processes.  In
+        other words, IPMAP[myrow] is the corresponding sorted process
+        coordinate.
+
+
+IPMAPM1 (global output)               int *
+        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
+        this  array  contains  the inverse of the logarithmic mapping
+        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
+        [0.. NPROW)
+
+ +

See Also

+HPL_pdlaswp00N, +HPL_pdlaswp00T, +HPL_pdlaswp01N, +HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pnum.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pnum.html new file mode 100755 index 000000000..8bedc3016 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pnum.html @@ -0,0 +1,54 @@ + + +HPL_pnum HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pnum Rank determination. + +

Synopsis

+#include "hpl.h"

+int +HPL_pnum( +const HPL_T_grid * +GRID, +const int +MYROW, +const int +MYCOL +); + +

Description

+HPL_pnum +determines the rank of a process as a function of its +coordinates in the grid. + +

Arguments

+
+GRID    (local input)                 const HPL_T_grid *
+        On entry,  GRID  points  to the data structure containing the
+        process grid information.
+
+
+MYROW   (local input)                 const int
+        On entry,  MYROW  specifies the row coordinate of the process
+        whose rank is to be determined. MYROW must be greater than or
+        equal to zero and less than NPROW.
+
+
+MYCOL   (local input)                 const int
+        On entry,  MYCOL  specifies  the  column  coordinate  of  the
+        process whose rank is to be determined. MYCOL must be greater
+        than or equal to zero and less than NPCOL.
+
+ +

See Also

+HPL_grid_init, +HPL_grid_info, +HPL_grid_exit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer.html new file mode 100755 index 000000000..abef45946 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer.html @@ -0,0 +1,49 @@ + + +HPL_ptimer HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer Timer facility. + +

Synopsis

+#include "hpl.h"

+void +HPL_ptimer( +const int +I +); + +

Description

+HPL_ptimer +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_ptimer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_ptimer_boot() prior to any of +the functions mentioned above. + +

Arguments

+
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+
+ +

See Also

+HPL_ptimer_cputime, +HPL_ptimer_walltime. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_cputime.html new file mode 100755 index 000000000..cffd863b3 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_cputime.html @@ -0,0 +1,35 @@ + + +HPL_ptimer_cputime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer_cputime Return the CPU time. + +

Synopsis

+#include "hpl.h"

+double +HPL_ptimer_cputime(); + +

Description

+HPL_ptimer_cputime +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. + +

See Also

+HPL_ptimer_walltime, +HPL_ptimer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_walltime.html new file mode 100755 index 000000000..a509897f1 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_ptimer_walltime.html @@ -0,0 +1,26 @@ + + +HPL_ptimer_walltime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_ptimer_walltime Return the elapsed (wall-clock) time. + +

Synopsis

+#include "hpl.h"

+double +HPL_ptimer_walltime(); + +

Description

+HPL_ptimer_walltime +returns the elapsed (wall-clock) time. + +

See Also

+HPL_ptimer_cputime, +HPL_ptimer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pwarn.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pwarn.html new file mode 100755 index 000000000..221d23982 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_pwarn.html @@ -0,0 +1,63 @@ + + +HPL_pwarn HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_pwarn displays an error message. + +

Synopsis

+#include "hpl.h"

+void +HPL_pwarn( +FILE * +STREAM, +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_pwarn +displays an error message. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

See Also

+HPL_pabort, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rand.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rand.html new file mode 100755 index 000000000..5aef6669c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rand.html @@ -0,0 +1,40 @@ + + +HPL_rand HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rand random number generator. + +

Synopsis

+#include "hpl.h"

+double +HPL_rand(); + +

Description

+HPL_rand +generates the next number in the random sequence. This +function ensures that this number lies in the interval (-0.5, 0.5]. + +The static array irand contains the information (2 integers) required +to generate the next number in the sequence X(n). This number is +computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the +constant d is the largest 64 bit positive integer. The array irand is +then updated for the generation of the next number X(n+1) in the +random sequence as follows X(n+1) = a * X(n) + c. The constants a and +c should have been preliminarily stored in the arrays ias and ics as +2 pairs of integers. The initialization of ias, ics and irand is +performed by the function HPL_setran. + +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_xjumpm, +HPL_jumpit. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_recv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_recv.html new file mode 100755 index 000000000..afcb570c5 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_recv.html @@ -0,0 +1,67 @@ + + +HPL_recv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_recv Receive a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_recv( +double * +RBUF, +int +RCOUNT, +int +SRC, +int +RTAG, +MPI_Comm +COMM +); + +

Description

+HPL_recv +is a simple wrapper around MPI_Recv. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. + +

Arguments

+
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+
+
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+
+
+SRC     (local input)                 int
+        On entry, SRC  specifies the rank of the  sending  process in
+        the communication space defined by COMM.
+
+
+RTAG    (local input)                 int
+        On entry,  RTAG specifies the message tag to be used for this
+        communication operation.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_send, +HPL_sdrv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_reduce.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_reduce.html new file mode 100755 index 000000000..026435ed6 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_reduce.html @@ -0,0 +1,75 @@ + + +HPL_reduce HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_reduce Reduce operation. + +

Synopsis

+#include "hpl.h"

+int +HPL_reduce( +void * +BUFFER, +const int +COUNT, +const HPL_T_TYPE +DTYPE, +const HPL_T_OP +OP, +const int +ROOT, +MPI_Comm +COMM +); + +

Description

+HPL_reduce +performs a global reduce operation across all processes of +a group. Note that the input buffer is used as a work array, so in +all processes but the accumulating process the original data is corrupted. + +

Arguments

+
+BUFFER  (local input/output)          void *
+        On entry,  BUFFER  points to  the  buffer to be  reduced.  On
+        exit,  and  in process of rank  ROOT  this array contains the
+        reduced data.  This  buffer  is also used as workspace during
+        the operation in the other processes of the group.
+
+
+COUNT   (global input)                const int
+        On entry,  COUNT  indicates the number of entries in  BUFFER.
+        COUNT must be at least zero.
+
+
+DTYPE   (global input)                const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+
+OP      (global input)                const HPL_T_OP 
+        On entry, OP is a pointer to the local combine function.
+
+
+ROOT    (global input)                const int
+        On entry, ROOT is the coordinate of the accumulating process.
+
+
+COMM    (global/local input)          MPI_Comm
+        The MPI communicator identifying the process collection.
+
+ +

See Also

+HPL_broadcast, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollN.html new file mode 100755 index 000000000..1e1a49068 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollN.html @@ -0,0 +1,99 @@ + + +HPL_rollN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rollN Roll U and forward the column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_rollN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +N, +double * +U, +const int +LDU, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_rollN +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+
+
+N       (local input)                 const int
+        On entry, N specifies the number of columns of  U.  N must be
+        at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,IPLEN[NPROW]).
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollT.html new file mode 100755 index 000000000..a6ac29336 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_rollT.html @@ -0,0 +1,99 @@ + + +HPL_rollT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_rollT Roll U and forward the column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_rollT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const int +N, +double * +U, +const int +LDU, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_rollT +rolls the local arrays containing the local pieces of U, so +that on exit to this function U is replicated in every process row. +In addition, this function probes for the presence of the column panel +and forwards it when available. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be rolled) information.
+
+
+N       (local input)                 const int
+        On entry, N specifies the local number of rows of  U.  N must
+        be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U in each process row.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least  MAX(1,N).
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process row.
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sdrv.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sdrv.html new file mode 100755 index 000000000..6f5b5880c --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sdrv.html @@ -0,0 +1,88 @@ + + +HPL_sdrv HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_sdrv Send and receive a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_sdrv( +double * +SBUF, +int +SCOUNT, +int +STAG, +double * +RBUF, +int +RCOUNT, +int +RTAG, +int +PARTNER, +MPI_Comm +COMM +); + +

Description

+HPL_sdrv +is a simple wrapper around MPI_Sendrecv. Its main purpose is +to allow for some experimentation and tuning of this simple function. +Messages of length less than or equal to zero are not sent nor +received. Successful completion is indicated by the returned error +code HPL_SUCCESS. + +

Arguments

+
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+
+
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number  of double precision
+        entries in SBUF. SCOUNT must be at least zero.
+
+
+STAG    (local input)                 int
+        On entry,  STAG  specifies the message tag to be used for the
+        sending communication operation.
+
+
+RBUF    (local output)                double *
+        On entry, RBUF specifies the starting address of buffer to be
+        received.
+
+
+RCOUNT  (local input)                 int
+        On entry,  RCOUNT  specifies  the number  of double precision
+        entries in RBUF. RCOUNT must be at least zero.
+
+
+RTAG    (local input)                 int
+        On entry,  RTAG  specifies the message tag to be used for the
+        receiving communication operation.
+
+
+PARTNER (local input)                 int
+        On entry,  PARTNER  specifies  the rank of the  collaborative
+        process in the communication space defined by COMM.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_send, +HPL_recv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_send.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_send.html new file mode 100755 index 000000000..05dcb7e6d --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_send.html @@ -0,0 +1,67 @@ + + +HPL_send HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_send Send a message. + +

Synopsis

+#include "hpl.h"

+int +HPL_send( +double * +SBUF, +int +SCOUNT, +int +DEST, +int +STAG, +MPI_Comm +COMM +); + +

Description

+HPL_send +is a simple wrapper around MPI_Send. Its main purpose is +to allow for some experimentation / tuning of this simple routine. +Successful completion is indicated by the returned error code +HPL_SUCCESS. In the case of messages of length less than or equal to +zero, this function returns immediately. + +

Arguments

+
+SBUF    (local input)                 double *
+        On entry, SBUF specifies the starting address of buffer to be
+        sent.
+
+
+SCOUNT  (local input)                 int
+        On entry,  SCOUNT  specifies  the number of  double precision
+        entries in SBUF. SCOUNT must be at least zero.
+
+
+DEST    (local input)                 int
+        On entry, DEST specifies the rank of the receiving process in
+        the communication space defined by COMM.
+
+
+STAG    (local input)                 int
+        On entry,  STAG specifies the message tag to be used for this
+        communication operation.
+
+
+COMM    (local input)                 MPI_Comm
+        The MPI communicator identifying the communication space.
+
+ +

See Also

+HPL_recv, +HPL_sdrv. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_setran.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_setran.html new file mode 100755 index 000000000..44f37e35e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_setran.html @@ -0,0 +1,52 @@ + + +HPL_setran HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_setran Manage the random number generator. + +

Synopsis

+#include "hpl.h"

+void +HPL_setran( +const int +OPTION, +int * +IRAN +); + +

Description

+HPL_setran +initializes the random generator with the encoding of the +first number X(0) in the sequence, and the constants a and c used to +compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), +a and c are stored in the static variables irand, ias and ics. When +OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the +values of the input array IRAN. When OPTION is 3, IRAN is set to the +current value of irand, and irand is then incremented. + +

Arguments

+
+OPTION  (local input)                 const int
+        On entry, OPTION  is an integer that specifies the operations
+        to be performed on the random generator as specified above.
+
+
+IRAN    (local input/output)          int *
+        On entry,  IRAN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of a random number.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_xjumpm, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadN.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadN.html new file mode 100755 index 000000000..f0d8f8938 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadN.html @@ -0,0 +1,120 @@ + + +HPL_spreadN HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_spreadN Spread row panel U and forward current column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_spreadN( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_SIDE +SIDE, +const int +N, +double * +U, +const int +LDU, +const int +SRCDIST, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_spreadN +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of rows of U, that +should be spread on any given process row. This function also probes +for the presence of the column panel PBCST. In case of success, this +panel will be forwarded. If PBCST is NULL on input, this probing +mechanism will be disabled. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+
+
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+
+
+N       (global input)                const int
+        On entry,  N  specifies  the  local number of columns of U. N
+        must be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,IPLEN[nprow]).
+
+
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01N. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadT.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadT.html new file mode 100755 index 000000000..cec561646 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_spreadT.html @@ -0,0 +1,120 @@ + + +HPL_spreadT HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_spreadT Spread row panel U and forward current column panel. + +

Synopsis

+#include "hpl.h"

+void +HPL_spreadT( +HPL_T_panel * +PBCST, +int * +IFLAG, +HPL_T_panel * +PANEL, +const enum HPL_SIDE +SIDE, +const int +N, +double * +U, +const int +LDU, +const int +SRCDIST, +const int * +IPLEN, +const int * +IPMAP, +const int * +IPMAPM1 +); + +

Description

+HPL_spreadT +spreads the local array containing local pieces of U, so +that on exit to this function, a piece of U is contained in every +process row. The array IPLEN contains the number of columns of U, +that should be spread on any given process row. This function also +probes for the presence of the column panel PBCST. If available, +this panel will be forwarded. If PBCST is NULL on input, this +probing mechanism will be disabled. + +

Arguments

+
+PBCST   (local input/output)          HPL_T_panel *
+        On entry,  PBCST  points to the data structure containing the
+        panel (to be broadcast) information.
+
+
+IFLAG   (local input/output)          int *
+        On entry, IFLAG  indicates  whether or not  the broadcast has
+        already been completed.  If not,  probing will occur, and the
+        outcome will be contained in IFLAG on exit.
+
+
+PANEL   (local input/output)          HPL_T_panel *
+        On entry,  PANEL  points to the data structure containing the
+        panel (to be spread) information.
+
+
+SIDE    (global input)                const enum HPL_SIDE
+        On entry, SIDE specifies whether the local piece of U located
+        in process IPMAP[SRCDIST] should be spread to the right or to
+        the left. This feature is used by the equilibration process.
+
+
+N       (global input)                const int
+        On entry,  N  specifies the local number of rows of U. N must
+        be at least zero.
+
+
+U       (local input/output)          double *
+        On entry,  U  is an array of dimension (LDU,*) containing the
+        local pieces of U.
+
+
+LDU     (local input)                 const int
+        On entry, LDU specifies the local leading dimension of U. LDU
+        should be at least MAX(1,N).
+
+
+SRCDIST (local input)                 const int
+        On entry,  SRCDIST  specifies the source process that spreads
+        its piece of U.
+
+
+IPLEN   (global input)                const int *
+        On entry, IPLEN is an array of dimension NPROW+1.  This array
+        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
+        in each process before process IPMAP[i], with the  convention
+        that IPLEN[nprow] is the total number of rows. In other words
+        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
+        should be moved to process IPMAP[i].
+
+
+IPMAP   (global input)                const int *
+        On entry, IPMAP is an array of dimension  NPROW.  This  array
+        contains  the  logarithmic mapping of the processes. In other
+        words, IPMAP[myrow]  is the absolute coordinate of the sorted
+        process.
+
+
+IPMAPM1 (global input)                const int *
+        On entry,  IPMAPM1 is an array of dimension NPROW. This array
+        contains  the inverse of the logarithmic mapping contained in
+        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
+
+ +

See Also

+HPL_pdlaswp01T. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sum.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sum.html new file mode 100755 index 000000000..be785b99e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_sum.html @@ -0,0 +1,61 @@ + + +HPL_sum HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_sum Combine (sum) two buffers. + +

Synopsis

+#include "hpl.h"

+void +HPL_sum( +const int +N, +const void * +IN, +void * +INOUT, +const HPL_T_TYPE +DTYPE +); + +

Description

+HPL_sum +combines (sum) two buffers. + +

Arguments

+
+N       (input)                       const int
+        On entry, N  specifies  the  length  of  the  buffers  to  be
+        combined. N must be at least zero.
+
+
+IN      (input)                       const void *
+        On entry, IN points to the input-only buffer to be combined.
+
+
+INOUT   (input/output)                void *
+        On entry, INOUT  points  to  the  input-output  buffer  to be
+        combined.  On exit,  the  entries of this array  contain  the
+        combined results.
+
+
+DTYPE   (input)                       const HPL_T_TYPE
+        On entry,  DTYPE  specifies the type of the buffers operands.
+
+ +

See Also

+HPL_broadcast, +HPL_reduce, +HPL_all_reduce, +HPL_barrier, +HPL_min, +HPL_max, +HPL_sum. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer.html new file mode 100755 index 000000000..8e6a79803 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer.html @@ -0,0 +1,49 @@ + + +HPL_timer HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer Timer facility. + +

Synopsis

+#include "hpl.h"

+void +HPL_timer( +const int +I +); + +

Description

+HPL_timer +provides a "stopwatch" functionality cpu/wall timer in +seconds. Up to 64 separate timers can be functioning at once. The +first call starts the timer, and the second stops it. This routine +can be disabled by calling HPL_timer_disable(), so that calls to +the timer are ignored. This feature can be used to make sure certain +sections of code do not affect timings, even if they call routines +which have HPL_timer calls in them. HPL_timer_enable() will re-enable +the timer functionality. One can retrieve the current value of a +timer by calling + +t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) + +where I is the timer index in [0..64). To initialize the timer +functionality, one must have called HPL_timer_boot() prior to any of +the functions mentioned above. + +

Arguments

+
+I       (global input)                const int
+        On entry, I specifies the timer to stop/start.
+
+ +

See Also

+HPL_timer_cputime, +HPL_timer_walltime. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_cputime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_cputime.html new file mode 100755 index 000000000..0fa9b6575 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_cputime.html @@ -0,0 +1,35 @@ + + +HPL_timer_cputime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer_cputime Return the CPU time. + +

Synopsis

+#include "hpl.h"

+double +HPL_timer_cputime(); + +

Description

+HPL_timer_cputime +returns the cpu time. If HPL_USE_CLOCK is defined, +the clock() function is used to return an approximation of processor +time used by the program. The value returned is the CPU time used so +far as a clock_t; to get the number of seconds used, the result is +divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C +standard library. If HPL_USE_TIMES is defined, the times() function +is used instead. This function returns the current process times. +times() returns the number of clock ticks that have elapsed since the +system has been up. Otherwise and by default, the standard library +function getrusage() is used. + +

See Also

+HPL_timer_walltime, +HPL_timer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_walltime.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_walltime.html new file mode 100755 index 000000000..92588e49f --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_timer_walltime.html @@ -0,0 +1,26 @@ + + +HPL_timer_walltime HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_timer_walltime Return the elapsed (wall-clock) time. + +

Synopsis

+#include "hpl.h"

+double +HPL_timer_walltime(); + +

Description

+HPL_timer_walltime +returns the elapsed (wall-clock) time. + +

See Also

+HPL_timer_cputime, +HPL_timer. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_warn.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_warn.html new file mode 100755 index 000000000..773df9ae0 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_warn.html @@ -0,0 +1,74 @@ + + +HPL_warn HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_warn displays an error message. + +

Synopsis

+#include "hpl.h"

+void +HPL_warn( +FILE * +STREAM, +int +LINE, +const char * +SRNAME, +const char * +FORM, +... +); + +

Description

+HPL_warn +displays an error message. + +

Arguments

+
+STREAM  (local input)                 FILE *
+        On entry, STREAM specifies the output stream.
+
+
+LINE    (local input)                 int
+        On entry,  LINE  specifies the line  number in the file where
+        the  error  has occurred.  When  LINE  is not a positive line
+        number, it is ignored.
+
+
+SRNAME  (local input)                 const char *
+        On entry, SRNAME  should  be the name of the routine  calling
+        this error handler.
+
+
+FORM    (local input)                 const char *
+        On entry, FORM specifies the format, i.e., how the subsequent
+        arguments are converted for output.
+
+
+        (local input)                 ...
+        On entry,  ...  is the list of arguments to be printed within
+        the format string.
+
+ +

Example

+#include "hpl.h"

+
+int main(int argc, char *argv[])
+{
+   HPL_warn( stderr, __LINE__, __FILE__,
+             "Demo.\n" );
+   exit(0); return(0);
+}
+
+ +

See Also

+HPL_abort, +HPL_fprintf. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_xjumpm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_xjumpm.html new file mode 100755 index 000000000..794ae3a8b --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/HPL_xjumpm.html @@ -0,0 +1,97 @@ + + +HPL_xjumpm HPL 2.3 Library Functions December 2, 2018 + + + + +

Name

+HPL_xjumpm Compute constants to jump in the random sequence. + +

Synopsis

+#include "hpl.h"

+void +HPL_xjumpm( +const int +JUMPM, +int * +MULT, +int * +IADD, +int * +IRANN, +int * +IRANM, +int * +IAM, +int * +ICM +); + +

Description

+HPL_xjumpm +computes the constants A and C to jump JUMPM numbers in +the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in +MULT and IADD specify how to jump from one entry in the sequence to +the next. + +

Arguments

+
+JUMPM   (local input)                 const int
+        On entry,  JUMPM  specifies  the  number  of entries  in  the
+        sequence to jump over.  When  JUMPM  is less than or equal to
+        zero,  A and C are not computed,  and IRANM is set to  IRANN,
+        corresponding to a jump of size zero.
+
+
+MULT    (local input)                 int *
+        On entry, MULT is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  a  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+
+
+IADD    (local input)                 int *
+        On entry, IADD is an array of dimension 2,  that contains the
+        16-lower  and 15-higher bits of the constant  c  to jump from
+        X(n) to X(n+1) = a*X(n) + c in the random sequence.
+
+
+IRANN   (local input)                 int *
+        On entry, IRANN is an array of dimension 2, that contains the
+        16-lower and 15-higher bits of the encoding of X(n).
+
+
+IRANM   (local output)                int *
+        On entry,  IRANM  is an array of dimension 2.   On exit, this
+        array  contains respectively  the 16-lower and 15-higher bits
+        of the encoding of X(n+JUMPM).
+
+
+IAM     (local output)                int *
+        On entry, IAM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
+        sequence. IAM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  A. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+
+
+ICM     (local output)                int *
+        On entry, ICM is an array of dimension 2. On exit, when JUMPM
+        is  greater  than  zero,  this  array  contains  the  encoded
+        constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
+        sequence. ICM(0:1)  contains  respectively  the  16-lower and
+        15-higher  bits  of this constant  C. When  JUMPM  is less than
+        or equal to zero, this array is not referenced.
+
+ +

See Also

+HPL_ladd, +HPL_lmul, +HPL_setran, +HPL_jumpit, +HPL_rand. + + + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/algorithm.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/algorithm.html new file mode 100755 index 000000000..9b1d7222e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/algorithm.html @@ -0,0 +1,299 @@ + + +HPL Algorithm + + + + +

HPL Algorithm

+ + +This page provides a high-level description of the algorithm used in +this package. As indicated below, HPL contains in fact many possible +variants for various operations. Defaults could have been chosen, or +even variants could be selected during the execution. Due to the +performance requirements, it was decided to leave the user with the +opportunity of choosing, so that an "optimal" set of parameters could +easily be experimentally determined for a given machine configuration. +From a numerical accuracy point of view, all possible +combinations are rigorously equivalent to each other even though the +result may slightly differ (bit-wise). +

+ + +
+ +

Main Algorithm

+ +This software package solves a linear system of order n: A x = b by +first computing the LU factorization with row partial pivoting of the +n-by-n+1 coefficient matrix [A b] = [[L,U] y]. Since the lower triangular +factor L is applied to b as the factorization progresses, the solution x +is obtained by solving the upper triangular system U x = y. The lower +triangular matrix L is left unpivoted and the array of pivots is not +returned.

+ + + + + + +
+The data is distributed onto a two-dimensional P-by-Q grid of processes +according to the block-cyclic scheme to ensure "good" load balance +as well as the scalability of the algorithm. The n-by-n+1 coefficient +matrix is first logically partitioned into nb-by-nb blocks, that are +cyclically "dealt" onto the P-by-Q process grid. This is done in both +dimensions of the matrix.
+ + + + + +
+The right-looking variant has been chosen for the main loop of the LU +factorization. This means that at each iteration of the loop a panel of +nb columns is factorized, and the trailing submatrix is updated. Note +that this computation is thus logically partitioned with the same block +size nb that was used for the data distribution.
+
+ +

Panel Factorization

+ + + + + + +
+At a given iteration of the main loop, and because of the cartesian +property of the distribution scheme, each panel factorization occurs in +one column of processes. This particular part of the computation lies +on the critical path of the overall algorithm. The user is offered the +choice of three (Crout, left- and right-looking) matrix-multiply based +recursive variants. The software also allows the user to choose into how +many sub-panels the current panel should be divided during the +recursion. Furthermore, one can also select at run-time the recursion +stopping criterion in terms of the number of columns left to factorize. +When this threshold is reached, the sub-panel will then be factorized +using one of the three Crout, left- or right-looking matrix-vector based +variants. Finally, for each panel column the pivot search, the associated +swap and broadcast operation of the pivot row are combined into one +single communication step. A binary-exchange (leave-on-all) reduction +performs these three operations at once.
+
+ +

Panel Broadcast

+ +Once the panel factorization has been computed, this panel of columns +is broadcast to the other process columns. There are many possible +broadcast algorithms and the software currently offers 6 variants to +choose from. These variants are described below assuming that process 0 +is the source of the broadcast for convenience. "->" means "sends to". +
    +
  • Increasing-ring: 0 -> 1; 1 -> 2; 2 -> 3 and so on. +This algorithm is the classic one; it has the caveat that process 1 has +to send a message. +
    + +
    + +
  • Increasing-ring (modified): 0 -> 1; 0 -> 2; 2 -> 3 +and so on. Process 0 sends two messages and process 1 only receives one +message. This algorithm is almost always better, if not the best. +
    + +
    + +
  • Increasing-2-ring: The Q processes are divided into +two parts: 0 -> 1 and 0 -> Q/2; Then processes 1 and Q/2 act as sources +of two rings: 1 -> 2, Q/2 -> Q/2+1; 2 -> 3, Q/2+1 -> Q/2+2 and so on. +This algorithm has the advantage of reducing the time by which the last +process will receive the panel at the cost of process 0 sending 2 +messages. +
    + +
    + +
  • Increasing-2-ring (modified): As one may expect, +first 0 -> 1, then the Q-1 processes left are divided into two equal +parts: 0 -> 2 and 0 -> Q/2; Processes 2 and Q/2 act then as sources of +two rings: 2 -> 3, Q/2 -> Q/2+1; 3 -> 4, Q/2+1 -> Q/2+2 and so on. +This algorithm is probably the most serious competitor to the increasing +ring modified variant. +
    + +
    + +
  • Long (bandwidth reducing): as opposed to the +previous variants, this algorithm and its follower synchronize all +processes involved in the operation. The message is chopped into Q equal +pieces that are scattered across the Q processes. +
    + +
    +The pieces are then rolled in Q-1 steps. The scatter phase uses a binary +tree and the rolling phase exclusively uses mutual message exchanges. In +odd steps 0 <-> 1, 2 <-> 3, 4 <-> 5 and so on; in even steps Q-1 <-> 0, +1 <-> 2, 3 <-> 4, 5 <-> 6 and so on. +
    + +
    +More messages are exchanged, however the total volume of communication is +independent of Q, making this algorithm particularly suitable for large +messages. This algorithm becomes competitive when the nodes are "very +fast" and the network (comparatively) "very slow".

    + +
  • Long (bandwidth reducing modified): same as above, +except that 0 -> 1 first, and then the Long variant is used on processes +0,2,3,4 .. Q-1.

    +
    + + +
    + +
+ +The ring variants are distinguished by a probe mechanism that activates +them. In other words, a process involved in the broadcast and different +from the source asynchronously probes for the message to receive. When +the message is available the broadcast proceeds, and otherwise the +function returns. This allows the broadcast operation to be interleaved with +the update phase. This helps reduce the idle time spent by those +processes waiting for the factorized panel. This mechanism is necessary +to accommodate various computation/communication performance ratios.

+
+ +

Look-ahead

+ +Once the panel has been broadcast or say during this broadcast operation, +the trailing submatrix is updated using the last panel in the look-ahead +pipe: as mentioned before, the panel factorization lies on the critical +path, which means that when the kth panel has been factorized and then +broadcast, the next most urgent task to complete is the factorization and +broadcast of the k+1 th panel. This technique is often referred to as +"look-ahead" or "send-ahead" in the literature. This package allows +various "depths" of look-ahead to be selected. By convention, a depth of zero +corresponds to no lookahead, in which case the trailing submatrix is +updated by the panel currently broadcast. Look-ahead consumes some extra +memory to essentially keep all the panels of columns currently in the +look-ahead pipe. A look-ahead of depth 1 (maybe 2) is likely to achieve +the best performance gain.

+
+ +

Update

+ +The update of the trailing submatrix by the last panel in the look-ahead +pipe is made of two phases. First, the pivots must be applied to form the +current row panel U. U should then be solved by the upper triangle of the +column panel. U finally needs to be broadcast to each process row so that +the local rank-nb update can take place. We choose to combine the +swapping and broadcast of U at the cost of replicating the solve. Two +algorithms are available for this communication operation. +
    +
  • Binary-exchange: this is a modified variant of the +binary-exchange (leave on all) reduction operation. Every process column +performs the same operation. The algorithm essentially works as follows. +It pretends to reduce the row panel U, but at the beginning the only valid +copy is owned by the current process row. The other process rows will +contribute rows of A they own that should be copied in U and replace them +with rows that were originally in the current process row. The complete +operation is performed in log(P) steps. For the sake of simplicity, let us +assume that P is a power of two. At step k, process row p exchanges a +message with process row p+2^k. There are essentially two cases. First, +one of those two process rows has received U in a previous step. The +exchange occurs. One process swaps its local rows of A into U. Both +processes copy in U remote rows of A. Second, none of those process rows +has received U, the exchange occurs, and both processes simply add those +remote rows to the list they have accumulated so far. At each step, a +message of the size of U is exchanged by at least one pair of process +rows.

    + +
  • Long: this is a bandwidth reducing variant +accomplishing the same task. The row panel is first spread (using a tree) +among the process rows with respect to the pivot array. This is a scatter +(V variant for MPI users). Locally, every process row then swaps these +rows with the rows of A it owns and that belong to U. These buffers +are then rolled (P-1 steps) to finish the broadcast of U. Every process +row permutes U and proceeds with the computational part of the update. A +couple of notes: process rows are logarithmically sorted before +spreading, so that processes receiving the largest number of rows are +first in the tree. This makes the communication volume optimal for this +phase. Finally, before rolling and after the local swap, an equilibration +phase occurs during which the local pieces of U are uniformly spread +across the process rows. A tree-based algorithm is used. This operation +is necessary to keep the rolling phase optimal even when the pivot rows +are not equally distributed in process rows. This algorithm has a +complexity in terms of communication volume that solely depends on the +size of U. In particular, the number of process rows only impacts the +number of messages exchanged. It will thus outperform the previous +variant for large problems on large machine configurations.

    + +
+ +The user can select any of the two variants above. In addition, a mix is +possible as well. The "binary-exchange" algorithm will be used when U +contains at most a certain number of columns. Choosing at least the block +size nb as the threshold value is clearly recommended when look-ahead is +on.

+
+ +

Backward Substitution

+ +The factorization has just now ended, the back-substitution remains to be +done. For this, we choose a look-ahead of depth one variant. The +right-hand-side is forwarded in process rows in a decreasing-ring +fashion, so that we solve Q * nb entries at a time. At each step, this +shrinking piece of the right-hand-side is updated. The process just above +the one owning the current diagonal block of the matrix A updates first +its last nb piece of x, forwards it to the previous process column, then +broadcasts it in the process column in a decreasing-ring fashion as well. +The solution is then updated and sent to the previous process column. The +solution of the linear system is left replicated in every process row.

+
+ +

Checking the Solution

+ +To verify the result obtained, the input matrix and right-hand side are +regenerated. The normwise backward error (see formula below) is then +computed. A solution is considered as "numerically correct" when this +quantity is less than a threshold value of the order of 1.0. In the +expression below, eps is the relative (distributed-memory) machine +precision. + +
    +
  • || Ax - b ||_oo / ( eps * ( || A ||_oo * || x ||_oo + || b ||_oo ) * n ) +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/aprunner.gif b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/aprunner.gif new file mode 100755 index 000000000..6508c806f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/aprunner.gif differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/copyright.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/copyright.html new file mode 100755 index 000000000..934282c81 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/copyright.html @@ -0,0 +1,66 @@ + + +HPL Copyright and Licensing Terms + + + + +

HPL Copyright Notice and Licensing Terms

+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +
    +
  1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +
  2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions, and the following disclaimer in the +documentation and/or other materials provided with the distribution. +
  3. All advertising materials mentioning features or use of this +software must display the following acknowledgement: This product +includes software developed at the University of Tennessee, +Knoxville, Innovative Computing Laboratory. +
  4. The name of the University, the name of the Laboratory, or the +names of its contributors may not be used to endorse or promote +products derived from this software without specific written +permission. +
+ +

Disclaimer

+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +`AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/documentation.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/documentation.html new file mode 100755 index 000000000..152188041 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/documentation.html @@ -0,0 +1,304 @@ + + +HPL Documentation + + + + +

HPL Documentation

+ +The HPL software distribution comes with a set of text files explaining +how to install, run and tune the software. These files reside in the top +level directory and their names are in upper case. To a large extent, +this page reproduces them. In addition, man- and HTML-pages are provided +for every routine in the package. To access the man pages, one must add +hpl/man to its MANPATH environment variable. The HTML pages can be +accessed on this site, or by pointing your browser to your local hpl/www +directory. Finally, the source code has been heavily documented. Despite +all the other documentation efforts, the source code remains the most +trustworthy and truthful piece of information about what goes on in HPL. +

+ +

HPL Functions HTML Pages

+ +Computational Kernels Wrappers When calling the Fortran +77 BLAS interface, these C functions allow to confine the C to Fortran +77 interface issues to a small subset of routines. + + + +
+
+ +Local Auxiliaries Basic functionality, local swap functions. + + + +
+
+ +Parallel Auxiliaries Index computations, parallel basic +functionality. + + + +
+
+ +Grid Management Most of these routines have a direct +MPI equivalent. On new systems, when the entire MPI functionality is +not yet readily available, these functions are particularly convenient +since they rely on a minimal subset of the MPI standard. + + +
+
+ +Panel Management + + +
+
+ +Panel Factorization Recursive (matrix-multiply based) and +(matrix-vector based) panel factorization. + + +
+
+ +Panel Broadcast + + +
+
+ +Update + + +
+
+ +Main Factorization / Look-ahead + + +
+
+ +Backward Substitution + + +
+
+ +Matrix generation A C version of the ScaLAPACK random +matrix generator with less functionality though. + +
+
+ +Timers Sequential and parallel timing utilities. + +
+
+ +Main Testing / Timing Driver + + +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/errata.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/errata.html new file mode 100755 index 000000000..24275d2dd --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/errata.html @@ -0,0 +1,116 @@ + + +HPL Errata-Bugs + + + + +

HPL Errata - Bugs

+ +

Issues fixed in Version 2.1, October 26th, 2012

+ +The output now reports exact time stamps before and after the +execution of the solver function pdgesv() was run. This could +allow for accurate accounting of running time for data center +management purposes, for example when reporting power +consumption. This is important for the Green500 project.

+ +Fixed an out-of-bounds access to arrays in the HPL_spreadN() +and HPL_spreadT() functions. This may cause segmentation +fault signals. It was reported by Stephen Whalen from Cray.

+ +

Issues fixed in Version 2.0, September 10th, 2008

+ +Gregory Bauer found a problem size corresponding to the +periodicity of the pseudo-random matrix generator used in the +HPL timing program. This causes the LU factorization to +detect the singularity of the input matrix as it should have.

+ +A problem size of 2^17 = 131072 causes columns 14 modulo 2^14 +(i.e. 16384) (starting from 0) to be bitwise identical on a +homogeneous platform. Every problem size being a power of 2 +and larger than 2^15 will feature a similar problem if one +searches far enough in the columns of the square input matrix.

+ +The pseudo-random generator uses the linear congruential +algorithm: X(n+1) = (a * X(n) + c) mod m as described in the +Art of Computer Programming, Knuth 1973, Vol. 2. In the HPL +case, m is set to 2^31.

+ +It is very important to realize that this issue is a problem +of the testing part of the HPL software. The numerical +properties of the algorithms used in the factorization and +the solve should not be questioned because of this. In fact, +this is just the opposite: the factorization demonstrated the +weakness of the testing part of the software by detecting the +singularity of the input matrix.

+ +This issue of the testing program is not easy to fix. This +pseudo-random generator has very useful properties despite +this. It is thus currently recommended to HPL users willing +to test matrices of size larger than 2^15 to not use powers +of two.

+ +This issue has been fixed by changing the pseudo-random +matrix generator. Now the periodicity of the generator is +2^64.

+ +

Issues fixed in Version 1.0b, December 15th, 2004

+ +When the matrix size is such that one needs more than 16 GB +per MPI rank, the intermediate calculation (mat.ld+1) * +mat.nq in HPL_pdtest.c ends up overflowing because it is +done using 32-bit arithmetic. This issue has been fixed by +typecasting to size_t; Thanks to John Baron.

+ +

Issues fixed in Version 1.0a, January 20th, 2004

+ +The MPI process grid numbering scheme defaults now to row- +major ordering. This option can now be selected at run time.

+ +The inlined assembly timer routine that was causing the +compilation to fail when using gcc version 3.3 and above has +been removed from the package.

+ +Various building problems on the T3E have been fixed; Thanks +to Edward Anderson.

+ +

Issues fixed in Version 1.0, September 27th, 2000

+ +Due to a couple of errors spotted in the VSIPL port of the +software, the distribution contained in the tar file of +September 9th, 2000 had been updated on September 27th, 2000 +with a corrected distribution. These problems were +not affecting in any way possible the BLAS version of the +software. If you are using the VSIPL port of HPL, +and want to make sure you are indeed using the latest +corrected version, please check the date contained in the +file HPL.build.log contained in the main directory.

+ + + + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/faqs.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/faqs.html new file mode 100755 index 000000000..ad853e760 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/faqs.html @@ -0,0 +1,126 @@ + + +HPL Frequently Asked Questions + + + + +

HPL Frequently Asked Questions

+ + +
+ +

What problem size N should I run ?

+ +In order to find out the best performance of your system, the +largest problem size fitting in memory is what you should aim for. +The amount of memory used by HPL is essentially the size of the +coefficient matrix. So for example, if you have 4 nodes with 256 Mb +of memory on each, this corresponds to 1 Gb total, i.e., 125 M double +precision (8 bytes) elements. The square root of that number is +11585. One definitely needs to leave some memory for the OS as well +as for other things, so a problem size of 10000 is likely to fit. As +a rule of thumb, 80 % of the total amount of memory is a good guess. +If the problem size you pick is too large, swapping will occur, and +the performance will drop. If multiple processes are spawned on each +node (say you have 2 processors per node), what counts is the +available amount of memory to each process.

+
+ +

What block size NB should I use ?

+ +HPL uses the block size NB for the data distribution as well as for +the computational granularity. From a data distribution point of +view, the smaller NB, the better the load balance. You definitely +want to stay away from very large values of NB. From a computation +point of view, a too small value of NB may limit the computational +performance by a large factor because almost no data reuse will occur +in the highest level of the memory hierarchy. The number of messages +will also increase. Efficient matrix-multiply routines are often +internally blocked. Small multiples of this blocking factor are +likely to be good block sizes for HPL. The bottom line is that "good" +block sizes are almost always in the [32 .. 256] interval. The best +values depend on the computation / communication performance ratio of +your system. To a much lesser extent, the problem size matters as well. +Say for example, you empirically found that 44 was a good block size +with respect to performance. 88 or 132 are likely to give slightly +better results for large problem sizes because of a slightly higher +flop rate.

+
+ +

What process grid ratio P x Q should I use ?

+ +This depends on the physical interconnection network you have. +Assuming a mesh or a switch HPL "likes" a 1:k ratio with k in [1..3]. +In other words, P and Q should be approximately equal, with Q +slightly larger than P. Examples: 2 x 2, 2 x 4, 2 x 5, 3 x 4, 4 x 4, +4 x 6, 5 x 6, 4 x 8 ... If you are running on a simple Ethernet +network, there is only one wire through which all the messages are +exchanged. On such a network, the performance and scalability of HPL +is strongly limited and very flat process grids are likely to be the +best choices: 1 x 4, 1 x 8, 2 x 4 ...

+
+ +

What about the one processor case ?

+ +HPL has been designed to perform well for large problem sizes on +hundreds of nodes and more. The software works on one node and for +large problem sizes, one can usually achieve pretty good performance +on a single processor as well. For small problem sizes however, the +overhead due to message-passing, local indexing and so on can be +significant.

+
+ +

Why so many options in HPL.dat ?

+ +There are quite a few reasons. First off, these options are useful to +determine what matters and what does not on your system. Second, HPL +is often used in the context of early evaluation of new systems. In +such a case, everything is usually not quite working right, and it is +convenient to be able to vary these parameters without recompiling. +Finally, every system has its own peculiarities and one is likely to +be willing to empirically determine the best set of parameters. In +any case, one can always follow the advice provided in the +tuning section of this document and not +worry about the complexity of the input file.

+
+ +

Can HPL be Outperformed ?

+ +Certainly. There is always room for performance improvements. +Specific knowledge about a particular system is always a source of +performance gains. Even from a generic point of view, better +algorithms or more efficient formulation of the classic ones are +potential winners.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/index.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/index.html new file mode 100755 index 000000000..a3a53abfe --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/index.html @@ -0,0 +1,178 @@ + + + +HPL - A Portable Implementation of the High-Performance +Linpack Benchmark for Distributed-Memory Computers + + + + + +
+ + + + + +
+

HPL - A Portable Implementation of the High-Performance Linpack +Benchmark for Distributed-Memory Computers

+
+ + +
+ + + + + + + +
Version 2.3 +A. Petitet, +R. C. Whaley, +J. Dongarra, +A. Cleary +December 2, 2018 +# Accesses +
+

+ +HPL is a software package that solves a (random) +dense linear system in double precision (64 bits) arithmetic +on distributed-memory computers. It can thus be regarded as +a portable as well as freely available implementation of the High +Performance Computing Linpack Benchmark.

+ +The algorithm used by HPL can be summarized by the +following keywords: Two-dimensional block-cyclic data distribution +- Right-looking variant of the LU factorization with row partial +pivoting featuring multiple look-ahead depths - Recursive panel +factorization with pivot search and column broadcast combined - +Various virtual panel broadcast topologies - bandwidth reducing +swap-broadcast algorithm - backward substitution with look-ahead +of depth 1.

+ +The HPL package provides a testing and timing program to quantify +the accuracy of the obtained solution as well as +the time it took to compute it. The best performance +achievable by this software on your system depends on a large variety +of factors. Nonetheless, with some restrictive assumptions on the +interconnection network, the algorithm described here and its +attached implementation are scalable in the sense +that their parallel efficiency is maintained constant with respect +to the per processor memory usage.

+ +The HPL software package requires the availability +on your system of an implementation of the Message Passing Interface +MPI (1.1 compliant). +An implementation of either the Basic Linear Algebra +Subprograms BLAS or the Vector Signal Image +Processing Library VSIPL is also needed. +Machine-specific as well as generic implementations of +MPI, the +BLAS and +VSIPL are available for a large +variety of systems.

+ +Acknowledgements: This work was supported in part +by a grant from the Department of Energy's Lawrence +Livermore National Laboratory and Los Alamos National Laboratory +as part of the ASCI Projects contract numbers B503962 and +12187-001-00 4R. + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ +
+Innovative Computing Laboratory
+last revised December 2, 2018
+
+ +
+#########################################################################
+
+file    hpl-2.3.tar.gz
+for     HPL 2.3 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: December 2, 2018
+
+#########################################################################
+
+file    hpl-2.2.tar.gz
+for     HPL 2.2 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: February 24, 2016
+
+#########################################################################
+
+file    hpl-2.1.tar.gz
+for     HPL 2.1 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary, Piotr Luszczek
+Updated: October 26, 2012
+
+#########################################################################
+
+file    hpl-2.0.tar.gz
+for     HPL 2.0 - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: September 10, 2008
+
+#########################################################################
+
+file    hpl.tgz
+for     HPL 1.0a - A Portable Implementation of the High-Performance Linpack
+,       Benchmark for Distributed-Memory Computers 
+by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary
+Updated: January 20, 2004
+ +######################################################################### + +file hpl_qs22-2008-11-30.patch +for Implementation of the High-Performance Linpack benchmark for IBM +, QS22 systems with PowerXCell 8i processors. The file is a patch +, for HPL 1.0a. +by IBM + +file IBM_LICENSE.TXT +for IBM Copyright notice for QS22 HPL +by IBM + +file IBM_README.txt +for README for IBM QS22 HPL +by IBM +Updated: November 30, 2008 + + +######################################################################### +
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/links.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/links.html new file mode 100755 index 000000000..da2639e99 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/links.html @@ -0,0 +1,89 @@ + + +HPL Related Links + + + + +

HPL Related Links

+ +The list of links below contains some relevant material to this +work. This list is provided for illustrative purposes, and should be +regarded as an initial starting point for the interested reader. This +list is by no means meant to be exhaustive.

+ +

Message Passing Interface (MPI)

+ +MPI is a library specification for message-passing, proposed as a +standard by a broadly based committee of vendors, implementors, and +users. Machine-specific (optimized) as well as freely available MPI +libraries are available for a large variety of systems. Browse the +Message Passing Interface (MPI) +standard web page for more information.

+ +

Basic Linear Algebra Subroutines (BLAS)

+ +The BLAS are high quality +"building block" routines for performing basic vector and matrix +operations. A lot of "BLAS-related" information can be found at this +site. In particular, a reference implementation is available. This +reference implementation is not optimized for any +system, and it is therefore not recommended to use it +for benchmarking purposes. +However, machine-specific +optimized BLAS libraries are available for a variety of computer +systems. For further details, please contact your local vendor +representative. Alternatively, one may also consider using automatic +code generators such as ATLAS. +This tool automatically generates a complete and optimized BLAS +library for a large variety of modern systems.

+ +

Vector Signal Image Processing Library (VSIPL)

+ +VSIPL is an API defined by an open +standard comprised of embedded signal and image processing hardware and +software vendors, academia, users, and government labs. A lot of +"VSIPL-related" information can be found at this site. In particular, a +reference implementation is available. Machine-specific optimized VSIPL +libraries are available for a variety of computer systems. For further +details, please contact your local vendor representative.

+ +

TOP 500 List

+ +The TOP 500 +is an ordered list of the 500 most powerful computer systems worldwide. +Computers are ranked in this list by their performance on the + +LINPACK Benchmark.

+ +

Parallel Dense Linear Algebra Software Libraries

+ +Browse the Netlib software repository +or the National HPCC Software Exchange +to find a large collection of freely available linear algebra libraries. +

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/main.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/main.jpg new file mode 100755 index 000000000..df62edd33 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/main.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/mat2.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/mat2.jpg new file mode 100755 index 000000000..25afdc44c Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/mat2.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/pfact.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/pfact.jpg new file mode 100755 index 000000000..33a7e55cb Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/pfact.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/references.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/references.html new file mode 100755 index 000000000..95c6db176 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/references.html @@ -0,0 +1,276 @@ + + +HPL References + + + + +

HPL References

+ + +The list of references below contains some relevant published material +to this work. This list is provided for illustrative purposes, and +should be regarded as an initial starting point for the interested +reader. This list is by no means meant to be exhaustive. +

+ +The references have been sorted in four categories and chronologically +listed within each category. The four categories are + +
+ +

Linpack Benchmark

+ +
    + + +
  • LINPACK Users Guide, J. Dongarra, J. Bunch, C. Moler and +G. W. Stewart, SIAM, Philadelphia, PA, 1979. + + +
  • Performance of Various Computers Using Standard Linear Equations +Software, J. Dongarra, Technical Report CS-89-85, University of +Tennessee, 1989. (An updated version of this report can be found at + +http://www.netlib.org/benchmark/performance.ps). + + +
  • Towards Peak Parallel LINPACK Performance on 400, +R. Bisseling and L. Loyens, Supercomputer, Vol. 45, pp. 20-27, 1991. + +
  • Massively Parallel LINPACK Benchmark on the Intel Touchstone +DELTA and iPSC/860 Systems, R. van de Geijn, 1991 Annual Users +Conference Proceedings. Intel Supercomputer Users Group, Dallas, TX, +1991. + +
  • The LINPACK Benchmark on the AP 1000, R. Brent, Frontiers, +1992, pp. 128-135, McLean, VA, 1992. + + +
  • Implementation of BLAS Level 3 and LINPACK Benchmark on the +AP1000, R. Brent and P. Strazdins, Fujitsu Scientific and Technical +Journal, Vol. 5, No. 1, pp. 61-70, 1993. + + +
  • LU Factorization and the LINPACK Benchmark on the Intel +Paragon, D. Womble, D. Greenberg, D. Wheat and S. Riesen, Sandia +Technical Report, 1994. + + +
  • Massively Parallel Distributed Computing: Worlds First 281 +Gigaflop Supercomputer, J. Bolen, A. Davis, B. Dazey, S. Gupta, +G. Henry, D. Robboy, G. Schiffler, D. Scott, M. Stallcup, A. Taraghi, +S. Wheat from Intel SSD, L. Fisk, G. Istrail, C. Jong, R. Riesen, +L. Shuler, from Sandia National Laboratories, Proceedings of the Intel +Supercomputer Users Group 1995. + + +
  • High Performance Software on Intel Pentium Pro Processors or +Micro-Ops to TeraFLOPS, B. Greer and G. Henry, Proceedings of the +SuperComputing 1997 Conference, ACM SIGARCH - IEEE Computer Society +Press - ISBN: 0-89791-985-8, San Jose, CA, 1997. + +
+ +
+ +

Parallel LU Factorization

+ +
    + + +
  • Communication Complexity of the Gaussian Elimination Algorithm +on Multiprocessors, Y. Saad, Linear Algebra and Its Applications, +Vol. 77, pp. 315-340, 1986. + + +
  • LU Factorization Algorithms on Distributed-Memory Multiprocessor +Architectures, G. Geist and C. Romine, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, pp. 639-649, 1988. + + +
  • Parallel LU Decomposition on a Transputer Network, +R. Bisseling and J. van der Vorst, Lecture Notes in Computer Sciences, +Springer-Verlag, Eds. G. van Zee and J. van der Vorst, Vol. 384, +pp. 61-77, 1989. + + +
  • The Distributed Solution of Linear Systems Using the Torus-Wrap +Data Mapping, C. Ashcraft, ECA-TR-147, Boeing Computer Services, +Seattle, WA, 1990. + +
  • Experiments with Multicomputer LU-Decomposition, E. van de +Velde, Concurrency: Practice and Experience, Vol. 2, pp. 1-26, 1990. + + +
  • A Taxonomy of Distributed Dense LU Factorization Methods, +C. Ashcraft, ECA-TR-161, Boeing Computer Services, Seattle, WA, 1991. + + +
  • The Torus-Wrap Mapping for Dense Matrix Calculations on Massively +Parallel Computers, B. Hendrickson and D. Womble, SIAM Journal on +Scientific and Statistical Computing, Vol. 15, pp. 1201-1226, 1994. + +
  • Scalability Issues in the Design of a Library for Dense Linear +Algebra, J. Dongarra, R. van de Geijn and D. Walker, Journal of +Parallel and Distributed Computing, Vol. 22, No. 3, pp. 523-537, 1994. + + +
  • Matrix Factorization using Distributed Panels on the Fujitsu +AP1000, P. Strazdins, Proceedings of the IEEE First International +Conference on Algorithms And Architectures for Parallel Processing +ICA3PP-95, Brisbane, 1995. + + +
  • The Design and Implementation of the ScaLAPACK LU, QR, and +Cholesky Factorization Routines, J. Choi, J. Dongarra, S. Ostrouchov, +A. Petitet, D. Walker and R. C. Whaley, Scientific Programming, Vol. 5, +pp. 173-184, 1996. + +
+ +
+ +

Recursive LU Factorization

+ +
    + + +
  • Locality of Reference in LU Decomposition with partial +pivoting, S. Toledo, SIAM Journal on Matrix Analysis and Applications, Vol. 18, +No. 4, 1997. + +
  • Recursion Leads to Automatic Variable Blocking for Dense +Linear-Algebra Algorithms, F. Gustavson, IBM Journal of Research +and Development, Vol. 41, No. 6, pp. 737-755, 1997 + +
+ +
+ +

Parallel Matrix Multiply

+ +
    + + +
  • Matrix Algorithms on a Hypercube I: Matrix Multiplication, +G. Fox, S. Otto and A. Hey, Parallel Computing, Vol. 3, pp. 17-31, 1987. + + +
  • Basic Matrix Subprograms for Distributed-Memory Systems, +A. Elster, Proceedings of the Fifth Distributed-Memory Computing +Conference, Eds. D. Walker and Q. Stout, IEEE Press, pp. 311-316, 1990. + + +
  • The Parallelization of Level 2 and 3 BLAS Operations on +Distributed-Memory Machines, M. Aboelaze, N. Chrisochoides +and E. Houstis, CSD-TR-91-007, Purdue University, West Lafayette, +IN, 1991. + + +
  • The Multicomputer Toolbox Approach to Concurrent BLAS and LACS, +R. Falgout, A. Skjellum, S. Smith and C. Still, Proceedings of the +Scalable High Performance Computing Conference SHPCC-92, IEEE Computer +Society Press, 1992. + + +
  • A High Performance Matrix Multiplication Algorithm on a +Distributed-Memory Parallel Computer, Using Overlapped Communication, +R. Agarwal, F. Gustavson and M. Zubair, IBM Journal of Research and +Development, Vol. 38, No. 6, pp. 673-681, 1994. + +
  • PUMMA: Parallel Universal Matrix Multiplication Algorithms on +Distributed-Memory Concurrent Computers, J. Choi, J. Dongarra and +D. Walker, Concurrency: Practice and Experience, Vol. 6, No. 7, +pp. 543-570, 1994. + +
  • Matrix Multiplication on the Intel Touchstone DELTA, +S. Huss-Lederman, E. Jacobson, A. Tsao and G. Zhang, Concurrency: +Practice and Experience, Vol. 6, No. 7, pp. 571-594, 1994. + + +
  • A Three-Dimensional Approach to Parallel Matrix Multiplication, +R. Agarwal, S. Balle, F. Gustavson, M. Joshi and P. Palkar, IBM Journal +of Research and Development, Vol. 39, No. 5, pp. 575-582, 1995. + + +
  • A High Performance Parallel Strassen Implementation, +B. Grayson and R. van de Geijn, Parallel Processing Letters, Vol. 6, +No. 1, pp. 3-12, 1996. + + +
  • Parallel Implementation of BLAS: General Techniques for Level +3 BLAS, A. Chtchelkanova, J. Gunnels, G. Morrow, J. Overfelt and +R. van de Geijn, Concurrency: Practice and Experience, Vol. 9, No. 9, +pp. 837-857, 1997. + +
  • A Poly-Algorithm for Parallel Dense Matrix Multiplication on +Two-Dimensional Process Grid Topologies, J. Li, R. Falgout and +A. Skjellum, Concurrency: Practice and Experience, Vol. 9, No. 5, +pp. 345-389, 1997. + +
  • SUMMA: Scalable Universal Matrix Multiplication Algorithm, +R. van de Geijn and J. Watts, Concurrency: Practice and Experience, +Vol. 9, No. 4, pp. 255-274, 1997. + +
+ +
+ +

Parallel Triangular Solve

+ +
    + + +
  • Parallel Solution of Triangular Systems on Distributed-Memory +Multiprocessors, M. Heath and C. Romine, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, pp. 558-588, 1988. + +
  • A Parallel Triangular Solver for a Distributed-Memory +Multiprocessor, G. Li and T. Coleman, SIAM Journal on Scientific +and Statistical Computing, Vol. 9, No. 3, pp. 485-502, 1988. + + +
  • A New Method for Solving Triangular Systems on Distributed-Memory +Message-Passing Multiprocessor, G. Li and T. Coleman, SIAM Journal +on Scientific and Statistical Computing, Vol. 10, No. 2, pp. 382-396, +1989. + + +
  • Parallel Triangular System Solving on a Mesh Network of +Transputers, R. Bisseling and J. van der Vorst, SIAM Journal +on Scientific and Statistical Computing, Vol. 12, pp. 787-799, 1991. + +
+ + +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/results.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/results.html new file mode 100755 index 000000000..9a7d8b8af --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/results.html @@ -0,0 +1,243 @@ + + +HPL Results + + + + + + + +
+ + +

HPL Performance Results

+ + +The performance achieved by this software package on a few machine +configurations is shown below. These results are only provided for +illustrative purposes. By the time you read this, those systems +have changed, they may not even exist anymore and one can surely +not exactly reproduce the state in which these machines were when +those measurements were obtained. To obtain accurate figures +on your system, it is absolutely necessary to +download the software and run it there. + +
+
+ + + +
+
+ +

4 AMD Athlon K7 500 Mhz (256 Mb) - (2x) 100 Mbs +Switched - 2 NICs per node (channel bonding)

+ +
+ + + + + + + +
OS Linux 6.2 RedHat (Kernel 2.2.14)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPIch 1.2.1
BLAS ATLAS (Version 3.0 beta)
Comments 09 / 00

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Performance (Gflops) w.r.t Problem size on 4 nodes. +
GRID 2000 5000 8000 10000
1 x 4 1.28 1.73 1.89 1.95
2 x 2 1.17 1.68 1.88 1.93
4 x 1 0.81 1.43 1.70 1.80

+

+ +
+

8 Duals Intel PIII 550 Mhz (512 Mb) - Myrinet

+ +
+ + + + + + + + + +
OS Linux 6.1 RedHat (Kernel 2.2.15)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPI GM (Version 1.2.3)
BLAS ATLAS (Version 3.0 beta)
Comments UTK / ICL - Torc cluster - 09 / 00

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Performance (Gflops) w.r.t Problem size on 8- and 16-processors grids. +
GRID 2000 5000 8000 10000 15000 20000
2 x 4 1.76 2.32 2.51 2.58 2.72 2.73
4 x 4 2.27 3.94 4.46 4.68 5.00 5.16

+

+ +
+

Compaq 64 nodes (4 ev67 667 Mhz processors per node) +AlphaServer SC

+ +
+ + + + + + + + +
OS Tru64 Version 5
C compiler cc Version 6.1
C flags -arch host -tune host -std -O5
MPI -lmpi -lelan
BLAS CXML
Comments ORNL / NCCS + - falcon - 09 / 00

+

+ +In the table below, each row corresponds to a given number of cpus (or +processors) and nodes. The first row for example is denoted by 1 / 1, +i.e., 1 cpu / 1 node. Rmax is given in Gflops, and the value of Nmax +in fact corresponds to 351 Mb per cpu for all machine configurations.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CPUS / NODES GRID N 1/2 Nmax Rmax (Gflops) Parallel Efficiency
1 / 1 1 x 1 150 6625 1.136 1.000
4 / 1 2 x 2 800 13250 4.360 0.960
16 / 4 4 x 4 2300 26500 17.00 0.935
64 / 16 8 x 8 5700 53000 67.50 0.928
256 / 64 16 x 16 14000 106000 263.6 0.906

+

+For Rmax shown in the table, the parallel efficiency per cpu has been +computed using the performance achieved by HPL on 1 cpu. That is fair: +since the CXML matrix multiply routine was achieving at best 1.24 Gflops +for large matrix operands on one cpu, it would have been difficult for a +sequential Linpack benchmark implementation to achieve much more than +1.136 Gflops on this same cpu. For constant load (as in the table 351 Mb +per cpu for Nmax), HPL scales almost linearly as it should. + +

+The authors acknowledge the use of the Oak Ridge National Laboratory +Compaq computer, funded by the Department of Energy's Office +of Science and Energy Efficiency programs.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/roll.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/roll.jpg new file mode 100755 index 000000000..88d2c56af Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/roll.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/rollM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/rollM.jpg new file mode 100755 index 000000000..0d7f076fd Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/rollM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/scalability.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/scalability.html new file mode 100755 index 000000000..00bb1a27e --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/scalability.html @@ -0,0 +1,200 @@ + + +HPL Scalability Analysis + + + + +

HPL Scalability Analysis

+ +The machine model used for the +analysis is first described. This crude model is then used to first +estimate the parallel running time of the various phases of the +algorithm namely + +Finally the parallel efficiency +of the entire algorithm is estimated according to this machine model. +We show that for a given set of parameters HPL is scalable +not only with respect to the amount of computation, but also with +respect to the communication volume.

+
+ +

The Machine Model

+ +Distributed-memory computers consist of processors that are connected +using a message passing interconnection network. Each processor has +its own memory called the local memory, which is accessible only to +that processor. As the time to access a remote memory is longer than +the time to access a local one, such computers are often referred to +as Non-Uniform Memory Access (NUMA) machines.

+ +The interconnection network of our machine model is static, meaning +that it consists of point-to-point communication links among +processors. This type of network is also referred to as a direct +network as opposed to dynamic networks. The latter are constructed +from switches and communication links. These links are dynamically +connected to one another by the switching elements to establish, at +run time, the paths between processors memories.

+ +The interconnection network of the two-dimensional machine model +considered here is a static, fully connected physical topology. It +is also assumed that processors can be treated equally in terms +of local performance and that the communication rate between two +processors depends on the processors considered.

+ +Our model assumes that a processor can send or receive data on only +one of its communication ports at a time (assuming it has more than +one). In the literature, this assumption is also referred to as the +one-port communication model.

+ +The time spent to communicate a message between two given processors +is called the communication time Tc. In our machine model, Tc is +approximated by a linear function of the number L of double +precision (64-bits) items communicated. Tc is the sum of the time to +prepare the message for transmission (alpha) and the time (beta * L) +taken by the message of length L to traverse the network to its +destination, i.e.,

+
+Tc = alpha + beta L.

+
+ +Finally, the model assumes that the communication links are +bi-directional, that is, the time for two processors to send each +other a message of length L is also Tc. A processor can send and/or +receive a message on only one of its communication links at a time. +In particular, a processor can send a message while receiving another +message from the processor it is sending to at the same time.

+ +Since this document is only concerned with regular local dense linear +algebra operations, the time taken to perform one floating point +operation is assumed to be summarized by three constants gam1, +gam2 and gam3. These quantities are flop rate approximations of the +vector-vector, matrix-vector and matrix-matrix operations for each +processor. This very crude approximation summarizes all the steps +performed by a processor to achieve such a computation. Obviously, +such a model neglects all the phenomena occurring in the processor +components, such as cache misses, pipeline startups, memory load or +store, floating point arithmetic and so on, that may influence the +value of these constants as a function of the problem size for +example.

+ +Similarly, the model does not make any assumption on the amount of +physical memory per node. It is assumed that if a process has been +spawned on a processor, one has ensured that enough memory was +available on that processor. In other words, swapping will not occur +during the modeled computation.

+ + +This machine model is a very crude approximation that is designed +specifically to illustrate the cost of the dominant factors of our +particular case.

+
+
+ +

Panel Factorization and Broadcast

+ +Let us consider an M-by-N panel distributed over a P-process column. +Because of the recursive formulation of the panel factorization, it +is reasonable to consider that the floating point operations will +be performed at matrix-matrix multiply "speed". For every column in +the panel a binary-exchange is performed on 2*N data items. When this +panel is broadcast, what matters is the time that the next process +column will spend in this communication operation. Assuming one +chooses the increasing-ring (modified) +variant, only one message needs to be taken into account. The +execution time of the panel factorization and broadcast can thus be +approximated by:

+
+Tpfact( M, N ) = (M/P - N/3) N^2 gam3 + N log(P)( alpha + beta 2 N ) + +alpha + beta M N / P.

+
+
+ +

Trailing Submatrix Update

+ +Let us consider the update phase of an N-by-N trailing submatrix +distributed on a P-by-Q process grid. From a computational point of +view one has to (triangular) solve N right-hand-sides and perform a +local rank-NB update of this trailing submatrix. Assuming one chooses +the long variant, the execution +time of the update operation can be approximated by:

+
+Tupdate( N, NB ) = gam3 ( N NB^2 / Q + 2 N^2 NB / ( P Q ) ) + +alpha ( log( P ) + P - 1 ) + 3 beta N NB / Q.

+
+The constant "3" in front of the "beta" term is obtained by counting +one for the (logarithmic) spread phase and two for the rolling phase. +In the case of bi-directional links this constant 3 should therefore +be only a 2.

+
+ +

Backward Substitution

+ +The number of floating point operations performed during the backward +substitution is given by N^2 / (P*Q). Because of the lookahead, the +communication cost can be approximated at each step by two messages +of length NB, i.e., the time to communicate the NB-piece of the +solution vector from one diagonal block of the matrix to another. It +follows that the execution time of the backward substitution can be +approximated by:

+
+Tbacks( N, NB ) = gam2 N^2 / (P Q) + N ( alpha / NB + 2 beta ).

+
+
+ +

Putting it All Together

+ +The total execution time of the algorithm described above is given by

+
+Sum(k=0,N,NB)[Tpfact( N-k, NB ) + Tupdate( N-k-NB, NB )] + +Tbacks( N, NB ).

+
+That is, by considering only the dominant terms in alpha, beta and +gam3:

+
+Thpl = 2 gam3 N^3 / ( 3 P Q ) + beta N^2 (3 P + Q) / ( 2 P Q ) + +alpha N ((NB + 1) log(P) + P) / NB.

+
+The serial execution time is given by Tser = 2 gam3 N^3 / 3. If we +define the parallel efficiency E as the ratio Tser / ( P Q Thpl ), we +obtain:

+
+E = 1 / ( 1 + 3 beta (3 P + Q) / ( 4 gam3 N ) + +3 alpha P Q ((NB + 1) log(P) + P) / (2 N^2 NB gam3) ).

+
+This last equality shows that when the memory usage per processor +N^2 / (P Q) is maintained constant, the parallel efficiency slowly +decreases only because of the alpha term. The communication volume +(the beta term) however remains constant. Due to these results, HPL +is said to be scalable not only with respect to the +amount of computation, but also with respect to the communication +volume.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/software.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/software.html new file mode 100755 index 000000000..34d82b2b7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/software.html @@ -0,0 +1,109 @@ + + +HPL Software + + + + +

HPL Software

+ +

Download and Installation

+ +
    +
  1. Download the tar-gzipped file, +issue then "gunzip hpl-2.3.tar.gz; tar -xvf hpl-2.3.tar" and this +should create an hpl-2.3 directory containing the distribution. +We call this directory the top level directory. + +
  2. Create a file Make.<arch> in the top-level directory. +For this purpose, you may want to re-use one contained in the +setup directory. This Make.<arch> file essentially contains +the compilers, libraries, and their paths to be used on your system. + +
  3. Type "make arch=<arch>". This should create an executable +in the bin/<arch> directory called xhpl. For example, on our +Linux PII cluster, I create a file called Make.Linux_PII in the +top-level directory. Then, I type "make arch=Linux_PII". This +creates the executable file bin/Linux_PII/xhpl. + +
  4. Quick check: run a few tests (assuming you have 4 nodes for +interactive use) by issuing the following commands from the top +level directory: "cd bin/<arch> ; mpirun -np 4 xhpl". This +should produce quite a bit of meaningful output on the screen. + +
  5. Most of the performance parameters can be tuned, by modifying +the input file bin/<arch>/HPL.dat. See the +tuning page or the TUNING file in the +top-level directory. +
+
+ +

Compile Time Options

+ +At the end of the "model" Make.<arch>, the user is given +the opportunity to override some default compile options of this +software. The list of these options and their meaning is:

+ +
+ + + + + + + + + +
-DHPL_COPY_L force the copy of the panel L before bcast
-DHPL_CALL_CBLAS call the BLAS C interface
-DHPL_CALL_VSIPL call the vsip library
-DHPL_DETAILED_TIMING enable detailed timers

+

+ +The user must choose between either the BLAS Fortran 77 interface, +or the BLAS C interface, or the VSIPL library depending on which +computational kernels are available on his system. Only one of these +options should be selected. If you choose the BLAS Fortran 77 +interface, it is necessary to fill out the machine-specific C to +Fortran 77 interface section of the Make.<arch> file. To do +this, please refer to the Make.<arch> examples contained in +the setup directory.

+ +By default HPL will: +
    +
  • not copy L before broadcast, +
  • call the BLAS Fortran 77 interface, +
  • not display detailed timing information. +
+ +As an example, suppose one wants this software to copy the panel of +columns into a contiguous buffer before broadcasting. It should +be more efficient to let the software create the appropriate MPI +user-defined data type since this may avoid the data copy. So, it +is a strange idea, but one insists. To achieve this one would add +-DHPL_COPY_L to the definition of HPL_OPTS at the end of the file +Make.<arch>. Issue then a "make clean arch=<arch> ; +make build arch=<arch>" and the executable will be rebuilt +with that feature enabled.

+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spread.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spread.jpg new file mode 100755 index 000000000..56c255a3f Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spread.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spreadM.jpg b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spreadM.jpg new file mode 100755 index 000000000..433e4c077 Binary files /dev/null and b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/spreadM.jpg differ diff --git a/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/tuning.html b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/tuning.html new file mode 100755 index 000000000..fbbf17fb7 --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hip/hpl-2.3/www/tuning.html @@ -0,0 +1,476 @@ + + +HPL Tuning + + + + +

HPL Tuning

+ +After having built the executable hpl/bin/<arch>/xhpl, +one may want to modify the input data file HPL.dat. This file +should reside in the same directory as the executable +hpl/bin/<arch>/xhpl. An example HPL.dat file is +provided by default. This file contains information about the +problem sizes, machine configuration, and algorithm features +to be used by the executable. It is 31 lines long. All the +selected parameters will be printed in the output generated +by the executable.

+ +We first describe the meaning of each line of this input file +below. Finally, a few useful +experimental guidelines to set up the file are given at +the end of this page.

+
+ +

Description of the HPL.dat File

+ +Line 1: (unused) Typically one would use +this line for one's own purposes. For example, it could be used +to summarize the content of the input file. By default this +line reads: +
+HPL Linpack benchmark input file
+
+ +
+Line 2: (unused) same as line 1. By default +this line reads: +
+Innovative Computing Laboratory, University of Tennessee
+
+ +
+Line 3: the user can choose where the +output should be redirected to. In the case of a file, a +name is necessary, and this is the line where one wants to +specify it. Only the first name on this line is significant. +By default, the line reads: +
+HPL.out  output file name (if any)
+
+ +This means that if one chooses to redirect the output to a +file, the file will be called "HPL.out". The rest of the line +is unused, and this space can be used to put some informative comment on +the meaning of this line.

+ +
+Line 4: This line specifies where the output +should go. The line is formatted, it must begin with a +positive integer, the rest is insignificant. 3 choices are +possible for the positive integer, 6 means that the output +will go to the standard output, 7 means that the output will +go to the standard error. Any other integer means that the +output should be redirected to a file, whose name has been +specified in the line above. This line by default reads: +
+6        device out (6=stdout,7=stderr,file)
+
+which means that the output generated by the executable +should be redirected to the standard output.

+ +
+Line 5: This line specifies the number of +problem sizes to be executed. This number should be less than +or equal to 20. The first integer is significant, the rest +is ignored. If the line reads: +
+3        # of problems sizes (N)
+
+this means that the user is willing to run 3 problem sizes +that will be specified in the next line.

+ +
+Line 6: This line specifies the problem sizes +one wants to run. Assuming the line above started with 3, +the 3 first positive integers are significant, the rest is +ignored. For example: +
+3000 6000 10000    Ns
+
+means that one wants xhpl to run 3 (specified in line 5) +problem sizes, namely 3000, 6000 and 10000.

+ +
+Line 7: This line specifies the number of +block sizes to be run. This number should be less than or +equal to 20. The first integer is significant, the rest is +ignored. If the line reads: +
+5        # of NBs
+
+this means that the user is willing to use 5 block sizes that +will be specified in the next line.

+ +
+Line 8: This line specifies the block sizes +one wants to run. Assuming the line above started with 5, +the 5 first positive integers are significant, the rest is +ignored. For example: +
+80 100 120 140 160 NBs
+
+means that one wants xhpl to use 5 (specified in line 7) +block sizes, namely 80, 100, 120, 140 and 160.

+ +
+Line 9: This line specifies how the MPI +processes should be mapped onto the nodes of your platform. +There are currently two possible mappings, namely row- and +column-major. This feature is mainly useful when these nodes +are themselves multi-processor computers. A row-major mapping +is recommended.

+ +
+Line 10: This line specifies the number of +process grids to be run. This number should be less than +or equal to 20. The first integer is significant, the rest is +ignored. If the line reads: +
+2        # of process grids (P x Q)
+
+this means that you are willing to try 2 process grid sizes +that will be specified in the next line.

+ +
+Line 11-12: These two lines specify the +number of process rows and columns of each grid you want to +run on. Assuming the line above (10) started with 2, the 2 +first positive integers of those two lines are significant, +the rest is ignored. For example: +
+1 2          Ps
+6 8          Qs
+
+means that one wants to run xhpl on 2 process grids (line +10), namely 1-by-6 and 2-by-8. Note: In this example, it is +required then to start xhpl on at least 16 nodes (max +of Pi-by-Qi). The runs on the two grids will be consecutive. +If one was starting xhpl on more than 16 nodes, say 52, only +6 would be used for the first grid (1x6) and then 16 (2x8) +would be used for the second grid. The fact that you started +the MPI job on 52 nodes, will not make HPL use all of them. +In this example, only 16 would be used. If one wants to run +xhpl with 52 processes one needs to specify a grid of 52 +processes, for example the following lines would do the job: +
+4  2         Ps
+13 8         Qs
+
+ +
+Line 13: This line specifies the threshold +to which the residuals should be compared. The residuals +should be of order 1, but are in practice slightly less than +this, typically 0.001. This line is made of a real number, +the rest is not significant. For example: +
+16.0         threshold
+
+In practice, a value of 16.0 will cover most cases. For +various reasons, it is possible that some of the residuals +become slightly larger, say for example 35.6. xhpl will flag +those runs as failed, however they can be considered as +correct. A run should be considered as failed if the residual +is a few orders of magnitude bigger than 1, for example 10^6 or +more. Note: if one was to specify a threshold of 0.0, all +tests would be flagged as failed, even though the answer is +likely to be correct. It is allowed to specify a negative +value for this threshold, in which case the checks will be +by-passed, no matter what the threshold value is, as soon as +it is negative. This feature allows one to save time when +performing a lot of experiments, say for instance during the +tuning phase. Example: +
+-16.0        threshold
+
+ +
+The remaining lines allow one to specify algorithmic features. +xhpl will run all possible combinations of those for each +problem size, block size, process grid combination. This is +handy when one looks for an "optimal" set of parameters. To +understand a little bit better, let us first say a few words +about the algorithm implemented in HPL. Basically this is a +right-looking version with row-partial pivoting. The panel +factorization is matrix-matrix operation based and recursive, +dividing the panel into NDIV subpanels at each step. This +part of the panel factorization is denoted below by +"recursive panel fact. (RFACT)". The recursion stops when +the current panel is made of less than or equal to NBMIN +columns. At that point, xhpl uses a matrix-vector operation +based factorization denoted below by "PFACTs". Classic +recursion would then use NDIV=2, NBMIN=1. There are +essentially 3 numerically equivalent LU factorization +algorithm variants (left-looking, Crout and right-looking). +In HPL, one can choose every one of those for the RFACT, as +well as the PFACT. The following lines of HPL.dat allow you +to set those parameters.

+Lines 14-21: (Example 1) +
+3       # of panel fact
+0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
+4       # of recursive stopping criterium
+1 2 4 8 NBMINs (>= 1)
+3       # of panels in recursion
+2 3 4   NDIVs
+3       # of recursive panel fact.
+0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
+
+ +This example would try all variants of PFACT, 4 values for +NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 +and 4, and all variants for RFACT.

+Lines 14-21: (Example 2) +
+2       # of panel fact
+2 0     PFACTs (0=left, 1=Crout, 2=Right)
+2       # of recursive stopping criterium
+4 8     NBMINs (>= 1)
+1       # of panels in recursion
+2       NDIVs
+1       # of recursive panel fact.
+2       RFACTs (0=left, 1=Crout, 2=Right)
+
+This example would try 2 variants of PFACT namely right +looking and left looking, 2 values for NBMIN, namely 4 and 8, +1 value for NDIV namely 2, and one variant for RFACT.

+ +
+In the main loop of the algorithm, the current panel of +columns is broadcast in process rows using a virtual ring +topology. HPL offers various choices and one most likely wants +to use the increasing ring modified, encoded as 1. 3 and 4 are +also good choices.

+Lines 22-23: (Example 1) +
+1       # of broadcast
+1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+This will cause HPL to broadcast the current panel using the +increasing ring modified topology.

+Lines 22-23: (Example 2) +
+2       # of broadcast
+0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
+
+This will cause HPL to broadcast the current panel using the +increasing ring virtual topology and the long message +algorithm.

+ +
+Lines 24-25 allow to specify the look-ahead +depth used by HPL. A depth of 0 means that the next panel +is factorized after the update by the current panel is +completely finished. A depth of 1 means that the next +panel is immediately factorized after being updated. The +update by the current panel is then finished. A depth of k +means that the k next panels are factorized immediately after +being updated. The update by the current panel is then +finished. It turns out that a depth of 1 seems to give the +best results, but may need a large problem size before one +can see the performance gain. So use 1, if you do not know +better, otherwise you may want to try 0. Look-ahead of +depths 3 and larger will probably not give you better +results.

+Lines 24-25: (Example 1): +
+1       # of lookahead depth
+1       DEPTHs (>=0)
+
+This will cause HPL to use a look-ahead of depth 1.

+Lines 24-25: (Example 2): +
+2       # of lookahead depth
+0 1     DEPTHs (>=0)
+
+This will cause HPL to use a look-ahead of depths 0 and 1.

+ +
+Lines 26-27 allow to specify the swapping +algorithm used by HPL for all tests. There are currently +two swapping algorithms available, one based on "binary +exchange" and the other one based on a "spread-roll" +procedure (also called "long" below). For large problem +sizes, this last one is likely to be more efficient. The user +can also choose to mix both variants, that is "binary-exchange" +for a number of columns less than a threshold value, and then +the "spread-roll" algorithm. This threshold value is then +specified on Line 27.

+Lines 26-27: (Example 1): +
+1       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+
+This will cause HPL to use the "long" or "spread-roll" +swapping algorithm. Note that a threshold is specified in +that example but not used by HPL.

+Lines 26-27: (Example 2): +
+2       SWAP (0=bin-exch,1=long,2=mix)
+60      swapping threshold
+
+This will cause HPL to use the "long" or "spread-roll" +swapping algorithm as soon as there are more than 60 columns +in the row panel. Otherwise, the "binary-exchange" algorithm +will be used instead.

+ +
+Line 28 allows to specify whether the upper +triangle of the panel of columns should be stored in +no-transposed or transposed form. Example: +
+0            L1 in (0=transposed,1=no-transposed) form
+
+ +
+Line 29 allows to specify whether the panel +of rows U should be stored in no-transposed or transposed +form. Example: +
+0            U  in (0=transposed,1=no-transposed) form
+
+ +
+Line 30 enables / disables the equilibration +phase. This option will not be used unless you selected 1 or +2 in Line 26. Example: +
+1            Equilibration (0=no,1=yes)
+
+ +
+Line 31 allows to specify the alignment in +memory for the memory space allocated by HPL. On modern +machines, one probably wants to use 4, 8 or 16. This may +result in a tiny amount of memory wasted. Example: +
+8       memory alignment in double (> 0)
+
+ +
+

Guide Lines

+ +
    +
  1. Figure out a good block size for the matrix multiply +routine. The best method is to try a few out. If you happen +to know the block size used by the matrix-matrix multiply +routine, a small multiple of that block size will do fine. +This particular topic is discussed in the +FAQs section.

    + +
  2. The process mapping should not matter if the nodes of +your platform are single processor computers. If these nodes +are multi-processors, a row-major mapping is recommended.

    + +
  3. HPL likes "square" or slightly flat process grids. Unless +you are using a very small process grid, stay away from the +1-by-Q and P-by-1 process grids. This particular topic is also +discussed in the FAQs section.

    + +
  4. Panel factorization parameters: a good start are the +following for the lines 14-21: +
    +1       # of panel fact
    +1       PFACTs (0=left, 1=Crout, 2=Right)
    +2       # of recursive stopping criterium
    +4 8     NBMINs (>= 1)
    +1       # of panels in recursion
    +2       NDIVs
    +1       # of recursive panel fact.
    +2       RFACTs (0=left, 1=Crout, 2=Right)
    +
    + +
  5. Broadcast parameters: at this time it is far from obvious +to me what the best setting is, so I would probably try them +all. If I had to guess I would probably start with the +following for the lines 22-23: +
    +2       # of broadcast
    +1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
    +
    +The best broadcast depends on your problem size and hardware +performance. My take is that 4 or 5 may be competitive for +machines featuring very fast nodes compared to the +network.

    + +
  6. Look-ahead depth: as mentioned above 0 or 1 are likely to +be the best choices. This also depends on the problem size +and machine configuration, so I would try "no look-ahead (0)" +and "look-ahead of depth 1 (1)". That is for lines 24-25: +
    +2       # of lookahead depth
    +0 1     DEPTHs (>=0)
    +
    + +
  7. Swapping: one can select only one of the three algorithms +in the input file. Theoretically, mix (2) should win, however +long (1) might just be good enough. The difference should be +small between those two assuming a swapping threshold of the +order of the block size (NB) selected. If this threshold is +very large, HPL will use bin_exch (0) most of the time and if +it is very small (< NB) long (1) will always be used. In +short and assuming the block size (NB) used is say 60, I +would choose for the lines 26-27: +
    +2       SWAP (0=bin-exch,1=long,2=mix)
    +60      swapping threshold 
    +
    +I would also try the long variant. For a very small number +of processes in every column of the process grid (say < 4), +very little performance difference should be observable.

    + +
  8. Local storage: I do not think Line 28 matters. Pick 0 in +doubt. Line 29 is more important. It controls how the panel +of rows should be stored. No doubt 0 is better. The caveat is +that in that case the matrix-multiply function is called with +( Notrans, Trans, ... ), that is C := C - A B^T. Unless the +computational kernel you are using has a very poor (with +respect to performance) implementation of that case, and is +much more efficient with ( Notrans, Notrans, ... ) just pick +0 as well. So, my choice: +
    +0       L1 in (0=transposed,1=no-transposed) form
    +0       U  in (0=transposed,1=no-transposed) form
    +
    + +
  9. Equilibration: It is hard to tell whether equilibration +should always be performed or not. Not knowing much about the +random matrix generated and because the overhead is so small +compared to the possible gain, I turn it on all the time. +
    +1       Equilibration (0=no,1=yes)
    +
    + +
  10. For alignment, 4 should be plenty, but just to be safe, +one may want to pick 8 instead. +
    +8       memory alignment in double (> 0)
    +
    +
+ +
+
+ [Home] + [Copyright and Licensing Terms] + [Algorithm] + [Scalability] + [Performance Results] + [Documentation] + [Software] + [FAQs] + [Tuning] + [Errata-Bugs] + [References] + [Related Links]
+
+
+ + diff --git a/third-party-programs/Velocity-Bench/hplinpack/hplinkpack_migration.md b/third-party-programs/Velocity-Bench/hplinpack/hplinkpack_migration.md new file mode 100644 index 000000000..dbf4629dc --- /dev/null +++ b/third-party-programs/Velocity-Bench/hplinpack/hplinkpack_migration.md @@ -0,0 +1,146 @@ +# SYCLomatic Tool: Migrate hplinpack APP +## Use the command line to migrate large code base. +The SYCLomatic project (the Open source version of Intel® DPC++ Compatibility Tool) can migrate project that contain multiple source and header files. +| Optimized for | Description +|:--- |:--- +| OS | Linux* Ubuntu* 22.04 +| Software | Intel® DPC++ Compatibility Tool +| What you will learn | Simple invocation of dpct to migrate CUDA code +| Time to complete | 15 minutes + + +# Purpose +The SYCLomatic tool can migrate projects composed with multiple source and header files. +Used the dpct option **--in-root** option to set the root location of your prepared migration APP. Only the files under this specified root will be considered to migrate. Files located outside the **--in-root** will be considered system files or libraries files and will not be migrated. + +The dpct **--out-root** will specify the directory into which generated SYCL*-compilant code producted by the dpct tool is written. The relative path and the name will be kept, except the file extensions are changed to **.dp.cpp**. + + +# Key Implementation Details +Except the --in-root and --out-root options, there are additional options can help to migrate the code more smoothly: [Command Line Options Reference](https://software.intel.com/content/www/us/en/develop/documentation/intel-dpcpp-compatibility-tool-user-guide/top/command-line-options-reference.html). + + + +## Migrating the CUDA Sample to Data Parallel C++ with the Intel® DPC++ Compatibility Tool + +Building and running the CUDA sample is not required to migrate this project +to a SYCL*-compliant project. 
+ +> **Note**: Certain CUDA header files, referenced by the CUDA application +> source files to be migrated, need to be accessible for the migration step. +> See *Before you Begin* in [Get Started with the Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/develop/documentation/get-started-with-intel-dpcpp-compatibility-tool/top.html#top_BEFORE_YOU_BEGIN). + +> **Note**: If you have not already done so, set up your CLI +> environment by sourcing the `setvars` script located in +> the root of your oneAPI installation. +> +> Linux*: +> - For system wide installations: `. /opt/intel/oneapi/setvars.sh` +> - For private installations: `. ~/intel/oneapi/setvars.sh` +> - For non-POSIX shells, like csh, use the following command: `$ bash -c 'source /setvars.sh ; exec csh'` +> +> Windows*: +> - `C:\Program Files (x86)\Intel\oneAPI\setvars.bat` +> - For Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` +> +> For more information on configuring environment variables, see [Use the setvars Script with Linux* or MacOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html). + + +### Command-Line on a Linux* System + +1. This sample project contains a simple CUDA program, located in the ```cuda``` directory and the sub-directory src of ```cuda```: + +2. Use the **intercept-build** tool to intercept the build step to generate the compilation database `compile_commands.json` file under the same folder. +``` sh +$ cd cuda/hp-2.3 +$ intercept-build make +``` +2. 
Use the tool's `--in-root` option and provide input files to specify where + to locate the CUDA files that needs migration; use the tool’s `--out-root` + option to designate where to generate the resulting files(default is `dpct_output`); use the tool's `-p` option to specify compilation database to migrate the whole project: + +```sh +# From the cuda directory as root directory: +$ dpct --in-root=. --out-root=out --cuda-include-path=/usr/local/cuda/include -p . --gen-build-script +``` + +> If an `--in-root` option is not specified, the directory of the first input +> source file is implied. If `--out-root` is not specified, `./dpct_output` +> is implied. + +You should see the migrated files in the `out` folder that was specified +by the `--out-root` option: + +3. To build the migration app, the Makefile.dpct needs to be updated. Details are in the following: + + +```make +5 #DPCT2001:4: You can link with more library by add them here. +6 LIB := +7 +8 FLAGS := +9 +...... +582 TARGET := ${TARGET_0} ${TARGET_1} ${TARGET_2} +...... +589 $(TARGET_0): $(OBJS_0) +590 $(CC) -fsycl -o $@ $^ $(LIB) -qmkl +...... +628 $(TARGET_1): $(OBJS_1) +629 ar -r $@ $^ $(LIB) -qmkl +...... +1009 $(TARGET_2): $(OBJS_2) +1010 $(CC) -fsycl -o $@ $^ $(LIB) -qmkl +1011 +1012 $(TARGET_2_OBJ_0):$(TARGET_2_SRC_0) +1013 cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0) +1014 +1015 $(TARGET_2_OBJ_1):$(TARGET_2_SRC_1) +1016 cc -c ${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1) +``` +change to +``` make +5 #DPCT2001:4: You can link with more library by add them here. +6 LIB := -lmpi +7 +8 FLAGS := -fPIC +9 +...... +582 TARGET := ${TARGET_1} ${TARGET_2} ${TARGET_0} +...... 
+589 $(TARGET_0): $(OBJS_0) +590 $(CC) -fsycl -o $@ $^ $(LIB) -qmkl libdgemm.so.1.0.1 ../lib/intel64/libhpl.a +627 +628 $(TARGET_1): $(OBJS_1) +629 ar -r $@ $^ $(LIB) +630 +1008 +1009 $(TARGET_2): $(OBJS_2) +1010 $(CC) -fPIC -shared -fsycl -o $@ $^ $(LIB) -qmkl +1011 +1012 $(TARGET_2_OBJ_0):$(TARGET_2_SRC_0) +1013 cc -c ${TARGET_2_SRC_0} -o ${TARGET_2_OBJ_0} $(TARGET_2_FLAG_0) +1014 +1015 $(TARGET_2_OBJ_1):$(TARGET_2_SRC_1) +1016 icpx -c ${TARGET_2_SRC_1} -o ${TARGET_2_OBJ_1} $(TARGET_2_FLAG_1) +1017 +``` +Run ```vimdiff Makefile.dpct Makefile.dpct.patched``` in the out folder to see the detailed changes. + + +4. Build the migrated code with the generated Makefile.dpct +``` +$ make -f Makefile.dpct +# Make sure the oneAPI package is installed before building the application, so that the oneAPI DPC++ compiler is available. +``` + +# Example Output + +When you run the migrated application, you can follow the [README](https://github.com/oneapi-src/Velocity-Bench/blob/main/hplinpack/README.md) + +If an error occurs, troubleshoot the problem using the Diagnostics Utility for Intel® oneAPI Toolkits. +[Learn more](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html). + +## License +Code samples are licensed under the GNU General Public License version 2. See +[License.txt](https://github.com/oneapi-src/Velocity-Bench/blob/main/hplinpack/LICENSE.md) for details.